From 34909cbbd42335c6a7b8258b43cabad0d260b12f Mon Sep 17 00:00:00 2001 From: Simon W Date: Wed, 13 Dec 2023 09:37:22 -0800 Subject: [PATCH 001/128] minimal pytest --- tests/test_integration.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/test_integration.py b/tests/test_integration.py index e60b3a871..438f75c5c 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1705,3 +1705,14 @@ def test_unused_future_regressors(): m.add_future_regressor("price") m.add_lagged_regressor("cost") m.fit(df, freq="D") + +def test_on_the_fly_sampling(): + start_date = "2019-01-01" + end_date = "2019-03-01" + date_range = pd.date_range(start=start_date, end=end_date, freq="H") + y = np.random.randint(0, 1000, size=(len(date_range),)) + df = pd.DataFrame({"ds": date_range, "y": y}) + + m = NeuralProphet(epochs=1) + m.fit(df, freq='H') + m.predict(df) From 687c08559ee282da4eec06ee89613db2a117d51f Mon Sep 17 00:00:00 2001 From: Simon W Date: Wed, 13 Dec 2023 10:02:49 -0800 Subject: [PATCH 002/128] move_func_getitem --- neuralprophet/time_dataset.py | 86 +++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 39 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index dca97da79..7a889508d 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -63,10 +63,54 @@ def __init__(self, df, name, **kwargs): "events", "regressors", ] - inputs, targets, drop_missing = tabularize_univariate_datetime(df, **kwargs) + + self.df = df + self.kwargs = kwargs + #inputs, targets, drop_missing = tabularize_univariate_datetime(df, **kwargs) + #self.init_after_tabularized(inputs, targets) + #self.filter_samples_after_init(kwargs["prediction_frequency"]) + #self.drop_nan_after_init(df, kwargs["predict_steps"], drop_missing) + + def __getitem__(self, index): + """Overrides parent class method to get an item at index. 
+ Parameters + ---------- + index : int + Sample location in dataset + Returns + ------- + OrderedDict + Model inputs, each of len(df) but with varying dimensions + Note + ---- + Contains the following data: + Model Inputs + * ``time`` (np.array, float), dims: (num_samples, 1) + * ``seasonalities`` (OrderedDict), named seasonalities + each with features (np.array, float) - dims: (num_samples, n_features[name]) + * ``lags`` (np.array, float), dims: (num_samples, n_lags) + * ``covariates`` (OrderedDict), named covariates, + each with features (np.array, float) of dims: (num_samples, n_lags) + * ``events`` (OrderedDict), events, + each with features (np.array, float) of dims: (num_samples, n_lags) + * ``regressors`` (OrderedDict), regressors, + each with features (np.array, float) of dims: (num_samples, n_lags) + np.array, float + Targets to be predicted of same length as each of the model inputs, dims: (num_samples, n_forecasts) + """ + inputs, targets, drop_missing = tabularize_univariate_datetime(self.df, **self.kwargs) self.init_after_tabularized(inputs, targets) - self.filter_samples_after_init(kwargs["prediction_frequency"]) - self.drop_nan_after_init(df, kwargs["predict_steps"], drop_missing) + self.filter_samples_after_init(self.kwargs["prediction_frequency"]) + self.drop_nan_after_init(self.df, self.kwargs["predict_steps"], drop_missing) + + sample = self.samples[index] + targets = self.targets[index] + meta = self.meta + return sample, targets, meta + + def __len__(self): + """Overrides Parent class method to get data length.""" + return self.length def drop_nan_after_init(self, df, predict_steps, drop_missing): """Checks if inputs/targets contain any NaN values and drops them, if user opts to. @@ -223,42 +267,6 @@ def filter_samples_after_init( sample.pop("timestamps") self.length = len(self.samples) - def __getitem__(self, index): - """Overrides parent class method to get an item at index. 
- Parameters - ---------- - index : int - Sample location in dataset - Returns - ------- - OrderedDict - Model inputs, each of len(df) but with varying dimensions - Note - ---- - Contains the following data: - Model Inputs - * ``time`` (np.array, float), dims: (num_samples, 1) - * ``seasonalities`` (OrderedDict), named seasonalities - each with features (np.array, float) - dims: (num_samples, n_features[name]) - * ``lags`` (np.array, float), dims: (num_samples, n_lags) - * ``covariates`` (OrderedDict), named covariates, - each with features (np.array, float) of dims: (num_samples, n_lags) - * ``events`` (OrderedDict), events, - each with features (np.array, float) of dims: (num_samples, n_lags) - * ``regressors`` (OrderedDict), regressors, - each with features (np.array, float) of dims: (num_samples, n_lags) - np.array, float - Targets to be predicted of same length as each of the model inputs, dims: (num_samples, n_forecasts) - """ - sample = self.samples[index] - targets = self.targets[index] - meta = self.meta - return sample, targets, meta - - def __len__(self): - """Overrides Parent class method to get data length.""" - return self.length - def tabularize_univariate_datetime( df, From 5215340aa51f2c8ef5d71f7e1ce07db9d7b30433 Mon Sep 17 00:00:00 2001 From: Simon W Date: Thu, 14 Dec 2023 19:26:26 -0800 Subject: [PATCH 003/128] slicing --- neuralprophet/time_dataset.py | 8 +++++++- tests/test_integration.py | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 7a889508d..afa69beab 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -98,7 +98,13 @@ def __getitem__(self, index): np.array, float Targets to be predicted of same length as each of the model inputs, dims: (num_samples, n_forecasts) """ - inputs, targets, drop_missing = tabularize_univariate_datetime(self.df, **self.kwargs) + start_idx = index + #end_idx = start_idx + self.kwargs.get('n_lags') + self.kwargs.get('n_forecasts') - 1 #correct? 
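        # (A hedged aside: a complete training sample spans n_lags input steps
        # plus n_forecasts target steps, i.e. the commented-out formula above;
        # a later commit in this series ("lr-finder") adopts exactly
        #   end_idx = start_idx + self.kwargs.get('n_lags') + self.kwargs.get('n_forecasts')
        # while the single-row slice below is an interim placeholder.)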
+ end_idx = start_idx + 1 + df_slice = self.df.iloc[start_idx:end_idx] + + # Functions + inputs, targets, drop_missing = tabularize_univariate_datetime(df_slice, **self.kwargs) self.init_after_tabularized(inputs, targets) self.filter_samples_after_init(self.kwargs["prediction_frequency"]) self.drop_nan_after_init(self.df, self.kwargs["predict_steps"], drop_missing) diff --git a/tests/test_integration.py b/tests/test_integration.py index 438f75c5c..601b9dff9 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1708,7 +1708,7 @@ def test_unused_future_regressors(): def test_on_the_fly_sampling(): start_date = "2019-01-01" - end_date = "2019-03-01" + end_date = "2019-01-04" date_range = pd.date_range(start=start_date, end=end_date, freq="H") y = np.random.randint(0, 1000, size=(len(date_range),)) df = pd.DataFrame({"ds": date_range, "y": y}) From c70fae292623001d66ae1a0efddc7ff96162ab9a Mon Sep 17 00:00:00 2001 From: Simon W Date: Thu, 14 Dec 2023 19:48:03 -0800 Subject: [PATCH 004/128] predict_mode --- neuralprophet/time_dataset.py | 11 +++++++---- tests/test_integration.py | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index afa69beab..885a06165 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -98,10 +98,13 @@ def __getitem__(self, index): np.array, float Targets to be predicted of same length as each of the model inputs, dims: (num_samples, n_forecasts) """ - start_idx = index - #end_idx = start_idx + self.kwargs.get('n_lags') + self.kwargs.get('n_forecasts') - 1 #correct? - end_idx = start_idx + 1 - df_slice = self.df.iloc[start_idx:end_idx] + if self.kwargs['predict_mode']: + df_slice = self.df + else: + start_idx = index + #end_idx = start_idx + self.kwargs.get('n_lags') + self.kwargs.get('n_forecasts') - 1 #correct? 
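        # (Since the predict_mode branch above passes the full frame through,
        # this slice only runs during training; how long the training window
        # must be is still the open question flagged in the comment above.)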
+ end_idx = start_idx + 1 + df_slice = self.df.iloc[start_idx:end_idx] # Functions inputs, targets, drop_missing = tabularize_univariate_datetime(df_slice, **self.kwargs) diff --git a/tests/test_integration.py b/tests/test_integration.py index 601b9dff9..76517b084 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1713,6 +1713,6 @@ def test_on_the_fly_sampling(): y = np.random.randint(0, 1000, size=(len(date_range),)) df = pd.DataFrame({"ds": date_range, "y": y}) - m = NeuralProphet(epochs=1) + m = NeuralProphet(epochs=1, learning_rate=0.01) m.fit(df, freq='H') m.predict(df) From b78d5e021552d95daa800b36572d62b56ab47244 Mon Sep 17 00:00:00 2001 From: Simon W Date: Mon, 18 Dec 2023 13:20:43 -0800 Subject: [PATCH 005/128] typos --- neuralprophet/forecaster.py | 4 ++-- neuralprophet/time_dataset.py | 2 ++ tests/test_integration.py | 15 ++++++++++----- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/neuralprophet/forecaster.py b/neuralprophet/forecaster.py index 852fc297b..d81712388 100644 --- a/neuralprophet/forecaster.py +++ b/neuralprophet/forecaster.py @@ -2684,7 +2684,7 @@ def _train( val_dataloaders=val_loader, **self.config_train.lr_finder_args, ) - # Estimate the optimat learning rate from the loss curve + # Estimate the optimal learning rate from the loss curve assert lr_finder is not None _, _, lr_suggestion = utils.smooth_loss_and_suggest(lr_finder.results) self.model.learning_rate = lr_suggestion @@ -2706,7 +2706,7 @@ def _train( **self.config_train.lr_finder_args, ) assert lr_finder is not None - # Estimate the optimat learning rate from the loss curve + # Estimate the optimal learning rate from the loss curve _, _, lr_suggestion = utils.smooth_loss_and_suggest(lr_finder.results) self.model.learning_rate = lr_suggestion start = time.time() diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 885a06165..ef1d7baa5 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -106,6 +106,8 @@ def __getitem__(self, index): end_idx = start_idx + 1 df_slice = self.df.iloc[start_idx:end_idx] + #df_slice = self.df + # Functions inputs, targets, drop_missing = tabularize_univariate_datetime(df_slice, **self.kwargs) self.init_after_tabularized(inputs, targets) diff --git a/tests/test_integration.py b/tests/test_integration.py index 76517b084..ead9a17a7 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1710,9 +1710,14 @@ def test_on_the_fly_sampling(): start_date = "2019-01-01" end_date = "2019-01-04" date_range = pd.date_range(start=start_date, end=end_date, freq="H") - y = np.random.randint(0, 1000, size=(len(date_range),)) - df = pd.DataFrame({"ds": date_range, "y": y}) - - m = NeuralProphet(epochs=1, learning_rate=0.01) + #y = np.random.randint(0, 1000, size=(len(date_range),)) + #df = pd.DataFrame({"ds": date_range, "y": y}) + df = pd.DataFrame( + { + "ds": {0: "2022-10-16 00:00:00", 1: "2022-10-17 00:00:00", 2: "2022-10-18 00:00:00", 3: "2022-10-19 00:00:00", 4: "2022-10-20 00:00:00",}, + "y": {0: 17, 1: 18, 2: 10, 3: 8, 4: 5}, + } + ) + m = NeuralProphet(epochs=1) #, learning_rate=0.01) m.fit(df, freq='H') - m.predict(df) + metrics = m.predict(df) From beae5bb21ce8d870ff0fd212f70ac2c462c7ee2d Mon Sep 17 00:00:00 2001 From: Simon W Date: Mon, 18 Dec 2023 17:56:49 -0800 Subject: [PATCH 006/128] lr-finder --- neuralprophet/data/process.py | 1 + neuralprophet/time_dataset.py | 12 +++++++----- tests/test_integration.py | 19 +++++++------------ 3 files changed, 15 insertions(+), 
17 deletions(-) diff --git a/neuralprophet/data/process.py b/neuralprophet/data/process.py index 9f8861016..c9190f21a 100644 --- a/neuralprophet/data/process.py +++ b/neuralprophet/data/process.py @@ -623,4 +623,5 @@ def _create_dataset(model, df, predict_mode, prediction_frequency=None): config_regressors=model.config_regressors, config_missing=model.config_missing, prediction_frequency=prediction_frequency, + config_train=model.config_train ) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index ef1d7baa5..fdfaf7503 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -98,16 +98,17 @@ def __getitem__(self, index): np.array, float Targets to be predicted of same length as each of the model inputs, dims: (num_samples, n_forecasts) """ - if self.kwargs['predict_mode']: + learning_rate = self.kwargs['config_train'].learning_rate + # TODO: Drop config_train from self! + + if self.kwargs['predict_mode'] or (learning_rate is None): df_slice = self.df else: start_idx = index - #end_idx = start_idx + self.kwargs.get('n_lags') + self.kwargs.get('n_forecasts') - 1 #correct? - end_idx = start_idx + 1 + end_idx = start_idx + self.kwargs.get('n_lags') + self.kwargs.get('n_forecasts') + #end_idx = start_idx + 1 df_slice = self.df.iloc[start_idx:end_idx] - #df_slice = self.df - # Functions inputs, targets, drop_missing = tabularize_univariate_datetime(df_slice, **self.kwargs) self.init_after_tabularized(inputs, targets) @@ -291,6 +292,7 @@ def tabularize_univariate_datetime( config_lagged_regressors: Optional[configure.ConfigLaggedRegressors] = None, config_regressors: Optional[configure.ConfigFutureRegressors] = None, config_missing=None, + config_train=None, prediction_frequency=None, ): """Create a tabular dataset from univariate timeseries for supervised forecasting. 
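The config_train handle threaded through above is what lets the dataset decide between eager and on-the-fly tabularization: NeuralProphet runs its learning-rate finder when no learning_rate is supplied, and that sweep needs the samples materialized up front. A minimal sketch of the gating, with an illustrative helper name that is not part of the patch:

    def needs_precomputed_samples(predict_mode: bool, config_train) -> bool:
        # Prediction iterates every window at once, and the LR finder (run
        # when learning_rate is None) consumes the dataset before training
        # starts, so both cases require eagerly tabularized samples.
        return predict_mode or config_train.learning_rate is None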
diff --git a/tests/test_integration.py b/tests/test_integration.py index ead9a17a7..5a9cf80b9 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1707,17 +1707,12 @@ def test_unused_future_regressors(): m.fit(df, freq="D") def test_on_the_fly_sampling(): - start_date = "2019-01-01" - end_date = "2019-01-04" - date_range = pd.date_range(start=start_date, end=end_date, freq="H") - #y = np.random.randint(0, 1000, size=(len(date_range),)) - #df = pd.DataFrame({"ds": date_range, "y": y}) - df = pd.DataFrame( - { - "ds": {0: "2022-10-16 00:00:00", 1: "2022-10-17 00:00:00", 2: "2022-10-18 00:00:00", 3: "2022-10-19 00:00:00", 4: "2022-10-20 00:00:00",}, - "y": {0: 17, 1: 18, 2: 10, 3: 8, 4: 5}, - } - ) - m = NeuralProphet(epochs=1) #, learning_rate=0.01) + start_date = "2022-10-16 00:00:00" + end_date = "2022-12-30 00:00:00" + date_range = pd.date_range(start=start_date, end=end_date, freq="D") + y = np.random.randint(0, 20, size=(len(date_range),)) + df = pd.DataFrame({"ds": date_range, "y": y}) + + m = NeuralProphet(epochs=1, learning_rate=0.01) m.fit(df, freq='H') metrics = m.predict(df) From 8427ffc46da4aad65f017d937eff801b0f7f1642 Mon Sep 17 00:00:00 2001 From: Simon W Date: Tue, 19 Dec 2023 12:05:35 -0800 Subject: [PATCH 007/128] drop_missing --- neuralprophet/time_dataset.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index fdfaf7503..5f090400c 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -106,14 +106,13 @@ def __getitem__(self, index): else: start_idx = index end_idx = start_idx + self.kwargs.get('n_lags') + self.kwargs.get('n_forecasts') - #end_idx = start_idx + 1 df_slice = self.df.iloc[start_idx:end_idx] # Functions - inputs, targets, drop_missing = tabularize_univariate_datetime(df_slice, **self.kwargs) + inputs, targets = tabularize_univariate_datetime(df_slice, **self.kwargs) self.init_after_tabularized(inputs, targets) self.filter_samples_after_init(self.kwargs["prediction_frequency"]) - self.drop_nan_after_init(self.df, self.kwargs["predict_steps"], drop_missing) + self.drop_nan_after_init(self.df, self.kwargs["predict_steps"], self.kwargs["config_missing"].drop_missing) sample = self.samples[index] targets = self.targets[index] @@ -502,7 +501,7 @@ def _stride_timestamps_for_forecasts(x): tabularized_input_shapes_str += f" {key} {value.shape} \n" log.debug(f"Tabularized inputs shapes: \n{tabularized_input_shapes_str}") - return inputs, targets, config_missing.drop_missing + return inputs, targets def fourier_series(dates, period, series_order): From ff05b2a179f672f3b8cf86db6defe3211b0a814c Mon Sep 17 00:00:00 2001 From: Simon W Date: Tue, 19 Dec 2023 13:54:23 -0800 Subject: [PATCH 008/128] predict_v2 --- neuralprophet/forecaster.py | 1 + neuralprophet/time_dataset.py | 52 +++++++++++++++++++++++------------ tests/test_integration.py | 1 + 3 files changed, 36 insertions(+), 18 deletions(-) diff --git a/neuralprophet/forecaster.py b/neuralprophet/forecaster.py index d81712388..72640cbe0 100644 --- a/neuralprophet/forecaster.py +++ b/neuralprophet/forecaster.py @@ -1774,6 +1774,7 @@ def predict_seasonal_components(self, df: pd.DataFrame, quantile: float = 0.5): predict_mode=True, config_missing=self.config_missing, prediction_frequency=self.prediction_frequency, + config_train=self.config_train, ) loader = DataLoader(dataset, batch_size=min(4096, len(df)), shuffle=False, drop_last=False) predicted = {} diff --git 
a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 5f090400c..0a7910c40 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -63,13 +63,16 @@ def __init__(self, df, name, **kwargs): "events", "regressors", ] - - self.df = df self.kwargs = kwargs - #inputs, targets, drop_missing = tabularize_univariate_datetime(df, **kwargs) - #self.init_after_tabularized(inputs, targets) - #self.filter_samples_after_init(kwargs["prediction_frequency"]) - #self.drop_nan_after_init(df, kwargs["predict_steps"], drop_missing) + + learning_rate = kwargs['config_train'].learning_rate + if kwargs['predict_mode'] or (learning_rate is None): + inputs, targets = tabularize_univariate_datetime(df, **kwargs) + self.init_after_tabularized(inputs, targets) + self.filter_samples_after_init(kwargs["prediction_frequency"]) + self.drop_nan_after_init(df, kwargs["predict_steps"], kwargs["config_missing"].drop_missing) + else: + self.df = df def __getitem__(self, index): """Overrides parent class method to get an item at index. @@ -98,31 +101,44 @@ def __getitem__(self, index): np.array, float Targets to be predicted of same length as each of the model inputs, dims: (num_samples, n_forecasts) """ - learning_rate = self.kwargs['config_train'].learning_rate # TODO: Drop config_train from self! - + learning_rate = self.kwargs['config_train'].learning_rate if self.kwargs['predict_mode'] or (learning_rate is None): - df_slice = self.df + sample = self.samples[index] + targets = self.targets[index] + meta = self.meta + return sample, targets, meta else: start_idx = index end_idx = start_idx + self.kwargs.get('n_lags') + self.kwargs.get('n_forecasts') df_slice = self.df.iloc[start_idx:end_idx] - # Functions - inputs, targets = tabularize_univariate_datetime(df_slice, **self.kwargs) - self.init_after_tabularized(inputs, targets) - self.filter_samples_after_init(self.kwargs["prediction_frequency"]) - self.drop_nan_after_init(self.df, self.kwargs["predict_steps"], self.kwargs["config_missing"].drop_missing) + # Functions + inputs, targets = tabularize_univariate_datetime(df_slice, **self.kwargs) + self.init_after_tabularized(inputs, targets) + self.filter_samples_after_init(self.kwargs["prediction_frequency"]) + self.drop_nan_after_init(self.df, self.kwargs["predict_steps"], self.kwargs["config_missing"].drop_missing) - sample = self.samples[index] - targets = self.targets[index] - meta = self.meta - return sample, targets, meta + sample = self.samples[index] + targets = self.targets[index] + meta = self.meta + return sample, targets, meta def __len__(self): """Overrides Parent class method to get data length.""" return self.length + def drop_nan_init(self, drop_missing): + """Checks if inputs/targets contain any NaN values and drops them, if user opts to. + Parameters + ---------- + drop_missing : bool + whether to automatically drop missing samples from the data + predict_steps : int + number of steps to predict + """ + + def drop_nan_after_init(self, df, predict_steps, drop_missing): """Checks if inputs/targets contain any NaN values and drops them, if user opts to. 
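        If ``drop_missing`` is False and NaN values remain after imputation,
        a ValueError is raised instead of silently dropping the samples.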
Parameters diff --git a/tests/test_integration.py b/tests/test_integration.py index 5a9cf80b9..6d1799f64 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1712,6 +1712,7 @@ def test_on_the_fly_sampling(): date_range = pd.date_range(start=start_date, end=end_date, freq="D") y = np.random.randint(0, 20, size=(len(date_range),)) df = pd.DataFrame({"ds": date_range, "y": y}) + df.loc[3, "y"] = np.nan m = NeuralProphet(epochs=1, learning_rate=0.01) m.fit(df, freq='H') From c408e950095b83c7711c807eedec57df8a65bb1e Mon Sep 17 00:00:00 2001 From: Simon W Date: Tue, 19 Dec 2023 15:16:14 -0800 Subject: [PATCH 009/128] predict_v3 --- tests/test_unit.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/test_unit.py b/tests/test_unit.py index 7600f8c3d..c07c98527 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -82,7 +82,7 @@ def test_time_dataset(): local_data_params, global_data_params = df_utils.init_data_params(df=df, normalize="minmax") df = df.drop("ID", axis=1) df = df_utils.normalize(df, global_data_params) - inputs, targets, _ = time_dataset.tabularize_univariate_datetime( + inputs, targets = time_dataset.tabularize_univariate_datetime( df, n_lags=n_lags, n_forecasts=n_forecasts, config_missing=config_missing ) log.debug( @@ -806,6 +806,13 @@ def test_too_many_NaN(): config_missing = configure.MissingDataHandling( impute_missing=True, impute_linear=5, impute_rolling=5, drop_missing=False ) + config_train = configure.Train( + learning_rate=LR, + epochs=EPOCHS, + batch_size=BATCH_SIZE, + loss_func="SmoothL1Loss", + optimizer="AdamW", + ) length = 100 days = pd.date_range(start="2017-01-01", periods=length) y = np.ones(length) @@ -825,7 +832,7 @@ def test_too_many_NaN(): df["ID"] = "__df__" # Check if ValueError is thrown, if NaN values remain after auto-imputing with pytest.raises(ValueError): - time_dataset.TimeDataset(df, "name", config_missing=config_missing, predict_steps=1, prediction_frequency=None) + time_dataset.TimeDataset(df, "name", predict_mode=False, config_missing=config_missing, config_train=config_train, predict_steps=1, prediction_frequency=None) def test_future_df_with_nan(): From df29f33fbbbd32815d3405acca9e9c59cf99cb29 Mon Sep 17 00:00:00 2001 From: Simon W Date: Wed, 20 Dec 2023 11:01:41 -0800 Subject: [PATCH 010/128] samples --- neuralprophet/time_dataset.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 0a7910c40..db5727448 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -361,6 +361,11 @@ def tabularize_univariate_datetime( """ max_lags = get_max_num_lags(config_lagged_regressors, n_lags) n_samples = len(df) - max_lags + 1 - n_forecasts + #TODO + #n_samples = max_lags + n_forecasts + #if n_samples < 0: + # n_samples = max_lags + n_forecasts + # data is stored in OrderedDict inputs = OrderedDict({}) From 29fe999148ff1c6e8a23c701f0248ed859314e7e Mon Sep 17 00:00:00 2001 From: Simon W Date: Thu, 21 Dec 2023 11:10:22 -0800 Subject: [PATCH 011/128] lagged regressor n_lags --- neuralprophet/time_dataset.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index db5727448..8ea20ebd2 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -66,7 +66,7 @@ def __init__(self, df, name, **kwargs): self.kwargs = kwargs learning_rate = kwargs['config_train'].learning_rate - if 
kwargs['predict_mode'] or (learning_rate is None): + if kwargs['predict_mode'] or (learning_rate is None) or self.kwargs['config_lagged_regressors']: inputs, targets = tabularize_univariate_datetime(df, **kwargs) self.init_after_tabularized(inputs, targets) self.filter_samples_after_init(kwargs["prediction_frequency"]) @@ -103,7 +103,7 @@ def __getitem__(self, index): """ # TODO: Drop config_train from self! learning_rate = self.kwargs['config_train'].learning_rate - if self.kwargs['predict_mode'] or (learning_rate is None): + if self.kwargs['predict_mode'] or (learning_rate is None) or self.kwargs['config_lagged_regressors']: sample = self.samples[index] targets = self.targets[index] meta = self.meta @@ -111,6 +111,7 @@ def __getitem__(self, index): else: start_idx = index end_idx = start_idx + self.kwargs.get('n_lags') + self.kwargs.get('n_forecasts') + df_slice = self.df.iloc[start_idx:end_idx] # Functions @@ -360,11 +361,13 @@ def tabularize_univariate_datetime( Targets to be predicted of same length as each of the model inputs, dims: (num_samples, n_forecasts) """ max_lags = get_max_num_lags(config_lagged_regressors, n_lags) - n_samples = len(df) - max_lags + 1 - n_forecasts + #n_samples = len(df) - max_lags + 1 - n_forecasts #TODO - #n_samples = max_lags + n_forecasts - #if n_samples < 0: - # n_samples = max_lags + n_forecasts + learning_rate = config_train.learning_rate + if predict_mode or (learning_rate is None): + n_samples = len(df) - max_lags + 1 - n_forecasts + else: + n_samples=1 # data is stored in OrderedDict inputs = OrderedDict({}) From 2f584c23a66c99ed83ea33f7fce73ccda3b8dc7a Mon Sep 17 00:00:00 2001 From: Simon W Date: Thu, 21 Dec 2023 15:31:58 -0800 Subject: [PATCH 012/128] preliminary: events, holidays --- neuralprophet/time_dataset.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 8ea20ebd2..26b822990 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -66,7 +66,7 @@ def __init__(self, df, name, **kwargs): self.kwargs = kwargs learning_rate = kwargs['config_train'].learning_rate - if kwargs['predict_mode'] or (learning_rate is None) or self.kwargs['config_lagged_regressors']: + if kwargs['predict_mode'] or (learning_rate is None) or kwargs['config_lagged_regressors'] or kwargs['config_country_holidays'] or kwargs['config_events']: inputs, targets = tabularize_univariate_datetime(df, **kwargs) self.init_after_tabularized(inputs, targets) self.filter_samples_after_init(kwargs["prediction_frequency"]) @@ -103,7 +103,7 @@ def __getitem__(self, index): """ # TODO: Drop config_train from self! 
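        # (Each lagged regressor may declare its own n_lags, so a slice of
        # length n_lags + n_forecasts taken at one index can be too short for
        # it; until a max-lag window is computed later in this series, lagged
        # regressors stay on the precomputed path.)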
learning_rate = self.kwargs['config_train'].learning_rate - if self.kwargs['predict_mode'] or (learning_rate is None) or self.kwargs['config_lagged_regressors']: + if self.kwargs['predict_mode'] or (learning_rate is None) or self.kwargs['config_lagged_regressors'] or self.kwargs['config_country_holidays'] or self.kwargs['config_events']: sample = self.samples[index] targets = self.targets[index] meta = self.meta @@ -364,7 +364,7 @@ def tabularize_univariate_datetime( #n_samples = len(df) - max_lags + 1 - n_forecasts #TODO learning_rate = config_train.learning_rate - if predict_mode or (learning_rate is None): + if predict_mode or (learning_rate is None) or config_lagged_regressors or config_country_holidays or config_events: n_samples = len(df) - max_lags + 1 - n_forecasts else: n_samples=1 From fca7adff3d035ab5e47e44f90faa4e3bbd83ef3e Mon Sep 17 00:00:00 2001 From: Simon W Date: Thu, 21 Dec 2023 15:47:15 -0800 Subject: [PATCH 013/128] adjustes pytests --- tests/test_unit.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/test_unit.py b/tests/test_unit.py index c07c98527..b8d6a26d8 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -76,6 +76,13 @@ def test_time_dataset(): n_forecasts = 1 valid_p = 0.2 config_missing = configure.MissingDataHandling() + config_train = configure.Train( + learning_rate=LR, + epochs=EPOCHS, + batch_size=BATCH_SIZE, + loss_func="SmoothL1Loss", + optimizer="AdamW", + ) df_train, df_val = df_utils.split_df(df_in, n_lags, n_forecasts, valid_p) # create a tabularized dataset from time series df, _, _ = df_utils.check_dataframe(df_train) @@ -83,7 +90,7 @@ def test_time_dataset(): df = df.drop("ID", axis=1) df = df_utils.normalize(df, global_data_params) inputs, targets = time_dataset.tabularize_univariate_datetime( - df, n_lags=n_lags, n_forecasts=n_forecasts, config_missing=config_missing + df, n_lags=n_lags, n_forecasts=n_forecasts, config_missing=config_missing, config_train=config_train ) log.debug( "tabularized inputs: {}".format( From 139a97f908564175c73cbab6ecd5e9d6787afdbd Mon Sep 17 00:00:00 2001 From: Simon W Date: Thu, 21 Dec 2023 16:12:28 -0800 Subject: [PATCH 014/128] selective forecasting --- neuralprophet/time_dataset.py | 6 +++--- tests/test_unit.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 26b822990..7642eb06f 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -66,7 +66,7 @@ def __init__(self, df, name, **kwargs): self.kwargs = kwargs learning_rate = kwargs['config_train'].learning_rate - if kwargs['predict_mode'] or (learning_rate is None) or kwargs['config_lagged_regressors'] or kwargs['config_country_holidays'] or kwargs['config_events']: + if kwargs['predict_mode'] or (learning_rate is None) or kwargs['config_lagged_regressors'] or kwargs['config_country_holidays'] or kwargs['config_events'] or kwargs['prediction_frequency']: inputs, targets = tabularize_univariate_datetime(df, **kwargs) self.init_after_tabularized(inputs, targets) self.filter_samples_after_init(kwargs["prediction_frequency"]) @@ -103,7 +103,7 @@ def __getitem__(self, index): """ # TODO: Drop config_train from self! 
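        # (Events and country holidays are expanded over the full date range
        # rather than per window, so they likewise fall back to the
        # precomputed path for now.)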
learning_rate = self.kwargs['config_train'].learning_rate - if self.kwargs['predict_mode'] or (learning_rate is None) or self.kwargs['config_lagged_regressors'] or self.kwargs['config_country_holidays'] or self.kwargs['config_events']: + if self.kwargs['predict_mode'] or (learning_rate is None) or self.kwargs['config_lagged_regressors'] or self.kwargs['config_country_holidays'] or self.kwargs['config_events'] or self.kwargs['prediction_frequency']: sample = self.samples[index] targets = self.targets[index] meta = self.meta @@ -364,7 +364,7 @@ def tabularize_univariate_datetime( #n_samples = len(df) - max_lags + 1 - n_forecasts #TODO learning_rate = config_train.learning_rate - if predict_mode or (learning_rate is None) or config_lagged_regressors or config_country_holidays or config_events: + if predict_mode or (learning_rate is None) or config_lagged_regressors or config_country_holidays or config_events or prediction_frequency: n_samples = len(df) - max_lags + 1 - n_forecasts else: n_samples=1 diff --git a/tests/test_unit.py b/tests/test_unit.py index b8d6a26d8..6a3df35bb 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -814,7 +814,7 @@ def test_too_many_NaN(): impute_missing=True, impute_linear=5, impute_rolling=5, drop_missing=False ) config_train = configure.Train( - learning_rate=LR, + learning_rate=None, epochs=EPOCHS, batch_size=BATCH_SIZE, loss_func="SmoothL1Loss", @@ -839,7 +839,7 @@ def test_too_many_NaN(): df["ID"] = "__df__" # Check if ValueError is thrown, if NaN values remain after auto-imputing with pytest.raises(ValueError): - time_dataset.TimeDataset(df, "name", predict_mode=False, config_missing=config_missing, config_train=config_train, predict_steps=1, prediction_frequency=None) + time_dataset.TimeDataset(df, "name", predict_mode=False, config_missing=config_missing, config_lagged_regressors=None, config_country_holidays=None, config_events=None, config_train=config_train, predict_steps=1, prediction_frequency=None) def test_future_df_with_nan(): From 30aa303449de2dae12d8e44d884157010d252777 Mon Sep 17 00:00:00 2001 From: Simon W Date: Thu, 21 Dec 2023 16:26:19 -0800 Subject: [PATCH 015/128] black --- neuralprophet/data/process.py | 2 +- neuralprophet/time_dataset.py | 40 ++++++++++++++++++++++++++--------- tests/test_integration.py | 3 ++- tests/test_unit.py | 13 +++++++++++- 4 files changed, 45 insertions(+), 13 deletions(-) diff --git a/neuralprophet/data/process.py b/neuralprophet/data/process.py index c9190f21a..f3e44f9bb 100644 --- a/neuralprophet/data/process.py +++ b/neuralprophet/data/process.py @@ -623,5 +623,5 @@ def _create_dataset(model, df, predict_mode, prediction_frequency=None): config_regressors=model.config_regressors, config_missing=model.config_missing, prediction_frequency=prediction_frequency, - config_train=model.config_train + config_train=model.config_train, ) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 7642eb06f..f93e4e7a3 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -65,8 +65,15 @@ def __init__(self, df, name, **kwargs): ] self.kwargs = kwargs - learning_rate = kwargs['config_train'].learning_rate - if kwargs['predict_mode'] or (learning_rate is None) or kwargs['config_lagged_regressors'] or kwargs['config_country_holidays'] or kwargs['config_events'] or kwargs['prediction_frequency']: + learning_rate = kwargs["config_train"].learning_rate + if ( + kwargs["predict_mode"] + or (learning_rate is None) + or kwargs["config_lagged_regressors"] + or 
kwargs["config_country_holidays"] + or kwargs["config_events"] + or kwargs["prediction_frequency"] + ): inputs, targets = tabularize_univariate_datetime(df, **kwargs) self.init_after_tabularized(inputs, targets) self.filter_samples_after_init(kwargs["prediction_frequency"]) @@ -102,15 +109,22 @@ def __getitem__(self, index): Targets to be predicted of same length as each of the model inputs, dims: (num_samples, n_forecasts) """ # TODO: Drop config_train from self! - learning_rate = self.kwargs['config_train'].learning_rate - if self.kwargs['predict_mode'] or (learning_rate is None) or self.kwargs['config_lagged_regressors'] or self.kwargs['config_country_holidays'] or self.kwargs['config_events'] or self.kwargs['prediction_frequency']: + learning_rate = self.kwargs["config_train"].learning_rate + if ( + self.kwargs["predict_mode"] + or (learning_rate is None) + or self.kwargs["config_lagged_regressors"] + or self.kwargs["config_country_holidays"] + or self.kwargs["config_events"] + or self.kwargs["prediction_frequency"] + ): sample = self.samples[index] targets = self.targets[index] meta = self.meta return sample, targets, meta else: start_idx = index - end_idx = start_idx + self.kwargs.get('n_lags') + self.kwargs.get('n_forecasts') + end_idx = start_idx + self.kwargs.get("n_lags") + self.kwargs.get("n_forecasts") df_slice = self.df.iloc[start_idx:end_idx] @@ -139,7 +153,6 @@ def drop_nan_init(self, drop_missing): number of steps to predict """ - def drop_nan_after_init(self, df, predict_steps, drop_missing): """Checks if inputs/targets contain any NaN values and drops them, if user opts to. Parameters @@ -361,13 +374,20 @@ def tabularize_univariate_datetime( Targets to be predicted of same length as each of the model inputs, dims: (num_samples, n_forecasts) """ max_lags = get_max_num_lags(config_lagged_regressors, n_lags) - #n_samples = len(df) - max_lags + 1 - n_forecasts - #TODO + # n_samples = len(df) - max_lags + 1 - n_forecasts + # TODO learning_rate = config_train.learning_rate - if predict_mode or (learning_rate is None) or config_lagged_regressors or config_country_holidays or config_events or prediction_frequency: + if ( + predict_mode + or (learning_rate is None) + or config_lagged_regressors + or config_country_holidays + or config_events + or prediction_frequency + ): n_samples = len(df) - max_lags + 1 - n_forecasts else: - n_samples=1 + n_samples = 1 # data is stored in OrderedDict inputs = OrderedDict({}) diff --git a/tests/test_integration.py b/tests/test_integration.py index 6d1799f64..cdb3eebda 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1706,6 +1706,7 @@ def test_unused_future_regressors(): m.add_lagged_regressor("cost") m.fit(df, freq="D") + def test_on_the_fly_sampling(): start_date = "2022-10-16 00:00:00" end_date = "2022-12-30 00:00:00" @@ -1715,5 +1716,5 @@ def test_on_the_fly_sampling(): df.loc[3, "y"] = np.nan m = NeuralProphet(epochs=1, learning_rate=0.01) - m.fit(df, freq='H') + m.fit(df, freq="H") metrics = m.predict(df) diff --git a/tests/test_unit.py b/tests/test_unit.py index 6a3df35bb..be4d7d55a 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -839,7 +839,18 @@ def test_too_many_NaN(): df["ID"] = "__df__" # Check if ValueError is thrown, if NaN values remain after auto-imputing with pytest.raises(ValueError): - time_dataset.TimeDataset(df, "name", predict_mode=False, config_missing=config_missing, config_lagged_regressors=None, config_country_holidays=None, config_events=None, config_train=config_train, 
predict_steps=1, prediction_frequency=None) + time_dataset.TimeDataset( + df, + "name", + predict_mode=False, + config_missing=config_missing, + config_lagged_regressors=None, + config_country_holidays=None, + config_events=None, + config_train=config_train, + predict_steps=1, + prediction_frequency=None, + ) def test_future_df_with_nan(): From 381c9129d1ac3b57857e8b5cf10f9857e0f7e897 Mon Sep 17 00:00:00 2001 From: Simon W Date: Thu, 21 Dec 2023 16:28:30 -0800 Subject: [PATCH 016/128] ruff --- tests/test_integration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_integration.py b/tests/test_integration.py index cdb3eebda..4876f502a 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1717,4 +1717,4 @@ def test_on_the_fly_sampling(): m = NeuralProphet(epochs=1, learning_rate=0.01) m.fit(df, freq="H") - metrics = m.predict(df) + _ = m.predict(df) From 660934c0696806a81ea9da73fd44c2d5840b9161 Mon Sep 17 00:00:00 2001 From: Simon W Date: Thu, 4 Jan 2024 12:29:37 +0100 Subject: [PATCH 017/128] lagged_regressors --- neuralprophet/time_dataset.py | 14 +++++++++++++- tests/test_integration.py | 1 - 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index f93e4e7a3..333bc5d9c 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -124,7 +124,19 @@ def __getitem__(self, index): return sample, targets, meta else: start_idx = index - end_idx = start_idx + self.kwargs.get("n_lags") + self.kwargs.get("n_forecasts") + + # Lagged Regressors + if self.kwargs["config_lagged_regressors"]: + n_lagged_regressor_list = [] + for dict_name, nested_dict in self.kwargs["config_lagged_regressors"].items(): + name_of_nested_dict = dict_name + n_lagged_regressor = self.kwargs["config_lagged_regressors"][name_of_nested_dict].n_lags + n_lagged_regressor_list.append(n_lagged_regressor) + max_lag = max(self.kwargs["n_lags"], *n_lagged_regressor_list) + end_idx = start_idx + max_lag + self.kwargs.get("n_forecasts") + + else: + end_idx = start_idx + self.kwargs.get("n_lags") + self.kwargs.get("n_forecasts") df_slice = self.df.iloc[start_idx:end_idx] diff --git a/tests/test_integration.py b/tests/test_integration.py index 4876f502a..730493828 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1713,7 +1713,6 @@ def test_on_the_fly_sampling(): date_range = pd.date_range(start=start_date, end=end_date, freq="D") y = np.random.randint(0, 20, size=(len(date_range),)) df = pd.DataFrame({"ds": date_range, "y": y}) - df.loc[3, "y"] = np.nan m = NeuralProphet(epochs=1, learning_rate=0.01) m.fit(df, freq="H") From 51fa0a65c982923596c74192a8644071899a8f56 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Fri, 12 Jan 2024 11:58:38 -0800 Subject: [PATCH 018/128] Note down df path to TimeDataset --- neuralprophet/time_dataset.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 333bc5d9c..25493d4c7 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -52,7 +52,34 @@ def __init__(self, df, name, **kwargs): **kwargs : dict Identical to :meth:`tabularize_univariate_datetime` """ + self.df_original = df self.name = name + + # Currently done to df before it arrives here: + # - fit calls prep_or_copy_df, _check_dataframe, and _handle_missing_data, passes to _train + # - _train calls prep_or_copy_df, then passes to init_train_loader, which returns 
the train_loader + # - init_train_loader calls prep_or_copy_df, _normalize, _create_dataset (returns TimeDataset), returns dataset wrapped in DataLoader + # _create_dataset calls prep_or_copy_df, then returns GlobalTimeDataset + + # Filter missing samples and prediction frequency (does not actually drop, but index) + # filter samples + # drop nan + + # Create index mapping of sample index to df index + + # Preprocessing of features (added to df_original) + # events and holidays + + # TBD + # meta + + # Outcome after a call to init: + # + + # Things that will not be done in init, but on the fly: + # tabularize all features for each sample, return as input, targets + + #### OLD self.length = None self.inputs = OrderedDict({}) self.targets = None From da74f87fc4646f03d51b4788dbce0a2725987eb2 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Fri, 12 Jan 2024 12:13:38 -0800 Subject: [PATCH 019/128] complete notes on TimeDataset, move meta --- neuralprophet/time_dataset.py | 43 ++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 25493d4c7..6b493421b 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -52,38 +52,46 @@ def __init__(self, df, name, **kwargs): **kwargs : dict Identical to :meth:`tabularize_univariate_datetime` """ - self.df_original = df + self.df = df self.name = name + self.meta = OrderedDict({}) + self.meta["df_name"] = self.name # Currently done to df before it arrives here: - # - fit calls prep_or_copy_df, _check_dataframe, and _handle_missing_data, passes to _train - # - _train calls prep_or_copy_df, then passes to init_train_loader, which returns the train_loader - # - init_train_loader calls prep_or_copy_df, _normalize, _create_dataset (returns TimeDataset), returns dataset wrapped in DataLoader - # _create_dataset calls prep_or_copy_df, then returns GlobalTimeDataset - - # Filter missing samples and prediction frequency (does not actually drop, but index) - # filter samples - # drop nan + # -> fit calls prep_or_copy_df, _check_dataframe, and _handle_missing_data, passes to _train + # -> _train calls prep_or_copy_df, then passes to init_train_loader, which returns the train_loader + # -> init_train_loader calls prep_or_copy_df, _normalize, _create_dataset (returns TimeDataset), returns dataset wrapped in DataLoader + # ->_create_dataset calls prep_or_copy_df, then returns GlobalTimeDataset # Create index mapping of sample index to df index + # - Filter missing samples and prediction frequency (does not actually drop, but index) + # -- filter samples + # -- drop nan + # - Indexing: + # -- Note, outer indexing connected to self.length - # Preprocessing of features (added to df_original) - # events and holidays + # Preprocessing of features (added to df) + # - events and holidays - # TBD - # meta + # TODO: + # - init_after_tabularized: What must happen here, others in __getitem__? + # - define what happens in __getitem__ - # Outcome after a call to init: - # + # Future TBD + # - integration of preprocessing steps happening outside? 
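        # (A hedged sketch of the mapping these notes describe, as it is
        # implemented further on in this series:
        #   valid = prediction_frequency_mask & target_start_end_mask & nan_mask
        #   self.sample2index_map = np.arange(len(df))[valid]
        #   self.length = len(self.sample2index_map)
        # )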
- # Things that will not be done in init, but on the fly: + # Outcome after a call to init (summary): + # - add events and holidays columns to df + # - calculated the number of usable samples (accounting for nan and filters) + # - creates mapping of sample index to df index + + # Done later on the fly when calling __getitem__: # tabularize all features for each sample, return as input, targets #### OLD self.length = None self.inputs = OrderedDict({}) self.targets = None - self.meta = OrderedDict({}) self.two_level_inputs = [ "seasonalities", "covariates", @@ -300,7 +308,6 @@ def init_after_tabularized(self, inputs, targets=None): else: self.inputs[key] = torch.from_numpy(data).type(inputs_dtype[key]) self.targets = torch.from_numpy(targets).type(targets_dtype).unsqueeze(dim=2) - self.meta["df_name"] = self.name self.samples = self._split_nested_dict(self.inputs) def filter_samples_after_init( From 97fbe0799bf9d50989aab6874034660563d1d745 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Mon, 15 Jan 2024 16:57:49 -0800 Subject: [PATCH 020/128] Big rewrite with real and pseudocode --- neuralprophet/time_dataset.py | 228 ++++++++++++++++------------------ 1 file changed, 105 insertions(+), 123 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 6b493421b..960a4fca2 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -52,69 +52,36 @@ def __init__(self, df, name, **kwargs): **kwargs : dict Identical to :meth:`tabularize_univariate_datetime` """ - self.df = df - self.name = name - self.meta = OrderedDict({}) - self.meta["df_name"] = self.name + ## Outcome after a call to init (summary): + # - add events and holidays columns to df + # - calculated the number of usable samples (accounting for nan and filters) + # - creates mapping of sample index to df index + ## Context Notes # Currently done to df before it arrives here: # -> fit calls prep_or_copy_df, _check_dataframe, and _handle_missing_data, passes to _train # -> _train calls prep_or_copy_df, then passes to init_train_loader, which returns the train_loader # -> init_train_loader calls prep_or_copy_df, _normalize, _create_dataset (returns TimeDataset), returns dataset wrapped in DataLoader # ->_create_dataset calls prep_or_copy_df, then returns GlobalTimeDataset + # Future TODO: integrate these preprocessing steps happening outside? - # Create index mapping of sample index to df index - # - Filter missing samples and prediction frequency (does not actually drop, but index) - # -- filter samples - # -- drop nan - # - Indexing: - # -- Note, outer indexing connected to self.length - - # Preprocessing of features (added to df) - # - events and holidays - - # TODO: - # - init_after_tabularized: What must happen here, others in __getitem__? - # - define what happens in __getitem__ - - # Future TBD - # - integration of preprocessing steps happening outside? 
- - # Outcome after a call to init (summary): - # - add events and holidays columns to df - # - calculated the number of usable samples (accounting for nan and filters) - # - creates mapping of sample index to df index + self.df = df + self.name = name + self.meta = OrderedDict({}) + self.meta["df_name"] = self.name - # Done later on the fly when calling __getitem__: - # tabularize all features for each sample, return as input, targets + # TODO: Preprocessing of features (added to self.df) + # - events and holidays: convert date-time occurence dictionary to a column of values in the self.df + # - These will then be later tabularized in __get_item___ - #### OLD - self.length = None - self.inputs = OrderedDict({}) - self.targets = None - self.two_level_inputs = [ - "seasonalities", - "covariates", - "events", - "regressors", - ] - self.kwargs = kwargs - - learning_rate = kwargs["config_train"].learning_rate - if ( - kwargs["predict_mode"] - or (learning_rate is None) - or kwargs["config_lagged_regressors"] - or kwargs["config_country_holidays"] - or kwargs["config_events"] - or kwargs["prediction_frequency"] - ): - inputs, targets = tabularize_univariate_datetime(df, **kwargs) - self.init_after_tabularized(inputs, targets) - self.filter_samples_after_init(kwargs["prediction_frequency"]) - self.drop_nan_after_init(df, kwargs["predict_steps"], kwargs["config_missing"].drop_missing) - else: - self.df = df + ## TODO Create index mapping of sample index to df index + # - Filter missing samples and prediction frequency (does not actually drop, but creates indexmapping) + # -- filter samples + # analogous to `self.filter_samples_after_init(self.kwargs["prediction_frequency"])` + # -- drop nan + # analogous to `self.drop_nan_after_init(self.df, self.kwargs["predict_steps"], self.kwargs["config_missing"].drop_missing) + # save the created mapping to self.sample2index_map (used by self.sample2index_map) + self.sample2index_map, self.length = self.create_sample2index_map(df) def __getitem__(self, index): """Overrides parent class method to get an item at index. @@ -142,54 +109,61 @@ def __getitem__(self, index): each with features (np.array, float) of dims: (num_samples, n_lags) np.array, float Targets to be predicted of same length as each of the model inputs, dims: (num_samples, n_forecasts) + OrderedDict + Meta information: static information about the local dataset """ - # TODO: Drop config_train from self! 
- learning_rate = self.kwargs["config_train"].learning_rate - if ( - self.kwargs["predict_mode"] - or (learning_rate is None) - or self.kwargs["config_lagged_regressors"] - or self.kwargs["config_country_holidays"] - or self.kwargs["config_events"] - or self.kwargs["prediction_frequency"] - ): - sample = self.samples[index] - targets = self.targets[index] - meta = self.meta - return sample, targets, meta - else: - start_idx = index - - # Lagged Regressors - if self.kwargs["config_lagged_regressors"]: - n_lagged_regressor_list = [] - for dict_name, nested_dict in self.kwargs["config_lagged_regressors"].items(): - name_of_nested_dict = dict_name - n_lagged_regressor = self.kwargs["config_lagged_regressors"][name_of_nested_dict].n_lags - n_lagged_regressor_list.append(n_lagged_regressor) - max_lag = max(self.kwargs["n_lags"], *n_lagged_regressor_list) - end_idx = start_idx + max_lag + self.kwargs.get("n_forecasts") - - else: - end_idx = start_idx + self.kwargs.get("n_lags") + self.kwargs.get("n_forecasts") - - df_slice = self.df.iloc[start_idx:end_idx] - - # Functions - inputs, targets = tabularize_univariate_datetime(df_slice, **self.kwargs) - self.init_after_tabularized(inputs, targets) - self.filter_samples_after_init(self.kwargs["prediction_frequency"]) - self.drop_nan_after_init(self.df, self.kwargs["predict_steps"], self.kwargs["config_missing"].drop_missing) + # Convert dataset sample index to valid dataframe positional index + # - sample index is any index up to len(dataset) + # - dataframe positional index is given by position of first target in dataframe for given sample index + df_index = self.sample_index_to_df_index(index) - sample = self.samples[index] - targets = self.targets[index] - meta = self.meta - return sample, targets, meta + # Tabularize - extract features from dataframe at given target index position + inputs, target = tabularize_univariate_datetime_single_index(self.df, target_index=df_index, **self.kwargs) + sample, target = self.format_sample(inputs, target) + return sample, target, self.meta def __len__(self): """Overrides Parent class method to get data length.""" return self.length + def create_sample2index_map(self, df): + """creates mapping of sample index to df index. 
+ Create index mapping of sample index to df index + Filter missing samples and prediction frequency (does not actually drop, but creates indexmapping) + -- filter samples + analogous to `self.filter_samples_after_init(self.kwargs["prediction_frequency"])` + -- drop nan + analogous to `self.drop_nan_after_init(self.df, self.kwargs["predict_steps"], self.kwargs["config_missing"].drop_missing) + save the created mapping to sample2index_map + """ + # Prediction Frequency + prediction_frequency_mask = self.create_prediction_frequency_filter_mask( + self, self.kwargs["prediction_frequency"] + ) + + # TODO: limit start end range + # Pseudo code: concat[np.zeros(n_lags), np.ones(n_samples - n_lags -n_forecasts +1),np.zeros(n_forecasts-1)] + start_end_target_mask = np.ones(len(df)) + + # TODO Create index mapping of sample index to df index + # - Filter missing samples (does not actually drop, but creates indexmapping) + # -- drop nan analogous to `self.drop_nan_after_init(self.df, self.kwargs["predict_steps"], self.kwargs["config_missing"].drop_missing) + nan_mask = self.create_nan_mask(df) # vector of all ones, except nans are zeros + + # TODO: Combine + # Psedocode: valid_sample = elementwise_and_operator(prediction_frequency_mask & start_end_target_mask & nan_mask) + # num_samples = sum(valid_sample) + # sample2index_map = convert valid_sample to list of the positinal index of all true/one entries + # e.g. [0,0,1,1,0,1,0] -> [2,3,5] + sample2index_map = np.ones(len(df)) + + return sample2index_map, num_samples + + def sample_index_to_df_index(self, sample_index): + """Translates a single outer sample to dataframe index""" + # Will need more sophisticated mapping for GlobalTimeDataset + return self.sample2index_map[sample_index] + def drop_nan_init(self, drop_missing): """Checks if inputs/targets contain any NaN values and drops them, if user opts to. Parameters @@ -200,7 +174,7 @@ def drop_nan_init(self, drop_missing): number of steps to predict """ - def drop_nan_after_init(self, df, predict_steps, drop_missing): + def create_nan_mask(self, df, predict_steps, drop_missing): """Checks if inputs/targets contain any NaN values and drops them, if user opts to. Parameters ---------- @@ -209,6 +183,8 @@ def drop_nan_after_init(self, df, predict_steps, drop_missing): predict_steps : int number of steps to predict """ + # TODO: rewrite to return mask instead of filtering df. + nan_idx = [] # NaNs in inputs for key, data in self.inputs.items(): @@ -266,8 +242,8 @@ def split_dict(inputs, index): length = next(iter(inputs.values())).shape[0] return [split_dict(inputs, i) for i in range(length)] - def init_after_tabularized(self, inputs, targets=None): - """Create Timedataset with data. + def format_sample(self, inputs, targets=None): + """Convert tabularizes sample to correct formats. 
Parameters ---------- inputs : ordered dict @@ -275,6 +251,7 @@ def init_after_tabularized(self, inputs, targets=None): targets : np.array, float Identical to returns from :meth:`tabularize_univariate_datetime` """ + sample_input = OrderedDict({}) inputs_dtype = { "time": torch.float, "timestamps": np.datetime64, @@ -285,11 +262,12 @@ def init_after_tabularized(self, inputs, targets=None): "regressors": torch.float, } targets_dtype = torch.float - self.length = inputs["time"].shape[0] + + sample_target = torch.from_numpy(targets).type(targets_dtype).unsqueeze(dim=2) for key, data in inputs.items(): if key in self.two_level_inputs: - self.inputs[key] = OrderedDict({}) + sample_input[key] = OrderedDict({}) for name, features in data.items(): if features.dtype != np.float32: features = features.astype(np.float32, copy=False) @@ -297,24 +275,29 @@ def init_after_tabularized(self, inputs, targets=None): tensor = torch.from_numpy(features) if tensor.dtype != inputs_dtype[key]: - self.inputs[key][name] = tensor.to( + sample_input[key][name] = tensor.to( dtype=inputs_dtype[key] ) # this can probably be removed, but was included in the previous code else: - self.inputs[key][name] = tensor + sample_input[key][name] = tensor else: if key == "timestamps": - self.inputs[key] = data + sample_input[key] = data else: - self.inputs[key] = torch.from_numpy(data).type(inputs_dtype[key]) - self.targets = torch.from_numpy(targets).type(targets_dtype).unsqueeze(dim=2) - self.samples = self._split_nested_dict(self.inputs) + sample_input[key] = torch.from_numpy(data).type(inputs_dtype[key]) + sample_input = self._split_nested_dict(sample_input) + + ## Not sure if this needs be done here anymore? + # Exact timestamps are not needed anymore + sample_input.pop("timestamps") - def filter_samples_after_init( + return sample_input, sample_target + + def create_prediction_frequency_filter_mask( self, prediction_frequency=None, ): - """Filters samples from the dataset based on the forecast frequency. + """Filters prediction target index from df based on the forecast frequency setting. Parameters ---------- prediction_frequency : int @@ -323,40 +306,37 @@ def filter_samples_after_init( ---- E.g. if prediction_frequency=7, forecasts are only made on every 7th step (once in a week in case of daily resolution). + + Returns mask where prediction target start indexes to be included are ones, and the rest zeros. 
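        Example
        -------
        With daily data and ``prediction_frequency={"weekly-day": 3}``, only
        positions whose first target timestamp falls on a Thursday (pandas
        ``dayofweek == 3``) are kept; all other positions are masked out.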
""" if prediction_frequency is None or prediction_frequency == 1: return # Only the first target timestamp is of interest for filtering - timestamps = pd.to_datetime([sample["timestamps"][0] for sample in self.samples]) + timestamps = pd.to_datetime([x["timestamps"][0] for x in self.df]) # This may need adjusting masks = [] for key, value in prediction_frequency.items(): if key == "daily-hour": - mask = timestamps.hour == value + 1 # because prediction starts one step after origin + mask = timestamps.hour == value elif key == "weekly-day": - mask = timestamps.dayofweek == value + 1 + mask = timestamps.dayofweek == value elif key == "monthly-day": - mask = timestamps.day == value + 1 + mask = timestamps.day == value elif key == "yearly-month": - mask = timestamps.month == value + 1 + mask = timestamps.month == value elif key == "hourly-minute": - mask = timestamps.minute == value + 1 + mask = timestamps.minute == value else: raise ValueError(f"Invalid prediction frequency: {key}") masks.append(mask) mask = np.ones((len(timestamps),), dtype=bool) for m in masks: mask = mask & m - self.samples = [self.samples[i] for i in range(len(self.samples)) if mask[i]] - - # Exact timestamps are not needed anymore - self.inputs.pop("timestamps") - for sample in self.samples: - sample.pop("timestamps") - self.length = len(self.samples) + return mask -def tabularize_univariate_datetime( +def tabularize_univariate_datetime_single_index( df, + target_index, predict_mode=False, n_lags=0, n_forecasts=1, @@ -421,7 +401,9 @@ def tabularize_univariate_datetime( """ max_lags = get_max_num_lags(config_lagged_regressors, n_lags) # n_samples = len(df) - max_lags + 1 - n_forecasts - # TODO + + # TODO convert to single sample version + learning_rate = config_train.learning_rate if ( predict_mode From bdf529c56f11fcf0dcf92c9cad9e72e1fb711a1b Mon Sep 17 00:00:00 2001 From: ourownstory Date: Wed, 17 Jan 2024 15:54:05 -0800 Subject: [PATCH 021/128] create_target_start_end_mask --- neuralprophet/time_dataset.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 960a4fca2..58249913a 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -69,6 +69,7 @@ def __init__(self, df, name, **kwargs): self.name = name self.meta = OrderedDict({}) self.meta["df_name"] = self.name + self.config_args = kwargs # TODO: Preprocessing of features (added to self.df) # - events and holidays: convert date-time occurence dictionary to a column of values in the self.df @@ -140,10 +141,8 @@ def create_sample2index_map(self, df): prediction_frequency_mask = self.create_prediction_frequency_filter_mask( self, self.kwargs["prediction_frequency"] ) - - # TODO: limit start end range - # Pseudo code: concat[np.zeros(n_lags), np.ones(n_samples - n_lags -n_forecasts +1),np.zeros(n_forecasts-1)] - start_end_target_mask = np.ones(len(df)) + # Limit target range due to input lags and number of forecasts + target_start_end_mask = self.create_target_start_end_mask() # TODO Create index mapping of sample index to df index # - Filter missing samples (does not actually drop, but creates indexmapping) @@ -151,7 +150,7 @@ def create_sample2index_map(self, df): nan_mask = self.create_nan_mask(df) # vector of all ones, except nans are zeros # TODO: Combine - # Psedocode: valid_sample = elementwise_and_operator(prediction_frequency_mask & start_end_target_mask & nan_mask) + # Psedocode: valid_sample = 
elementwise_and_operator(prediction_frequency_mask & target_start_end_mask & nan_mask) # num_samples = sum(valid_sample) # sample2index_map = convert valid_sample to list of the positinal index of all true/one entries # e.g. [0,0,1,1,0,1,0] -> [2,3,5] @@ -174,6 +173,17 @@ def drop_nan_init(self, drop_missing): number of steps to predict """ + def create_target_start_end_mask(self, df): + """Creates binary mask for valid targets based on limiting input lags and forecast targets.""" + max_lags = get_max_num_lags(self.config_args["config_lagged_regressors"], self.config_args["n_lags"]) + n_forecasts = self.config_args["n_forecasts"] + length = len(df) + start_pad = np.zeros(max_lags) + valid_targets = np.ones(length - max_lags - n_forecasts + 1) + end_pad = np.zeros(n_forecasts - 1) + target_start_end_mask = np.concatenate((start_pad, valid_targets, end_pad), axis=None) + return target_start_end_mask + def create_nan_mask(self, df, predict_steps, drop_missing): """Checks if inputs/targets contain any NaN values and drops them, if user opts to. Parameters From c814115da5c60d8467d0cfa999ac9bbe90e77f58 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Wed, 17 Jan 2024 16:02:14 -0800 Subject: [PATCH 022/128] boolean mask --- neuralprophet/time_dataset.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 58249913a..7fc2ac47d 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -174,13 +174,13 @@ def drop_nan_init(self, drop_missing): """ def create_target_start_end_mask(self, df): - """Creates binary mask for valid targets based on limiting input lags and forecast targets.""" + """Creates a boolean mask for valid targets based on limiting input lags and forecast targets.""" max_lags = get_max_num_lags(self.config_args["config_lagged_regressors"], self.config_args["n_lags"]) n_forecasts = self.config_args["n_forecasts"] length = len(df) - start_pad = np.zeros(max_lags) - valid_targets = np.ones(length - max_lags - n_forecasts + 1) - end_pad = np.zeros(n_forecasts - 1) + start_pad = np.zeros(max_lags, dtype=bool) + valid_targets = np.ones(length - max_lags - n_forecasts + 1, dtype=bool) + end_pad = np.zeros(n_forecasts - 1, dtype=bool) target_start_end_mask = np.concatenate((start_pad, valid_targets, end_pad), axis=None) return target_start_end_mask From 711941992442ae6ebf6d26a28529952a6dea0eab Mon Sep 17 00:00:00 2001 From: ourownstory Date: Wed, 17 Jan 2024 16:18:56 -0800 Subject: [PATCH 023/128] combine masks into map --- neuralprophet/time_dataset.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 7fc2ac47d..00e33c363 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -139,22 +139,31 @@ def create_sample2index_map(self, df): """ # Prediction Frequency prediction_frequency_mask = self.create_prediction_frequency_filter_mask( - self, self.kwargs["prediction_frequency"] + self, self.config_args["prediction_frequency"] ) # Limit target range due to input lags and number of forecasts - target_start_end_mask = self.create_target_start_end_mask() + df_length = len(df) + max_lags = get_max_num_lags(self.config_args["config_lagged_regressors"], self.config_args["n_lags"]) + n_forecasts = self.config_args["n_forecasts"] + target_start_end_mask = self.create_target_start_end_mask( + df_length=df_length, max_lags=max_lags, 
n_forecasts=n_forecasts + ) # TODO Create index mapping of sample index to df index # - Filter missing samples (does not actually drop, but creates indexmapping) # -- drop nan analogous to `self.drop_nan_after_init(self.df, self.kwargs["predict_steps"], self.kwargs["config_missing"].drop_missing) nan_mask = self.create_nan_mask(df) # vector of all ones, except nans are zeros - # TODO: Combine - # Psedocode: valid_sample = elementwise_and_operator(prediction_frequency_mask & target_start_end_mask & nan_mask) - # num_samples = sum(valid_sample) - # sample2index_map = convert valid_sample to list of the positinal index of all true/one entries + # Combine masks + mask = np.logical_and(prediction_frequency_mask, target_start_end_mask) + valid_sample_mask = np.logical_and(mask, nan_mask) + # Convert boolean valid_sample to list of the positinal index of all true/one entries # e.g. [0,0,1,1,0,1,0] -> [2,3,5] - sample2index_map = np.ones(len(df)) + index_range = np.arange(0, df_length) + sample2index_map = index_range[valid_sample_mask] + + num_samples = np.sum(valid_sample_mask) + assert len(sample2index_map) == num_samples return sample2index_map, num_samples @@ -173,13 +182,10 @@ def drop_nan_init(self, drop_missing): number of steps to predict """ - def create_target_start_end_mask(self, df): + def create_target_start_end_mask(self, df_length, max_lags, n_forecasts): """Creates a boolean mask for valid targets based on limiting input lags and forecast targets.""" - max_lags = get_max_num_lags(self.config_args["config_lagged_regressors"], self.config_args["n_lags"]) - n_forecasts = self.config_args["n_forecasts"] - length = len(df) start_pad = np.zeros(max_lags, dtype=bool) - valid_targets = np.ones(length - max_lags - n_forecasts + 1, dtype=bool) + valid_targets = np.ones(df_length - max_lags - n_forecasts + 1, dtype=bool) end_pad = np.zeros(n_forecasts - 1, dtype=bool) target_start_end_mask = np.concatenate((start_pad, valid_targets, end_pad), axis=None) return target_start_end_mask @@ -317,7 +323,7 @@ def create_prediction_frequency_filter_mask( E.g. if prediction_frequency=7, forecasts are only made on every 7th step (once in a week in case of daily resolution). - Returns mask where prediction target start indexes to be included are ones, and the rest zeros. + Returns boolean mask where prediction target start indexes to be included are True, and the rest False. 
""" if prediction_frequency is None or prediction_frequency == 1: return From 66bb911c4c2570bf2ac9cee5db7c4652660764da Mon Sep 17 00:00:00 2001 From: ourownstory Date: Wed, 17 Jan 2024 16:27:05 -0800 Subject: [PATCH 024/128] notes for nan check --- neuralprophet/time_dataset.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 00e33c363..b238432a4 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -141,6 +141,7 @@ def create_sample2index_map(self, df): prediction_frequency_mask = self.create_prediction_frequency_filter_mask( self, self.config_args["prediction_frequency"] ) + # Limit target range due to input lags and number of forecasts df_length = len(df) max_lags = get_max_num_lags(self.config_args["config_lagged_regressors"], self.config_args["n_lags"]) @@ -152,7 +153,10 @@ def create_sample2index_map(self, df): # TODO Create index mapping of sample index to df index # - Filter missing samples (does not actually drop, but creates indexmapping) # -- drop nan analogous to `self.drop_nan_after_init(self.df, self.kwargs["predict_steps"], self.kwargs["config_missing"].drop_missing) - nan_mask = self.create_nan_mask(df) # vector of all ones, except nans are zeros + # Note: needs to also account for NANs in lagged inputs or in n_forecasts, not just first target. + # Implement a convolutional filter for targets and each lagged regressor. + # Also account for future regressors and events. + nan_mask = self.create_nan_mask(df) # boolean array where NAN are False # Combine masks mask = np.logical_and(prediction_frequency_mask, target_start_end_mask) From fe382c1f722e90bec27a25b7a51c01976ea58027 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Wed, 17 Jan 2024 16:30:54 -0800 Subject: [PATCH 025/128] bypass NAN filter --- neuralprophet/time_dataset.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index b238432a4..7478f4a04 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -203,8 +203,10 @@ def create_nan_mask(self, df, predict_steps, drop_missing): predict_steps : int number of steps to predict """ - # TODO: rewrite to return mask instead of filtering df. + # TODO implement actual filtering + return np.ones(len(df), dtype=bool) + # TODO: rewrite to return mask instead of filtering df. nan_idx = [] # NaNs in inputs for key, data in self.inputs.items(): From 8ec4f9f43eeb2d5670f5bd2c1a8b00f6b55c3a55 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Thu, 18 Jan 2024 16:19:04 -0800 Subject: [PATCH 026/128] rework index to point at prediction origin, not first forecast. --- neuralprophet/time_dataset.py | 291 ++++++++++++++++++---------------- 1 file changed, 150 insertions(+), 141 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 7478f4a04..9abc38a4a 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -63,7 +63,11 @@ def __init__(self, df, name, **kwargs): # -> _train calls prep_or_copy_df, then passes to init_train_loader, which returns the train_loader # -> init_train_loader calls prep_or_copy_df, _normalize, _create_dataset (returns TimeDataset), returns dataset wrapped in DataLoader # ->_create_dataset calls prep_or_copy_df, then returns GlobalTimeDataset - # Future TODO: integrate these preprocessing steps happening outside? 
+ # Future TODO: integrate some of these preprocessing steps happening outside? + + # TODO: Preprocessing of features (added to self.df) + # - events and holidays: convert date-time occurence dictionary to a column of values in the self.df + # - These will then be later tabularized in __get_item___ self.df = df self.name = name @@ -71,17 +75,6 @@ def __init__(self, df, name, **kwargs): self.meta["df_name"] = self.name self.config_args = kwargs - # TODO: Preprocessing of features (added to self.df) - # - events and holidays: convert date-time occurence dictionary to a column of values in the self.df - # - These will then be later tabularized in __get_item___ - - ## TODO Create index mapping of sample index to df index - # - Filter missing samples and prediction frequency (does not actually drop, but creates indexmapping) - # -- filter samples - # analogous to `self.filter_samples_after_init(self.kwargs["prediction_frequency"])` - # -- drop nan - # analogous to `self.drop_nan_after_init(self.df, self.kwargs["predict_steps"], self.kwargs["config_missing"].drop_missing) - # save the created mapping to self.sample2index_map (used by self.sample2index_map) self.sample2index_map, self.length = self.create_sample2index_map(df) def __getitem__(self, index): @@ -119,7 +112,7 @@ def __getitem__(self, index): df_index = self.sample_index_to_df_index(index) # Tabularize - extract features from dataframe at given target index position - inputs, target = tabularize_univariate_datetime_single_index(self.df, target_index=df_index, **self.kwargs) + inputs, target = tabularize_univariate_datetime_single_index(self.df, target_index=df_index, **self.config_args) sample, target = self.format_sample(inputs, target) return sample, target, self.meta @@ -127,75 +120,121 @@ def __len__(self): """Overrides Parent class method to get data length.""" return self.length + def sample_index_to_df_index(self, sample_index): + """Translates a single outer sample to dataframe index""" + # Will need more sophisticated mapping for GlobalTimeDataset + return self.sample2index_map[sample_index] + def create_sample2index_map(self, df): - """creates mapping of sample index to df index. - Create index mapping of sample index to df index - Filter missing samples and prediction frequency (does not actually drop, but creates indexmapping) - -- filter samples - analogous to `self.filter_samples_after_init(self.kwargs["prediction_frequency"])` - -- drop nan - analogous to `self.drop_nan_after_init(self.df, self.kwargs["predict_steps"], self.kwargs["config_missing"].drop_missing) - save the created mapping to sample2index_map + """creates mapping of sample index to corresponding df index at prediction origin. + (prediction origin: last observation before forecast / future period starts). + return created mapping to sample2index_map and number of samples. 
""" - # Prediction Frequency - prediction_frequency_mask = self.create_prediction_frequency_filter_mask( - self, self.config_args["prediction_frequency"] - ) # Limit target range due to input lags and number of forecasts df_length = len(df) max_lags = get_max_num_lags(self.config_args["config_lagged_regressors"], self.config_args["n_lags"]) n_forecasts = self.config_args["n_forecasts"] - target_start_end_mask = self.create_target_start_end_mask( + origin_start_end_mask = self.create_origin_start_end_mask( df_length=df_length, max_lags=max_lags, n_forecasts=n_forecasts ) + # Prediction Frequency + # Filter missing samples and prediction frequency (does not actually drop, but creates indexmapping) + # analogous to `self.filter_samples_after_init( + # self.kwargs["prediction_frequency"])` + prediction_frequency_mask = self.create_prediction_frequency_filter_mask( + self, df, self.config_args["prediction_frequency"] + ) + # TODO Create index mapping of sample index to df index - # - Filter missing samples (does not actually drop, but creates indexmapping) - # -- drop nan analogous to `self.drop_nan_after_init(self.df, self.kwargs["predict_steps"], self.kwargs["config_missing"].drop_missing) - # Note: needs to also account for NANs in lagged inputs or in n_forecasts, not just first target. - # Implement a convolutional filter for targets and each lagged regressor. - # Also account for future regressors and events. + # Drop nan analogous to `self.drop_nan_after_init( + # self.df, self.kwargs["predict_steps"], self.kwargs["config_missing"].drop_missing) nan_mask = self.create_nan_mask(df) # boolean array where NAN are False # Combine masks - mask = np.logical_and(prediction_frequency_mask, target_start_end_mask) + mask = np.logical_and(prediction_frequency_mask, origin_start_end_mask) valid_sample_mask = np.logical_and(mask, nan_mask) # Convert boolean valid_sample to list of the positinal index of all true/one entries # e.g. [0,0,1,1,0,1,0] -> [2,3,5] index_range = np.arange(0, df_length) - sample2index_map = index_range[valid_sample_mask] + sample_index_2_df_origin_index = index_range[valid_sample_mask] num_samples = np.sum(valid_sample_mask) - assert len(sample2index_map) == num_samples + assert len(sample_index_2_df_origin_index) == num_samples + + return sample_index_2_df_origin_index, num_samples + + def create_origin_start_end_mask(self, df_length, max_lags, n_forecasts): + """Creates a boolean mask for valid prediction origin positions. + (based on limiting input lags and forecast targets at start and end of df)""" + if max_lags >= 1: + start_pad = np.zeros(max_lags - 1, dtype=bool) + valid_targets = np.ones(df_length - max_lags - n_forecasts + 1, dtype=bool) + end_pad = np.zeros(n_forecasts, dtype=bool) + target_start_end_mask = np.concatenate((start_pad, valid_targets, end_pad), axis=None) + elif max_lags == 0 and n_forecasts == 1: + # without lags, forecast targets and origins are identical + target_start_end_mask = np.ones(df_length, dtype=bool) + else: + raise ValueError(f"max_lags value of {max_lags} not supported for n_forecasts {n_forecasts}.") + return target_start_end_mask - return sample2index_map, num_samples + def create_prediction_frequency_filter_mask( + self, + df: pd.DataFrame, + prediction_frequency=None, + ): + """Filters prediction origin index from df based on the forecast frequency setting. 
- def sample_index_to_df_index(self, sample_index): - """Translates a single outer sample to dataframe index""" - # Will need more sophisticated mapping for GlobalTimeDataset - return self.sample2index_map[sample_index] + Filter based on timestamp last lag before targets start - def drop_nan_init(self, drop_missing): - """Checks if inputs/targets contain any NaN values and drops them, if user opts to. Parameters ---------- - drop_missing : bool - whether to automatically drop missing samples from the data - predict_steps : int - number of steps to predict + prediction_frequency : int + periodic interval in which forecasts should be made. + Note + ---- + E.g. if prediction_frequency=7, forecasts are only made on every 7th step (once in a week in case of daily + resolution). + + Returns boolean mask where prediction origin indexes to be included are True, and the rest False. """ + # !! IMPORTANT + # TODO: Adjust top level documentation to specify that the filter is applied to prediction ORIGIN, not targets start. + # !! IMPORTANT - def create_target_start_end_mask(self, df_length, max_lags, n_forecasts): - """Creates a boolean mask for valid targets based on limiting input lags and forecast targets.""" - start_pad = np.zeros(max_lags, dtype=bool) - valid_targets = np.ones(df_length - max_lags - n_forecasts + 1, dtype=bool) - end_pad = np.zeros(n_forecasts - 1, dtype=bool) - target_start_end_mask = np.concatenate((start_pad, valid_targets, end_pad), axis=None) - return target_start_end_mask + mask = np.ones((len(df),), dtype=bool) + + # Basic case: no filter + if prediction_frequency is None or prediction_frequency == 1: + return mask + + # originally: timestamps = pd.to_datetime([x["timestamps"][0] for x in df]) + timestamps = df["timestamps"].apply(lambda x: pd.to_datetime(x[0])) + filter_masks = [] + for key, value in prediction_frequency.items(): + if key == "daily-hour": + mask = timestamps.hour == value + elif key == "weekly-day": + mask = timestamps.dayofweek == value + elif key == "monthly-day": + mask = timestamps.day == value + elif key == "yearly-month": + mask = timestamps.month == value + elif key == "hourly-minute": + mask = timestamps.minute == value + else: + raise ValueError(f"Invalid prediction frequency: {key}") + filter_masks.append(mask) + for m in filter_masks: + mask = np.logical_and(mask, m) + return mask def create_nan_mask(self, df, predict_steps, drop_missing): - """Checks if inputs/targets contain any NaN values and drops them, if user opts to. + """Creates mask for each prediction origin, + accounting for corresponding input lags / forecast targets containing any NaN values. + Parameters ---------- drop_missing : bool @@ -206,7 +245,14 @@ def create_nan_mask(self, df, predict_steps, drop_missing): # TODO implement actual filtering return np.ones(len(df), dtype=bool) - # TODO: rewrite to return mask instead of filtering df. + # Create index mapping of sample index to df index + # - Filter missing samples (does not actually drop, but creates indexmapping) + # -- drop nan analogous to `self.drop_nan_after_init(self.df, self.kwargs["predict_steps"], self.kwargs["config_missing"].drop_missing) + # Note: needs to also account for NANs in lagged inputs or in n_forecasts, not just first target. + # Implement a convolutional filter for targets and each lagged regressor. + # Also account for future regressors and events. 
+ + # Rewrite to return mask instead of filtering df: nan_idx = [] # NaNs in inputs for key, data in self.inputs.items(): @@ -245,25 +291,6 @@ def create_nan_mask(self, df, predict_steps, drop_missing): "Please either adjust imputation parameters, or set 'drop_missing' to True to drop those samples." ) - @staticmethod - def _split_nested_dict(inputs): - """Split nested dict into list of dicts. - Parameters - ---------- - inputs : ordered dict - Nested dict to be split. - Returns - ------- - list of dicts - List of dicts with same keys as inputs. - """ - - def split_dict(inputs, index): - return {k: v[index] if not isinstance(v, dict) else split_dict(v, index) for k, v in inputs.items()} - - length = next(iter(inputs.values())).shape[0] - return [split_dict(inputs, i) for i in range(length)] - def format_sample(self, inputs, targets=None): """Convert tabularizes sample to correct formats. Parameters @@ -309,60 +336,29 @@ def format_sample(self, inputs, targets=None): sample_input[key] = torch.from_numpy(data).type(inputs_dtype[key]) sample_input = self._split_nested_dict(sample_input) + # TODO Can this be skipped for a single sample? + # TODO Can this be optimized? + # Split nested dict into list of dicts with same keys as sample_input. + def split_dict(sample_input, index): + return {k: v[index] if not isinstance(v, dict) else split_dict(v, index) for k, v in sample_input.items()} + + length = next(iter(sample_input.values())).shape[0] + sample_input = [split_dict(sample_input, i) for i in range(length)] + ## Not sure if this needs be done here anymore? # Exact timestamps are not needed anymore sample_input.pop("timestamps") return sample_input, sample_target - def create_prediction_frequency_filter_mask( - self, - prediction_frequency=None, - ): - """Filters prediction target index from df based on the forecast frequency setting. - Parameters - ---------- - prediction_frequency : int - periodic interval in which forecasts should be made. - Note - ---- - E.g. if prediction_frequency=7, forecasts are only made on every 7th step (once in a week in case of daily - resolution). - - Returns boolean mask where prediction target start indexes to be included are True, and the rest False. 
- """ - if prediction_frequency is None or prediction_frequency == 1: - return - # Only the first target timestamp is of interest for filtering - timestamps = pd.to_datetime([x["timestamps"][0] for x in self.df]) # This may need adjusting - masks = [] - for key, value in prediction_frequency.items(): - if key == "daily-hour": - mask = timestamps.hour == value - elif key == "weekly-day": - mask = timestamps.dayofweek == value - elif key == "monthly-day": - mask = timestamps.day == value - elif key == "yearly-month": - mask = timestamps.month == value - elif key == "hourly-minute": - mask = timestamps.minute == value - else: - raise ValueError(f"Invalid prediction frequency: {key}") - masks.append(mask) - mask = np.ones((len(timestamps),), dtype=bool) - for m in masks: - mask = mask & m - return mask - def tabularize_univariate_datetime_single_index( - df, - target_index, - predict_mode=False, - n_lags=0, - n_forecasts=1, - predict_steps=1, + df: pd.DataFrame, + target_index: int, + predict_mode: bool = False, + n_lags: int = 0, + n_forecasts: int = 1, + predict_steps: int = 1, config_seasonality: Optional[configure.ConfigSeasonality] = None, config_events: Optional[configure.ConfigEvents] = None, config_country_holidays=None, @@ -372,15 +368,15 @@ def tabularize_univariate_datetime_single_index( config_train=None, prediction_frequency=None, ): - """Create a tabular dataset from univariate timeseries for supervised forecasting. + """Create a tabular data sample from timeseries dataframe, used for mini-batch creation. Note ---- - Data must have no gaps. - If data contains missing values, they are ignored for the creation of the dataset. - Parameters + Data must have no gaps for sample extracted at given index position. ---------- df : pd.DataFrame Sequence of observations with original ``ds``, ``y`` and normalized ``t``, ``y_scaled`` columns + target_index: int: + dataframe index position of first prediction target. 
config_seasonality : configure.ConfigSeasonality Configuration for seasonalities n_lags : int @@ -422,23 +418,22 @@ def tabularize_univariate_datetime_single_index( Targets to be predicted of same length as each of the model inputs, dims: (num_samples, n_forecasts) """ max_lags = get_max_num_lags(config_lagged_regressors, n_lags) - # n_samples = len(df) - max_lags + 1 - n_forecasts + n_samples = 1 + + # previous workaround + # learning_rate = config_train.learning_rate + # if ( + # predict_mode + # or (learning_rate is None) + # or config_lagged_regressors + # or config_country_holidays + # or config_events + # or prediction_frequency + # ): + # n_samples = len(df) - max_lags + 1 - n_forecasts # TODO convert to single sample version - learning_rate = config_train.learning_rate - if ( - predict_mode - or (learning_rate is None) - or config_lagged_regressors - or config_country_holidays - or config_events - or prediction_frequency - ): - n_samples = len(df) - max_lags + 1 - n_forecasts - else: - n_samples = 1 - # data is stored in OrderedDict inputs = OrderedDict({}) @@ -476,13 +471,27 @@ def _stride_timestamps_for_forecasts(x): return np.array([x[i + max_lags : i + max_lags + n_forecasts] for i in range(n_samples)], dtype=dtype) # time is the time at each forecast step - t = df.loc[:, "t"].values if max_lags == 0: assert n_forecasts == 1 - time = np.expand_dims(t, 1) + time = np.expand_dims(df.loc[target_index, "t"].values, 1) else: - time = _stride_time_features_for_forecasts(t) - inputs["time"] = time # contains n_lags + n_forecasts + ## time = _stride_time_features_for_forecasts(df.loc[:, "t"].values) + x = df.loc[:, "t"].values + window_size = n_lags + n_forecasts + + if x.ndim == 1: + shape = (n_samples, window_size) + else: + shape = (n_samples, window_size) + x.shape[1:] + + stride = x.strides[0] + strides = (stride, stride) + x.strides[1:] + start_index = max_lags - n_lags + time = np.lib.stride_tricks.as_strided(x[start_index:], shape=shape, strides=strides) + t = df.loc[:, "t"].values + # extract timestamps of n_lags steps before target_index and n_forecasts steps starting at target_index + time = t[target_index - n_lags : target_index + n_forecasts] + inputs["time"] = time if prediction_frequency is not None: ds = df.loc[:, "ds"].values From 23d6100c18bed7483484a14ec99fede80e2b0335 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Thu, 18 Jan 2024 17:16:09 -0800 Subject: [PATCH 027/128] tabularize: converted time and lags to single sample extraction --- neuralprophet/time_dataset.py | 106 +++++++++++++++------------------- 1 file changed, 46 insertions(+), 60 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 9abc38a4a..71123cdf0 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -112,7 +112,7 @@ def __getitem__(self, index): df_index = self.sample_index_to_df_index(index) # Tabularize - extract features from dataframe at given target index position - inputs, target = tabularize_univariate_datetime_single_index(self.df, target_index=df_index, **self.config_args) + inputs, target = tabularize_univariate_datetime_single_index(self.df, origin_index=df_index, **self.config_args) sample, target = self.format_sample(inputs, target) return sample, target, self.meta @@ -210,8 +210,12 @@ def create_prediction_frequency_filter_mask( if prediction_frequency is None or prediction_frequency == 1: return mask - # originally: timestamps = pd.to_datetime([x["timestamps"][0] for x in df]) - timestamps = 
df["timestamps"].apply(lambda x: pd.to_datetime(x[0])) + # OLD: timestamps were created from "ds" column in tabularization and then re-converted here + # timestamps = pd.to_datetime([x["timestamps"][0] for x in df]) + # OR + # timestamps = df["timestamps"].apply(lambda x: pd.to_datetime(x[0])) + + timestamps = pd.to_datetime(df.loc[:, "ds"].values) filter_masks = [] for key, value in prediction_frequency.items(): if key == "daily-hour": @@ -303,7 +307,7 @@ def format_sample(self, inputs, targets=None): sample_input = OrderedDict({}) inputs_dtype = { "time": torch.float, - "timestamps": np.datetime64, + # "timestamps": np.datetime64, "seasonalities": torch.float, "events": torch.float, "lags": torch.float, @@ -330,10 +334,9 @@ def format_sample(self, inputs, targets=None): else: sample_input[key][name] = tensor else: - if key == "timestamps": - sample_input[key] = data - else: - sample_input[key] = torch.from_numpy(data).type(inputs_dtype[key]) + # if key == "timestamps": sample_input[key] = data + # else: sample_input[key] = torch.from_numpy(data).type(inputs_dtype[key]) + sample_input[key] = torch.from_numpy(data).type(inputs_dtype[key]) sample_input = self._split_nested_dict(sample_input) # TODO Can this be skipped for a single sample? @@ -345,16 +348,15 @@ def split_dict(sample_input, index): length = next(iter(sample_input.values())).shape[0] sample_input = [split_dict(sample_input, i) for i in range(length)] - ## Not sure if this needs be done here anymore? - # Exact timestamps are not needed anymore - sample_input.pop("timestamps") + ## timestamps should no longer be present here? + # sample_input.pop("timestamps") # Exact timestamps are not needed anymore return sample_input, sample_target def tabularize_univariate_datetime_single_index( df: pd.DataFrame, - target_index: int, + origin_index: int, predict_mode: bool = False, n_lags: int = 0, n_forecasts: int = 1, @@ -375,8 +377,8 @@ def tabularize_univariate_datetime_single_index( ---------- df : pd.DataFrame Sequence of observations with original ``ds``, ``y`` and normalized ``t``, ``y_scaled`` columns - target_index: int: - dataframe index position of first prediction target. + origin_index: int: + dataframe index position of last observed lag before forecast starts. 
config_seasonality : configure.ConfigSeasonality Configuration for seasonalities n_lags : int @@ -420,7 +422,7 @@ def tabularize_univariate_datetime_single_index( max_lags = get_max_num_lags(config_lagged_regressors, n_lags) n_samples = 1 - # previous workaround + # OLD: previous workaround # learning_rate = config_train.learning_rate # if ( # predict_mode @@ -432,11 +434,37 @@ def tabularize_univariate_datetime_single_index( # ): # n_samples = len(df) - max_lags + 1 - n_forecasts - # TODO convert to single sample version - # data is stored in OrderedDict inputs = OrderedDict({}) + # time is the time at each sample's lags and forecasts + if max_lags == 0: + assert n_forecasts == 1 + # OLD: time = np.expand_dims(df.loc[origin_index, "t"].values, 1) + inputs["time"] = df.loc[origin_index, "t"].values + else: + # extract time value of n_lags steps before origin_index and n_forecasts steps starting at origin_index + ## OLD: inputs["time"] = _stride_time_features_for_forecasts(df.loc[:, "t"].values) + inputs["time"] = df[origin_index - n_lags : origin_index + n_forecasts, "t"].values + + if n_lags >= 1 and "y" in df.columns: + # OLD + # def _stride_lagged_features(df_col_name, feature_dims): + # # only for case where max_lags > 0 + # assert feature_dims >= 1 + # series = df.loc[:, df_col_name].values + # # Added dtype=np.float64 to solve the problem with np.isnan for ubuntu test + # return np.array( + # [series[i + max_lags - feature_dims : i + max_lags] for i in range(n_samples)], dtype=np.float32 + # ) + # inputs["lags"] = _stride_lagged_features(df_col_name="y_scaled", feature_dims=n_lags) + + # Extract n_lags steps up to and including origin_index + # inputs["lags"] = np.array(df.loc[origin_index - n_lags + 1 : origin_index + 1, "y_scaled"].values, dtype=np.float32) + inputs["lags"] = df.loc[origin_index - n_lags + 1 : origin_index + 1, "y_scaled"].values + + # ----------- TODO convert to single sample version ---------------------- + def _stride_time_features_for_forecasts(x): window_size = n_lags + n_forecasts @@ -462,45 +490,6 @@ def _stride_lagged_features(df_col_name, feature_dims): [series[i + max_lags - feature_dims : i + max_lags] for i in range(n_samples)], dtype=np.float32 ) - def _stride_timestamps_for_forecasts(x): - # only for case where n_lags > 0 - if x.dtype != np.float64: - dtype = np.datetime64 - else: - dtype = np.float64 - return np.array([x[i + max_lags : i + max_lags + n_forecasts] for i in range(n_samples)], dtype=dtype) - - # time is the time at each forecast step - if max_lags == 0: - assert n_forecasts == 1 - time = np.expand_dims(df.loc[target_index, "t"].values, 1) - else: - ## time = _stride_time_features_for_forecasts(df.loc[:, "t"].values) - x = df.loc[:, "t"].values - window_size = n_lags + n_forecasts - - if x.ndim == 1: - shape = (n_samples, window_size) - else: - shape = (n_samples, window_size) + x.shape[1:] - - stride = x.strides[0] - strides = (stride, stride) + x.strides[1:] - start_index = max_lags - n_lags - time = np.lib.stride_tricks.as_strided(x[start_index:], shape=shape, strides=strides) - t = df.loc[:, "t"].values - # extract timestamps of n_lags steps before target_index and n_forecasts steps starting at target_index - time = t[target_index - n_lags : target_index + n_forecasts] - inputs["time"] = time - - if prediction_frequency is not None: - ds = df.loc[:, "ds"].values - if max_lags == 0: # is it rather n_lags? 
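For intuition on why the strided batch construction above can be retired: one row of the old window matrix equals a plain slice at the corresponding prediction origin. A small sanity check with illustrative values, assuming max_lags == n_lags (sliding_window_view used as the safe modern stand-in for as_strided):

import numpy as np

t = np.arange(10.0)
n_lags, n_forecasts = 3, 2
max_lags = n_lags
window_size = n_lags + n_forecasts

# batch of windows, one row per sample, as the old code built in one go
n_samples = len(t) - max_lags + 1 - n_forecasts
windows = np.lib.stride_tricks.sliding_window_view(t, window_size)[:n_samples]

# single-sample extraction by prediction origin, as in the rework
origin_index = 4
single = t[origin_index - n_lags + 1 : origin_index + n_forecasts + 1]
assert np.array_equal(single, windows[origin_index - n_lags + 1])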
- timestamps = np.expand_dims(ds, 1) - else: - timestamps = _stride_timestamps_for_forecasts(ds) - inputs["timestamps"] = timestamps - if config_seasonality is not None: seasonalities = seasonal_features_from_dates(df, config_seasonality) for name, features in seasonalities.items(): @@ -511,9 +500,6 @@ def _stride_timestamps_for_forecasts(x): seasonalities[name] = _stride_time_features_for_forecasts(features) inputs["seasonalities"] = seasonalities - if n_lags > 0 and "y" in df.columns: - inputs["lags"] = _stride_lagged_features(df_col_name="y_scaled", feature_dims=n_lags) - if config_lagged_regressors is not None and max_lags > 0: covariates = OrderedDict({}) for covar in df.columns: @@ -615,7 +601,7 @@ def fourier_series(dates, period, series_order): Parameters ---------- dates : pd.Series - Containing timestamps + Containing time stamps period : float Number of days of the period series_order : int From 49af45be1d7190971ee33fd46cd4689efa164eba Mon Sep 17 00:00:00 2001 From: ourownstory Date: Tue, 23 Jan 2024 15:01:53 -0800 Subject: [PATCH 028/128] convert lagged regressors --- neuralprophet/time_dataset.py | 55 +++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 71123cdf0..d2e3b49ee 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -437,7 +437,7 @@ def tabularize_univariate_datetime_single_index( # data is stored in OrderedDict inputs = OrderedDict({}) - # time is the time at each sample's lags and forecasts + # TIME: the time at each sample's lags and forecasts if max_lags == 0: assert n_forecasts == 1 # OLD: time = np.expand_dims(df.loc[origin_index, "t"].values, 1) @@ -447,7 +447,11 @@ def tabularize_univariate_datetime_single_index( ## OLD: inputs["time"] = _stride_time_features_for_forecasts(df.loc[:, "t"].values) inputs["time"] = df[origin_index - n_lags : origin_index + n_forecasts, "t"].values + # LAGS: From y-series, extract preceeding n_lags steps up to and including origin_index if n_lags >= 1 and "y" in df.columns: + # inputs["lags"] = np.array(df.loc[origin_index - n_lags + 1 : origin_index + 1, "y_scaled"].values, dtype=np.float32) + inputs["lags"] = df.loc[origin_index - n_lags + 1 : origin_index + 1, "y_scaled"].values + # OLD # def _stride_lagged_features(df_col_name, feature_dims): # # only for case where max_lags > 0 @@ -459,9 +463,34 @@ def tabularize_univariate_datetime_single_index( # ) # inputs["lags"] = _stride_lagged_features(df_col_name="y_scaled", feature_dims=n_lags) - # Extract n_lags steps up to and including origin_index - # inputs["lags"] = np.array(df.loc[origin_index - n_lags + 1 : origin_index + 1, "y_scaled"].values, dtype=np.float32) - inputs["lags"] = df.loc[origin_index - n_lags + 1 : origin_index + 1, "y_scaled"].values + # COVARIATES / LAGGED REGRESSORS: Lagged regressor inputs: analogous to LAGS + if config_lagged_regressors is not None and max_lags > 0: + lagged_regressors = OrderedDict({}) + # TODO: optimize this computation for many lagged_regressors + for lagged_reg in df.columns: + if lagged_reg in config_lagged_regressors: + assert config_lagged_regressors[lagged_reg].n_lags > 0 + covar_lags = config_lagged_regressors[lagged_reg].n_lags + lagged_regressors[lagged_reg] = df.loc[ + origin_index - covar_lags + 1 : origin_index + 1, lagged_reg + ].values + inputs["covariates"] = lagged_regressors + + # OLD + # def _stride_lagged_features(df_col_name, feature_dims): + # # only for case where max_lags 
> 0 + # assert feature_dims >= 1 + # series = df.loc[:, df_col_name].values + # # Added dtype=np.float64 to solve the problem with np.isnan for ubuntu test + # return np.array( + # [series[i + max_lags - feature_dims : i + max_lags] for i in range(n_samples)], dtype=np.float32 + # ) + # for covar in df.columns: + # if covar in config_lagged_regressors: + # assert config_lagged_regressors[covar].n_lags > 0 + # window = config_lagged_regressors[covar].n_lags + # covariates[covar] = _stride_lagged_features(df_col_name=covar, feature_dims=window) + # inputs["covariates"] = covariates # ----------- TODO convert to single sample version ---------------------- @@ -481,15 +510,6 @@ def _stride_time_features_for_forecasts(x): def _stride_future_time_features_for_forecasts(x): return np.array([x[max_lags + i : max_lags + i + n_forecasts] for i in range(n_samples)], dtype=x.dtype) - def _stride_lagged_features(df_col_name, feature_dims): - # only for case where max_lags > 0 - assert feature_dims >= 1 - series = df.loc[:, df_col_name].values - # Added dtype=np.float64 to solve the problem with np.isnan for ubuntu test - return np.array( - [series[i + max_lags - feature_dims : i + max_lags] for i in range(n_samples)], dtype=np.float32 - ) - if config_seasonality is not None: seasonalities = seasonal_features_from_dates(df, config_seasonality) for name, features in seasonalities.items(): @@ -500,15 +520,6 @@ def _stride_lagged_features(df_col_name, feature_dims): seasonalities[name] = _stride_time_features_for_forecasts(features) inputs["seasonalities"] = seasonalities - if config_lagged_regressors is not None and max_lags > 0: - covariates = OrderedDict({}) - for covar in df.columns: - if covar in config_lagged_regressors: - assert config_lagged_regressors[covar].n_lags > 0 - window = config_lagged_regressors[covar].n_lags - covariates[covar] = _stride_lagged_features(df_col_name=covar, feature_dims=window) - inputs["covariates"] = covariates - # get the regressors features if config_regressors is not None: additive_regressors, multiplicative_regressors = make_regressors_features(df, config_regressors) From a35a1b83a35f79063d90497563ab33cbeec688f3 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Tue, 23 Jan 2024 15:26:40 -0800 Subject: [PATCH 029/128] consolidate seasonality computation in one script --- neuralprophet/time_dataset.py | 231 +++++++++++++++++++++++++++------- 1 file changed, 187 insertions(+), 44 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index d2e3b49ee..27fdb9190 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -466,7 +466,7 @@ def tabularize_univariate_datetime_single_index( # COVARIATES / LAGGED REGRESSORS: Lagged regressor inputs: analogous to LAGS if config_lagged_regressors is not None and max_lags > 0: lagged_regressors = OrderedDict({}) - # TODO: optimize this computation for many lagged_regressors + # Future TODO: optimize this computation for many lagged_regressors for lagged_reg in df.columns: if lagged_reg in config_lagged_regressors: assert config_lagged_regressors[lagged_reg].n_lags > 0 @@ -492,6 +492,159 @@ def tabularize_univariate_datetime_single_index( # covariates[covar] = _stride_lagged_features(df_col_name=covar, feature_dims=window) # inputs["covariates"] = covariates + # SEASONALITIES + if config_seasonality is not None: + dates = df["ds"] + assert len(dates.shape) == 1 + seasonalities = OrderedDict({}) + # Seasonality features + for name, period in config_seasonality.periods.items(): + 
if period.resolution > 0: + if config_seasonality.computation == "fourier": + # convert to days since epoch + t = np.array((dates - datetime(1970, 1, 1)).dt.total_seconds().astype(np.float32)) / (3600 * 24.0) + # Provides Fourier series components with the specified frequency and order. + # features: Matrix with dims (2*resolution, length len(dates)) + features = np.column_stack( + [ + fun((2.0 * (i + 1) * np.pi * t / period.period)) + for i in range(period.resolution) + for fun in (np.sin, np.cos) + ] + ) + + else: + raise NotImplementedError + if period.condition_name is not None: + # multiply seasonality features with condition mask/values + features = features * df[period.condition_name].values[:, np.newaxis] + seasonalities[name] = features + for name, features in seasonalities.items(): + if max_lags == 0: + seasonalities[name] = np.expand_dims(features, axis=1) + else: + + def _stride_time_features_for_seasonality(x): + window_size = n_lags + n_forecasts + + if x.ndim == 1: + shape = (n_samples, window_size) + else: + shape = (n_samples, window_size) + x.shape[1:] + + stride = x.strides[0] + strides = (stride, stride) + x.strides[1:] + start_index = max_lags - n_lags + return np.lib.stride_tricks.as_strided(x[start_index:], shape=shape, strides=strides) + + # stride into num_forecast at dim=1 for each sample, just like we did with time + seasonalities[name] = _stride_time_features_for_seasonality(features) + inputs["seasonalities"] = seasonalities + + ## OLD + # def fourier_series_t(t, period, series_order): + # """Provides Fourier series components with the specified frequency and order. + # Note + # ---- + # This function is identical to Meta AI's Prophet Library + # Parameters + # ---------- + # t : pd.Series, float + # Containing time as floating point number of days + # period : float + # Number of days of the period + # series_order : int + # Number of fourier components + # Returns + # ------- + # np.array + # Matrix with seasonality features + # """ + # features = np.column_stack( + # [fun((2.0 * (i + 1) * np.pi * t / period)) for i in range(series_order) for fun in (np.sin, np.cos)] + # ) + # return features + + # def fourier_series(dates, period, series_order): + # """Provides Fourier series components with the specified frequency and order. + # Note + # ---- + # Identical to OG Prophet. + # Parameters + # ---------- + # dates : pd.Series + # Containing time stamps + # period : float + # Number of days of the period + # series_order : int + # Number of fourier components + # Returns + # ------- + # np.array + # Matrix with seasonality features + # """ + # # convert to days since epoch + # t = np.array((dates - datetime(1970, 1, 1)).dt.total_seconds().astype(np.float32)) / (3600 * 24.0) + # return fourier_series_t(t, period, series_order) + + # def seasonal_features_from_dates(df, config_seasonality: configure.ConfigSeasonality): + # """Dataframe with seasonality features. + # Includes seasonality features + # Parameters + # ---------- + # df : pd.DataFrame + # Dataframe with all values + # config_seasonality : configure.ConfigSeasonality + # Configuration for seasonalities + # Returns + # ------- + # OrderedDict + # Dictionary with keys for each period name containing an np.array + # with the respective regression features. 
each with dims: (len(dates), 2*fourier_order) + # """ + # dates = df["ds"] + # assert len(dates.shape) == 1 + # seasonalities = OrderedDict({}) + # # Seasonality features + # for name, period in config_seasonality.periods.items(): + # if period.resolution > 0: + # if config_seasonality.computation == "fourier": + # # features: Matrix with dims (2*resolution, length len(dates)) + # features = fourier_series( + # dates=dates, + # period=period.period, + # series_order=period.resolution, + # ) + # else: + # raise NotImplementedError + # if period.condition_name is not None + # # multiply seasonality features with condition mask/values: + # features = features * df[period.condition_name].values[:, np.newaxis] + # seasonalities[name] = features + # return seasonalities + + # def _stride_time_features_for_seasonality(x): + # window_size = n_lags + n_forecasts + + # if x.ndim == 1: + # shape = (n_samples, window_size) + # else: + # shape = (n_samples, window_size) + x.shape[1:] + + # stride = x.strides[0] + # strides = (stride, stride) + x.strides[1:] + # start_index = max_lags - n_lags + # return np.lib.stride_tricks.as_strided(x[start_index:], shape=shape, strides=strides) + + # seasonalities = seasonal_features_from_dates(df, config_seasonality) + # for name, features in seasonalities.items(): + # if max_lags == 0: + # seasonalities[name] = np.expand_dims(features, axis=1) + # else: + # # stride into num_forecast at dim=1 for each sample, just like we did with time + # seasonalities[name] = _stride_time_features_for_seasonality(features) + # inputs["seasonalities"] = seasonalities + # ----------- TODO convert to single sample version ---------------------- def _stride_time_features_for_forecasts(x): @@ -510,16 +663,6 @@ def _stride_time_features_for_forecasts(x): def _stride_future_time_features_for_forecasts(x): return np.array([x[max_lags + i : max_lags + i + n_forecasts] for i in range(n_samples)], dtype=x.dtype) - if config_seasonality is not None: - seasonalities = seasonal_features_from_dates(df, config_seasonality) - for name, features in seasonalities.items(): - if max_lags == 0: - seasonalities[name] = np.expand_dims(features, axis=1) - else: - # stride into num_forecast at dim=1 for each sample, just like we did with time - seasonalities[name] = _stride_time_features_for_forecasts(features) - inputs["seasonalities"] = seasonalities - # get the regressors features if config_regressors is not None: additive_regressors, multiplicative_regressors = make_regressors_features(df, config_regressors) @@ -808,36 +951,36 @@ def make_regressors_features(df, config_regressors): return additive_regressors, multiplicative_regressors -def seasonal_features_from_dates(df, config_seasonality: configure.ConfigSeasonality): - """Dataframe with seasonality features. - Includes seasonality features, holiday features, and added regressors. - Parameters - ---------- - df : pd.DataFrame - Dataframe with all values - config_seasonality : configure.ConfigSeasonality - Configuration for seasonalities - Returns - ------- - OrderedDict - Dictionary with keys for each period name containing an np.array - with the respective regression features. 
each with dims: (len(dates), 2*fourier_order) - """ - dates = df["ds"] - assert len(dates.shape) == 1 - seasonalities = OrderedDict({}) - # Seasonality features - for name, period in config_seasonality.periods.items(): - if period.resolution > 0: - if config_seasonality.computation == "fourier": - features = fourier_series( - dates=dates, - period=period.period, - series_order=period.resolution, - ) - else: - raise NotImplementedError - if period.condition_name is not None: - features = features * df[period.condition_name].values[:, np.newaxis] - seasonalities[name] = features - return seasonalities +# def seasonal_features_from_dates(df, config_seasonality: configure.ConfigSeasonality): +# """Dataframe with seasonality features. +# Includes seasonality features +# Parameters +# ---------- +# df : pd.DataFrame +# Dataframe with all values +# config_seasonality : configure.ConfigSeasonality +# Configuration for seasonalities +# Returns +# ------- +# OrderedDict +# Dictionary with keys for each period name containing an np.array +# with the respective regression features. each with dims: (len(dates), 2*fourier_order) +# """ +# dates = df["ds"] +# assert len(dates.shape) == 1 +# seasonalities = OrderedDict({}) +# # Seasonality features +# for name, period in config_seasonality.periods.items(): +# if period.resolution > 0: +# if config_seasonality.computation == "fourier": +# features = fourier_series( +# dates=dates, +# period=period.period, +# series_order=period.resolution, +# ) +# else: +# raise NotImplementedError +# if period.condition_name is not None: +# features = features * df[period.condition_name].values[:, np.newaxis] +# seasonalities[name] = features +# return seasonalities From c1c9b1bda653b22f93fdbd6fa78b554a08d7e569 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Tue, 23 Jan 2024 16:18:23 -0800 Subject: [PATCH 030/128] finish Seasonlity conversion --- neuralprophet/time_dataset.py | 86 +++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 40 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 27fdb9190..8a626217b 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -440,19 +440,32 @@ def tabularize_univariate_datetime_single_index( # TIME: the time at each sample's lags and forecasts if max_lags == 0: assert n_forecasts == 1 - # OLD: time = np.expand_dims(df.loc[origin_index, "t"].values, 1) inputs["time"] = df.loc[origin_index, "t"].values + # TODO: Possibly need extra dim? 
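On the extra-dim question in the comment above, a quick illustration of what np.expand_dims would do to a 1-D time window (hypothetical shapes; the axis choice is exactly the open question):

import numpy as np

w = np.arange(5, dtype=np.float32)  # a window of n_lags + n_forecasts time values
assert np.expand_dims(w, 0).shape == (1, 5)  # leading sample axis, as in the old batched layout
assert np.expand_dims(w, 1).shape == (5, 1)  # trailing feature axis instead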
+ # inputs["time"] = np.expand_dims(inputs["time"], 1) else: - # extract time value of n_lags steps before origin_index and n_forecasts steps starting at origin_index - ## OLD: inputs["time"] = _stride_time_features_for_forecasts(df.loc[:, "t"].values) - inputs["time"] = df[origin_index - n_lags : origin_index + n_forecasts, "t"].values + # extract time value of n_lags steps before and icluding origin_index and n_forecasts steps after origin_index + inputs["time"] = df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts + 1, "t"].values + ## OLD: Time + # def _stride_time_features_for_forecasts(x): + # window_size = n_lags + n_forecasts + + # if x.ndim == 1: + # shape = (n_samples, window_size) + # else: + # shape = (n_samples, window_size) + x.shape[1:] + + # stride = x.strides[0] + # strides = (stride, stride) + x.strides[1:] + # start_index = max_lags - n_lags + # return np.lib.stride_tricks.as_strided(x[start_index:], shape=shape, strides=strides) + # inputs["time"] = _stride_time_features_for_forecasts(df.loc[:, "t"].values) # LAGS: From y-series, extract preceeding n_lags steps up to and including origin_index if n_lags >= 1 and "y" in df.columns: # inputs["lags"] = np.array(df.loc[origin_index - n_lags + 1 : origin_index + 1, "y_scaled"].values, dtype=np.float32) inputs["lags"] = df.loc[origin_index - n_lags + 1 : origin_index + 1, "y_scaled"].values - - # OLD + # OLD Lags # def _stride_lagged_features(df_col_name, feature_dims): # # only for case where max_lags > 0 # assert feature_dims >= 1 @@ -475,8 +488,7 @@ def tabularize_univariate_datetime_single_index( origin_index - covar_lags + 1 : origin_index + 1, lagged_reg ].values inputs["covariates"] = lagged_regressors - - # OLD + # OLD Covariates # def _stride_lagged_features(df_col_name, feature_dims): # # only for case where max_lags > 0 # assert feature_dims >= 1 @@ -494,54 +506,45 @@ def tabularize_univariate_datetime_single_index( # SEASONALITIES if config_seasonality is not None: - dates = df["ds"] - assert len(dates.shape) == 1 seasonalities = OrderedDict({}) + if max_lags == 0: + assert n_forecasts == 1 + dates = df.loc[origin_index, "ds"] + else: + dates = df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts + 1, "ds"] + assert len(dates.shape) == 1 # Seasonality features for name, period in config_seasonality.periods.items(): if period.resolution > 0: if config_seasonality.computation == "fourier": + # Compute Fourier series components with the specified frequency and order. # convert to days since epoch t = np.array((dates - datetime(1970, 1, 1)).dt.total_seconds().astype(np.float32)) / (3600 * 24.0) - # Provides Fourier series components with the specified frequency and order. 
- # features: Matrix with dims (2*resolution, length len(dates)) + # features: Matrix with dims (length len(dates), 2*resolution) features = np.column_stack( - [ - fun((2.0 * (i + 1) * np.pi * t / period.period)) - for i in range(period.resolution) - for fun in (np.sin, np.cos) - ] + [np.sin((2.0 * (i + 1) * np.pi * t / period.period)) for i in range(period.resolution)] + + [np.cos((2.0 * (i + 1) * np.pi * t / period.period)) for i in range(period.resolution)] ) - + # Single nested loop version: + # features = np.column_stack( + # [ + # fun((2.0 * (i + 1) * np.pi * t / period.period)) + # for i in range(period.resolution) + # for fun in (np.sin, np.cos) + # ] + # ) else: raise NotImplementedError if period.condition_name is not None: # multiply seasonality features with condition mask/values features = features * df[period.condition_name].values[:, np.newaxis] - seasonalities[name] = features - for name, features in seasonalities.items(): - if max_lags == 0: - seasonalities[name] = np.expand_dims(features, axis=1) - else: - - def _stride_time_features_for_seasonality(x): - window_size = n_lags + n_forecasts - - if x.ndim == 1: - shape = (n_samples, window_size) - else: - shape = (n_samples, window_size) + x.shape[1:] - - stride = x.strides[0] - strides = (stride, stride) + x.strides[1:] - start_index = max_lags - n_lags - return np.lib.stride_tricks.as_strided(x[start_index:], shape=shape, strides=strides) - # stride into num_forecast at dim=1 for each sample, just like we did with time - seasonalities[name] = _stride_time_features_for_seasonality(features) + seasonalities[name] = features + # TODO: Possibly need extra dim? + # seasonalities[name] = np.expand_dims(seasonalities[name], 1) inputs["seasonalities"] = seasonalities - ## OLD + ## OLD Seasonality # def fourier_series_t(t, period, series_order): # """Provides Fourier series components with the specified frequency and order. 
# Note @@ -609,7 +612,7 @@ def _stride_time_features_for_seasonality(x): # for name, period in config_seasonality.periods.items(): # if period.resolution > 0: # if config_seasonality.computation == "fourier": - # # features: Matrix with dims (2*resolution, length len(dates)) + # # features: Matrix with dims (length len(dates), 2*resolution) # features = fourier_series( # dates=dates, # period=period.period, @@ -646,6 +649,9 @@ def _stride_time_features_for_seasonality(x): # inputs["seasonalities"] = seasonalities # ----------- TODO convert to single sample version ---------------------- + # TODO: Future Regressors + # TODO: Events + # TODO: Postprocessing def _stride_time_features_for_forecasts(x): window_size = n_lags + n_forecasts From 8271a5ed051dce4056355dc191c9720c976a5118 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Tue, 23 Jan 2024 16:19:26 -0800 Subject: [PATCH 031/128] update todos --- neuralprophet/time_dataset.py | 1 + 1 file changed, 1 insertion(+) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 8a626217b..9a8f33657 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -649,6 +649,7 @@ def tabularize_univariate_datetime_single_index( # inputs["seasonalities"] = seasonalities # ----------- TODO convert to single sample version ---------------------- + # TODO: Targets # TODO: Future Regressors # TODO: Events # TODO: Postprocessing From 1aad054bae5f15af5d62d30c359d581e4f6657b6 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Wed, 24 Jan 2024 12:51:30 -0800 Subject: [PATCH 032/128] complete targets and future regressors --- neuralprophet/time_dataset.py | 254 ++++++++++++++++++++++------------ 1 file changed, 162 insertions(+), 92 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 9a8f33657..f814ec184 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -246,6 +246,7 @@ def create_nan_mask(self, df, predict_steps, drop_missing): predict_steps : int number of steps to predict """ + # IMPORTANT !! # TODO implement actual filtering return np.ones(len(df), dtype=bool) @@ -296,7 +297,7 @@ def create_nan_mask(self, df, predict_steps, drop_missing): ) def format_sample(self, inputs, targets=None): - """Convert tabularizes sample to correct formats. + """Convert tabularized sample to correct formats. 
Parameters ---------- inputs : ordered dict @@ -421,6 +422,8 @@ def tabularize_univariate_datetime_single_index( """ max_lags = get_max_num_lags(config_lagged_regressors, n_lags) n_samples = 1 + if max_lags == 0: + assert n_forecasts == 1 # OLD: previous workaround # learning_rate = config_train.learning_rate @@ -434,12 +437,41 @@ def tabularize_univariate_datetime_single_index( # ): # n_samples = len(df) - max_lags + 1 - n_forecasts + if predict_mode: + targets = np.zeros((1, n_forecasts)) + ## OLD + # # time is the time at each forecast step + # t = df.loc[:, "t"].values + # if max_lags == 0: + # time = np.expand_dims(t, 1) + # else: + # time = _stride_time_features_for_forecasts(t) + # inputs["time"] = time # contains n_lags + n_forecasts + # targets = np.empty_like(time[:, n_lags:]) + # targets = np.nan_to_num(targets) + else: + targets = df.loc[origin_index + 1 : origin_index + 1 + n_forecasts, "y_scaled"].values + targets = np.expand_dims(targets, axis=1) + ## Alternative + # x = df["y_scaled"].values + # targets = np.array([x[origin_index + 1 : origin_index + 1 + n_forecasts]], dtype=x.dtype) + ## OLD + # # time is the time at each forecast step + # t = df.loc[:, "t"].values + # if max_lags == 0: + # time = np.expand_dims(t, 1) + # else: + # time = _stride_time_features_for_forecasts(t) + # inputs["time"] = time # contains n_lags + n_forecasts + # def _stride_future_time_features_for_forecasts(x): + # return np.array([x[max_lags + i : max_lags + i + n_forecasts] for i in range(n_samples)], dtype=x.dtype) + # targets = _stride_future_time_features_for_forecasts(df["y_scaled"].values) + # data is stored in OrderedDict inputs = OrderedDict({}) # TIME: the time at each sample's lags and forecasts if max_lags == 0: - assert n_forecasts == 1 inputs["time"] = df.loc[origin_index, "t"].values # TODO: Possibly need extra dim? 
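One pandas detail worth keeping in mind for the .loc target and window slices in this patch: on a default integer index, label-based .loc slicing includes both endpoints, unlike positional slicing. A quick self-contained check:

import pandas as pd

s = pd.Series(range(10))
n_lags, n_forecasts, origin_index = 3, 2, 4

assert list(s.loc[2:4]) == [2, 3, 4]  # three values, not two
assert list(s.iloc[2:4]) == [2, 3]    # positional slicing stays half-open

# exactly n_lags values up to and including the origin:
assert len(s.loc[origin_index - n_lags + 1 : origin_index]) == n_lags
# exactly n_forecasts values after the origin:
assert len(s.loc[origin_index + 1 : origin_index + n_forecasts]) == n_forecasts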
# inputs["time"] = np.expand_dims(inputs["time"], 1) @@ -508,7 +540,6 @@ def tabularize_univariate_datetime_single_index( if config_seasonality is not None: seasonalities = OrderedDict({}) if max_lags == 0: - assert n_forecasts == 1 dates = df.loc[origin_index, "ds"] else: dates = df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts + 1, "ds"] @@ -648,58 +679,103 @@ def tabularize_univariate_datetime_single_index( # seasonalities[name] = _stride_time_features_for_seasonality(features) # inputs["seasonalities"] = seasonalities - # ----------- TODO convert to single sample version ---------------------- - # TODO: Targets - # TODO: Future Regressors - # TODO: Events - # TODO: Postprocessing - - def _stride_time_features_for_forecasts(x): - window_size = n_lags + n_forecasts - - if x.ndim == 1: - shape = (n_samples, window_size) - else: - shape = (n_samples, window_size) + x.shape[1:] - - stride = x.strides[0] - strides = (stride, stride) + x.strides[1:] - start_index = max_lags - n_lags - return np.lib.stride_tricks.as_strided(x[start_index:], shape=shape, strides=strides) - - def _stride_future_time_features_for_forecasts(x): - return np.array([x[max_lags + i : max_lags + i + n_forecasts] for i in range(n_samples)], dtype=x.dtype) - - # get the regressors features + # FUTURE REGRESSORS: get the future regressors features if config_regressors is not None: - additive_regressors, multiplicative_regressors = make_regressors_features(df, config_regressors) + # sort and divide regressors into multiplicative and additive + additive_regressors_names = [] + multiplicative_regressors_names = [] + for reg in sorted(df.columns.tolist()): + if reg in config_regressors: + mode = config_regressors[reg].mode + if mode == "additive": + additive_regressors_names.append(reg) + else: + multiplicative_regressors_names.append(reg) + # create numpy array of values of additive and multiplicative regressors, at correct indexes + # features dims: (n_samples/batch, n_forecasts, n_features/n_regressors) regressors = OrderedDict({}) + regressors["additive"] = None + regressors["multiplicative"] = None if max_lags == 0: - if additive_regressors is not None: - regressors["additive"] = np.expand_dims(additive_regressors, axis=1) - if multiplicative_regressors is not None: - regressors["multiplicative"] = np.expand_dims(multiplicative_regressors, axis=1) + if len(additive_regressors_names) > 0: + regressors["additive"] = np.expand_dims(df.loc[origin_index, additive_regressors_names].values, axis=0) + if len(multiplicative_regressors_names) > 0: + regressors["multiplicative"] = np.expand_dims( + df.loc[origin_index, multiplicative_regressors_names].values, axis=0 + ) else: - if additive_regressors is not None: - additive_regressor_feature_windows = [] - # additive_regressor_feature_windows_lagged = [] - for i in range(0, additive_regressors.shape[1]): - # stride into num_forecast at dim=1 for each sample, just like we did with time - stride = _stride_time_features_for_forecasts(additive_regressors[:, i]) - additive_regressor_feature_windows.append(stride) - additive_regressors = np.dstack(additive_regressor_feature_windows) - regressors["additive"] = additive_regressors - - if multiplicative_regressors is not None: - multiplicative_regressor_feature_windows = [] - for i in range(0, multiplicative_regressors.shape[1]): - stride = _stride_time_features_for_forecasts(multiplicative_regressors[:, i]) - multiplicative_regressor_feature_windows.append(stride) - multiplicative_regressors = 
np.dstack(multiplicative_regressor_feature_windows) - regressors["multiplicative"] = multiplicative_regressors + if len(additive_regressors_names) > 0: + regressors_add_future_window = df.loc[ + origin_index + 1 : origin_index + 1 + n_forecasts, additive_regressors_names + ].values + regressors["additive"] = np.expand_dims(regressors_add_future_window, axis=0) + ## OLD + # additive_regressor_feature_windows = [] + # # additive_regressor_feature_windows_lagged = [] + # for i in range(0, len(additive_regressors_names)): + # # stride into num_forecast at dim=1 for each sample, just like we did with time + # x = additive_regressors[:, i] + # window_size = n_lags + n_forecasts + + # if x.ndim == 1: + # shape = (n_samples, window_size) + # else: + # shape = (n_samples, window_size) + x.shape[1:] + + # stride = x.strides[0] + # strides = (stride, stride) + x.strides[1:] + # start_index = max_lags - n_lags + # stride = np.lib.stride_tricks.as_strided(x[start_index:], shape=shape, strides=strides) + # additive_regressor_feature_windows.append(stride) + # additive_regressors = np.dstack(additive_regressor_feature_windows) + # regressors["additive"] = additive_regressors + if len(multiplicative_regressors_names) > 0: + regressors_mul_future_window = df.loc[ + origin_index + 1 : origin_index + 1 + n_forecasts, multiplicative_regressors_names + ].values + regressors["multiplicative"] = np.expand_dims(regressors_mul_future_window, axis=0) inputs["regressors"] = regressors + ## OLD Future regressors + # additive_regressors, multiplicative_regressors = make_regressors_features(df, config_regressors) + # for max_lags == 0, see code before merge + # if max_lags > 0: + # def _stride_time_features_for_forecasts(x):additive_regressors + # window_size = n_lags + n_forecasts + + # if x.ndim == 1: + # shape = (n_samples, window_size) + # else: + # shape = (n_samples, window_size) + x.shape[1:] + + # stride = x.strides[0] + # strides = (stride, stride) + x.strides[1:] + # start_index = max_lags - n_lags + # return np.lib.stride_tricks.as_strided(x[start_index:], shape=shape, strides=strides) + # if additive_regressors is not None: + # additive_regressor_feature_windows = [] + # # additive_regressor_feature_windows_lagged = [] + # for i in range(0, additive_regressors.shape[1]): + # # stride into num_forecast at dim=1 for each sample, just like we did with time + # stride = _stride_time_features_for_forecasts(additive_regressors[:, i]) + # additive_regressor_feature_windows.append(stride) + # additive_regressors = np.dstack(additive_regressor_feature_windows) + # regressors["additive"] = additive_regressors + + # if multiplicative_regressors is not None: + # multiplicative_regressor_feature_windows = [] + # for i in range(0, multiplicative_regressors.shape[1]): + # stride = _stride_time_features_for_forecasts(multiplicative_regressors[:, i]) + # multiplicative_regressor_feature_windows.append(stride) + # multiplicative_regressors = np.dstack(multiplicative_regressor_feature_windows) + # regressors["multiplicative"] = multiplicative_regressors + # inputs["regressors"] = regressors + + # ----------- TODO convert to single sample version ---------------------- + # TODO: Events + # TODO: Postprocessing & Formatting + # get the events features if config_events is not None or config_country_holidays is not None: additive_events, multiplicative_events = make_events_features(df, config_events, config_country_holidays) @@ -731,12 +807,6 @@ def _stride_future_time_features_for_forecasts(x): events["multiplicative"] = 
multiplicative_events inputs["events"] = events - if predict_mode: - targets = np.empty_like(time[:, n_lags:]) - targets = np.nan_to_num(targets) - else: - targets = _stride_future_time_features_for_forecasts(df["y_scaled"].values) - tabularized_input_shapes_str = "" for key, value in inputs.items(): if key in [ @@ -918,44 +988,44 @@ def make_events_features(df, config_events: Optional[configure.ConfigEvents] = N return additive_events, multiplicative_events -def make_regressors_features(df, config_regressors): - """Construct arrays of all scalar regressor features - Parameters - ---------- - df : pd.DataFrame - Dataframe with all values including the user specified regressors - config_regressors : configure.ConfigFutureRegressors - User specified regressors config - Returns - ------- - np.array - All additive regressor features - np.array - All multiplicative regressor features - """ - additive_regressors = pd.DataFrame() - multiplicative_regressors = pd.DataFrame() - - for reg in df.columns: - if reg in config_regressors: - mode = config_regressors[reg].mode - if mode == "additive": - additive_regressors[reg] = df[reg] - else: - multiplicative_regressors[reg] = df[reg] - - if not additive_regressors.empty: - additive_regressors = additive_regressors[sorted(additive_regressors.columns.tolist())] - additive_regressors = additive_regressors.values - else: - additive_regressors = None - if not multiplicative_regressors.empty: - multiplicative_regressors = multiplicative_regressors[sorted(multiplicative_regressors.columns.tolist())] - multiplicative_regressors = multiplicative_regressors.values - else: - multiplicative_regressors = None - - return additive_regressors, multiplicative_regressors +# def make_regressors_features(df, config_regressors): +# """Construct arrays of all scalar regressor features +# Parameters +# ---------- +# df : pd.DataFrame +# Dataframe with all values including the user specified regressors +# config_regressors : configure.ConfigFutureRegressors +# User specified regressors config +# Returns +# ------- +# np.array +# All additive regressor features +# np.array +# All multiplicative regressor features +# """ +# additive_regressors = pd.DataFrame() +# multiplicative_regressors = pd.DataFrame() + +# for reg in df.columns: +# if reg in config_regressors: +# mode = config_regressors[reg].mode +# if mode == "additive": +# additive_regressors[reg] = df[reg] +# else: +# multiplicative_regressors[reg] = df[reg] + +# if not additive_regressors.empty: +# additive_regressors = additive_regressors[sorted(additive_regressors.columns.tolist())] +# additive_regressors = additive_regressors.values +# else: +# additive_regressors = None +# if not multiplicative_regressors.empty: +# multiplicative_regressors = multiplicative_regressors[sorted(multiplicative_regressors.columns.tolist())] +# multiplicative_regressors = multiplicative_regressors.values +# else: +# multiplicative_regressors = None + +# return additive_regressors, multiplicative_regressors # def seasonal_features_from_dates(df, config_seasonality: configure.ConfigSeasonality): From a41138e5150687ed3fdf3341667d0527cee19c8e Mon Sep 17 00:00:00 2001 From: ourownstory Date: Wed, 24 Jan 2024 15:06:45 -0800 Subject: [PATCH 033/128] convert events --- neuralprophet/time_dataset.py | 1330 ++++++++++++++++++--------------- 1 file changed, 730 insertions(+), 600 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index f814ec184..5a82a56f6 100644 --- a/neuralprophet/time_dataset.py +++ 
b/neuralprophet/time_dataset.py
@@ -65,16 +65,22 @@ def __init__(self, df, name, **kwargs):
         # ->_create_dataset calls prep_or_copy_df, then returns GlobalTimeDataset
         # Future TODO: integrate some of these preprocessing steps happening outside?
 
-        # TODO: Preprocessing of features (added to self.df)
-        # - events and holidays: convert date-time occurence dictionary to a column of values in the self.df
-        # - These will then be later tabularized in __get_item___
-
         self.df = df
         self.name = name
         self.meta = OrderedDict({})
         self.meta["df_name"] = self.name
         self.config_args = kwargs
 
+        # TODO: Preprocessing of features (added to self.df)
+        # - events and holidays: convert date-time occurrence dictionary to a column of values in the self.df
+        # - These will then be later tabularized in __getitem__
+        # add events based on configuration to df
+        self.df = self.df.reset_index(drop=True)
+        (
+            self.df,
+            self.additive_event_and_holiday_names,
+            self.multiplicative_event_and_holiday_names,
+        ) = add_event_features_to_df(self.df, self.config_args["config_events"], self.config_args["config_country_holidays"])
         self.sample2index_map, self.length = self.create_sample2index_map(df)
 
     def __getitem__(self, index):
@@ -135,7 +141,7 @@ def create_sample2index_map(self, df):
         df_length = len(df)
         max_lags = get_max_num_lags(self.config_args["config_lagged_regressors"], self.config_args["n_lags"])
         n_forecasts = self.config_args["n_forecasts"]
-        origin_start_end_mask = self.create_origin_start_end_mask(
+        origin_start_end_mask = create_origin_start_end_mask(
             df_length=df_length, max_lags=max_lags, n_forecasts=n_forecasts
         )
 
@@ -143,14 +149,14 @@ def create_sample2index_map(self, df):
         # Filter missing samples and prediction frequency (does not actually drop, but creates indexmapping)
         # analogous to `self.filter_samples_after_init(
         #     self.kwargs["prediction_frequency"])`
-        prediction_frequency_mask = self.create_prediction_frequency_filter_mask(
-            self, df, self.config_args["prediction_frequency"]
+        prediction_frequency_mask = create_prediction_frequency_filter_mask(
+            df, self.config_args["prediction_frequency"]
         )
 
-        # TODO Create index mapping of sample index to df index
-        # Drop nan analogous to `self.drop_nan_after_init(
+        # TODO Create NAN-free index mapping of sample index to df index
+        # analogous to `self.drop_nan_after_init(
         #     self.df, self.kwargs["predict_steps"], self.kwargs["config_missing"].drop_missing)
-        nan_mask = self.create_nan_mask(df)  # boolean array where NAN are False
+        nan_mask = create_nan_mask(df)  # boolean array where NAN are False
 
         # Combine masks
         mask = np.logical_and(prediction_frequency_mask, origin_start_end_mask)
@@ -165,137 +171,6 @@
 
         return sample_index_2_df_origin_index, num_samples
 
-    def create_origin_start_end_mask(self, df_length, max_lags, n_forecasts):
-        """Creates a boolean mask for valid prediction origin positions. 
- (based on limiting input lags and forecast targets at start and end of df)""" - if max_lags >= 1: - start_pad = np.zeros(max_lags - 1, dtype=bool) - valid_targets = np.ones(df_length - max_lags - n_forecasts + 1, dtype=bool) - end_pad = np.zeros(n_forecasts, dtype=bool) - target_start_end_mask = np.concatenate((start_pad, valid_targets, end_pad), axis=None) - elif max_lags == 0 and n_forecasts == 1: - # without lags, forecast targets and origins are identical - target_start_end_mask = np.ones(df_length, dtype=bool) - else: - raise ValueError(f"max_lags value of {max_lags} not supported for n_forecasts {n_forecasts}.") - return target_start_end_mask - - def create_prediction_frequency_filter_mask( - self, - df: pd.DataFrame, - prediction_frequency=None, - ): - """Filters prediction origin index from df based on the forecast frequency setting. - - Filter based on timestamp last lag before targets start - - Parameters - ---------- - prediction_frequency : int - periodic interval in which forecasts should be made. - Note - ---- - E.g. if prediction_frequency=7, forecasts are only made on every 7th step (once in a week in case of daily - resolution). - - Returns boolean mask where prediction origin indexes to be included are True, and the rest False. - """ - # !! IMPORTANT - # TODO: Adjust top level documentation to specify that the filter is applied to prediction ORIGIN, not targets start. - # !! IMPORTANT - - mask = np.ones((len(df),), dtype=bool) - - # Basic case: no filter - if prediction_frequency is None or prediction_frequency == 1: - return mask - - # OLD: timestamps were created from "ds" column in tabularization and then re-converted here - # timestamps = pd.to_datetime([x["timestamps"][0] for x in df]) - # OR - # timestamps = df["timestamps"].apply(lambda x: pd.to_datetime(x[0])) - - timestamps = pd.to_datetime(df.loc[:, "ds"].values) - filter_masks = [] - for key, value in prediction_frequency.items(): - if key == "daily-hour": - mask = timestamps.hour == value - elif key == "weekly-day": - mask = timestamps.dayofweek == value - elif key == "monthly-day": - mask = timestamps.day == value - elif key == "yearly-month": - mask = timestamps.month == value - elif key == "hourly-minute": - mask = timestamps.minute == value - else: - raise ValueError(f"Invalid prediction frequency: {key}") - filter_masks.append(mask) - for m in filter_masks: - mask = np.logical_and(mask, m) - return mask - - def create_nan_mask(self, df, predict_steps, drop_missing): - """Creates mask for each prediction origin, - accounting for corresponding input lags / forecast targets containing any NaN values. - - Parameters - ---------- - drop_missing : bool - whether to automatically drop missing samples from the data - predict_steps : int - number of steps to predict - """ - # IMPORTANT !! - # TODO implement actual filtering - return np.ones(len(df), dtype=bool) - - # Create index mapping of sample index to df index - # - Filter missing samples (does not actually drop, but creates indexmapping) - # -- drop nan analogous to `self.drop_nan_after_init(self.df, self.kwargs["predict_steps"], self.kwargs["config_missing"].drop_missing) - # Note: needs to also account for NANs in lagged inputs or in n_forecasts, not just first target. - # Implement a convolutional filter for targets and each lagged regressor. - # Also account for future regressors and events. 
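The comments above leave the actual NaN filtering as a TODO ("implement a convolutional filter for targets and each lagged regressor"), as does the surviving `create_nan_mask` stub. A sketch of how such a vectorized origin mask could look, under simplifying assumptions (a single `y` series and `n_lags >= 1`; the helper name is hypothetical, not part of the patch):

import numpy as np

def nan_free_origin_mask(y: np.ndarray, n_lags: int, n_forecasts: int) -> np.ndarray:
    # True at origin i iff y[i - n_lags + 1 : i + 1] (lag inputs) and
    # y[i + 1 : i + 1 + n_forecasts] (forecast targets) contain no NaN.
    nan_flags = np.isnan(y).astype(int)
    window = n_lags + n_forecasts
    # NaN count in every sliding window of length n_lags + n_forecasts
    counts = np.convolve(nan_flags, np.ones(window, dtype=int), mode="valid")
    mask = np.zeros(len(y), dtype=bool)
    # the window starting at position j belongs to origin i = j + n_lags - 1
    mask[n_lags - 1 : n_lags - 1 + len(counts)] = counts == 0
    return mask

y = np.array([1.0, np.nan, 3.0, 4.0, 5.0, 6.0])
# origin 3 still sees the NaN in its lag window, origin 4 no longer does
assert not nan_free_origin_mask(y, n_lags=3, n_forecasts=1)[3]
assert nan_free_origin_mask(y, n_lags=3, n_forecasts=1)[4]

Lagged regressors with their own lag counts could be handled the same way, one convolution per column, combined with np.logical_and.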
- - # Rewrite to return mask instead of filtering df: - nan_idx = [] - # NaNs in inputs - for key, data in self.inputs.items(): - if isinstance(data, torch.Tensor): - nans = torch.where(torch.isnan(data))[0].tolist() - if len(nans) > 0: - nan_idx += nans - elif isinstance(data, dict): - for subkey, subdata in data.items(): - nans = torch.where(torch.isnan(subdata))[0].tolist() - if len(nans) > 0: - nan_idx += nans - - # NaNs in targets that are not inserted for prediction at the end - nans = torch.where(torch.isnan(self.targets))[0].tolist() - if len(nans) > 0: - for idx in nans: - if idx not in nan_idx and idx < len(self) - predict_steps: - nan_idx.append(idx) - - nan_idx = list(set(nan_idx)) - nan_idx.sort() - if drop_missing and len(nan_idx) > 0: - log.warning(f"{len(nan_idx)} samples with missing values were dropped from the data. ") - for key, data in self.inputs.items(): - if key not in ["time", "lags"]: # "time_lagged" - for name, features in data.items(): - self.inputs[key][name] = np.delete(self.inputs[key][name], nan_idx, 0) - else: - self.inputs[key] = np.delete(self.inputs[key], nan_idx, 0) - self.targets = np.delete(self.targets, nan_idx, 0) - self.length = self.inputs["time"].shape[0] - if not drop_missing and len(nan_idx) > 0: - raise ValueError( - "Inputs/targets with missing values detected. " - "Please either adjust imputation parameters, or set 'drop_missing' to True to drop those samples." - ) - def format_sample(self, inputs, targets=None): """Convert tabularized sample to correct formats. Parameters @@ -354,474 +229,506 @@ def split_dict(sample_input, index): return sample_input, sample_target - -def tabularize_univariate_datetime_single_index( - df: pd.DataFrame, - origin_index: int, - predict_mode: bool = False, - n_lags: int = 0, - n_forecasts: int = 1, - predict_steps: int = 1, - config_seasonality: Optional[configure.ConfigSeasonality] = None, - config_events: Optional[configure.ConfigEvents] = None, - config_country_holidays=None, - config_lagged_regressors: Optional[configure.ConfigLaggedRegressors] = None, - config_regressors: Optional[configure.ConfigFutureRegressors] = None, - config_missing=None, - config_train=None, - prediction_frequency=None, -): - """Create a tabular data sample from timeseries dataframe, used for mini-batch creation. - Note - ---- - Data must have no gaps for sample extracted at given index position. - ---------- - df : pd.DataFrame - Sequence of observations with original ``ds``, ``y`` and normalized ``t``, ``y_scaled`` columns - origin_index: int: - dataframe index position of last observed lag before forecast starts. 
- config_seasonality : configure.ConfigSeasonality - Configuration for seasonalities - n_lags : int - Number of lagged values of series to include as model inputs (aka AR-order) - n_forecasts : int - Number of steps to forecast into future - config_events : configure.ConfigEvents - User specified events, each with their upper, lower windows (int) and regularization - config_country_holidays : configure.ConfigCountryHolidays - Configurations (holiday_names, upper, lower windows, regularization) for country specific holidays - config_lagged_regressors : configure.ConfigLaggedRegressors - Configurations for lagged regressors - config_regressors : configure.ConfigFutureRegressors - Configuration for regressors - predict_mode : bool - Chooses the prediction mode - Options - * (default) ``False``: Includes target values - * ``True``: Does not include targets but includes entire dataset as input - Returns - ------- - OrderedDict - Model inputs, each of len(df) but with varying dimensions - Note - ---- - Contains the following data: - Model Inputs - * ``time`` (np.array, float), dims: (num_samples, 1) - * ``seasonalities`` (OrderedDict), named seasonalities - each with features (np.array, float) - dims: (num_samples, n_features[name]) - * ``lags`` (np.array, float), dims: (num_samples, n_lags) - * ``covariates`` (OrderedDict), named covariates, - each with features (np.array, float) of dims: (num_samples, n_lags) - * ``events`` (OrderedDict), events, - each with features (np.array, float) of dims: (num_samples, n_lags) - * ``regressors`` (OrderedDict), regressors, - each with features (np.array, float) of dims: (num_samples, n_lags) - np.array, float - Targets to be predicted of same length as each of the model inputs, dims: (num_samples, n_forecasts) - """ - max_lags = get_max_num_lags(config_lagged_regressors, n_lags) - n_samples = 1 - if max_lags == 0: - assert n_forecasts == 1 - - # OLD: previous workaround - # learning_rate = config_train.learning_rate - # if ( - # predict_mode - # or (learning_rate is None) - # or config_lagged_regressors - # or config_country_holidays - # or config_events - # or prediction_frequency - # ): - # n_samples = len(df) - max_lags + 1 - n_forecasts - - if predict_mode: - targets = np.zeros((1, n_forecasts)) - ## OLD - # # time is the time at each forecast step - # t = df.loc[:, "t"].values - # if max_lags == 0: - # time = np.expand_dims(t, 1) - # else: - # time = _stride_time_features_for_forecasts(t) - # inputs["time"] = time # contains n_lags + n_forecasts - # targets = np.empty_like(time[:, n_lags:]) - # targets = np.nan_to_num(targets) - else: - targets = df.loc[origin_index + 1 : origin_index + 1 + n_forecasts, "y_scaled"].values - targets = np.expand_dims(targets, axis=1) - ## Alternative - # x = df["y_scaled"].values - # targets = np.array([x[origin_index + 1 : origin_index + 1 + n_forecasts]], dtype=x.dtype) - ## OLD - # # time is the time at each forecast step - # t = df.loc[:, "t"].values - # if max_lags == 0: - # time = np.expand_dims(t, 1) - # else: - # time = _stride_time_features_for_forecasts(t) - # inputs["time"] = time # contains n_lags + n_forecasts - # def _stride_future_time_features_for_forecasts(x): - # return np.array([x[max_lags + i : max_lags + i + n_forecasts] for i in range(n_samples)], dtype=x.dtype) - # targets = _stride_future_time_features_for_forecasts(df["y_scaled"].values) - - # data is stored in OrderedDict - inputs = OrderedDict({}) - - # TIME: the time at each sample's lags and forecasts - if max_lags == 0: - inputs["time"] = 
df.loc[origin_index, "t"].values - # TODO: Possibly need extra dim? - # inputs["time"] = np.expand_dims(inputs["time"], 1) - else: - # extract time value of n_lags steps before and icluding origin_index and n_forecasts steps after origin_index - inputs["time"] = df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts + 1, "t"].values - ## OLD: Time - # def _stride_time_features_for_forecasts(x): - # window_size = n_lags + n_forecasts - - # if x.ndim == 1: - # shape = (n_samples, window_size) - # else: - # shape = (n_samples, window_size) + x.shape[1:] - - # stride = x.strides[0] - # strides = (stride, stride) + x.strides[1:] - # start_index = max_lags - n_lags - # return np.lib.stride_tricks.as_strided(x[start_index:], shape=shape, strides=strides) - # inputs["time"] = _stride_time_features_for_forecasts(df.loc[:, "t"].values) - - # LAGS: From y-series, extract preceeding n_lags steps up to and including origin_index - if n_lags >= 1 and "y" in df.columns: - # inputs["lags"] = np.array(df.loc[origin_index - n_lags + 1 : origin_index + 1, "y_scaled"].values, dtype=np.float32) - inputs["lags"] = df.loc[origin_index - n_lags + 1 : origin_index + 1, "y_scaled"].values - # OLD Lags - # def _stride_lagged_features(df_col_name, feature_dims): - # # only for case where max_lags > 0 - # assert feature_dims >= 1 - # series = df.loc[:, df_col_name].values - # # Added dtype=np.float64 to solve the problem with np.isnan for ubuntu test - # return np.array( - # [series[i + max_lags - feature_dims : i + max_lags] for i in range(n_samples)], dtype=np.float32 - # ) - # inputs["lags"] = _stride_lagged_features(df_col_name="y_scaled", feature_dims=n_lags) - - # COVARIATES / LAGGED REGRESSORS: Lagged regressor inputs: analogous to LAGS - if config_lagged_regressors is not None and max_lags > 0: - lagged_regressors = OrderedDict({}) - # Future TODO: optimize this computation for many lagged_regressors - for lagged_reg in df.columns: - if lagged_reg in config_lagged_regressors: - assert config_lagged_regressors[lagged_reg].n_lags > 0 - covar_lags = config_lagged_regressors[lagged_reg].n_lags - lagged_regressors[lagged_reg] = df.loc[ - origin_index - covar_lags + 1 : origin_index + 1, lagged_reg - ].values - inputs["covariates"] = lagged_regressors - # OLD Covariates - # def _stride_lagged_features(df_col_name, feature_dims): - # # only for case where max_lags > 0 - # assert feature_dims >= 1 - # series = df.loc[:, df_col_name].values - # # Added dtype=np.float64 to solve the problem with np.isnan for ubuntu test - # return np.array( - # [series[i + max_lags - feature_dims : i + max_lags] for i in range(n_samples)], dtype=np.float32 - # ) - # for covar in df.columns: - # if covar in config_lagged_regressors: - # assert config_lagged_regressors[covar].n_lags > 0 - # window = config_lagged_regressors[covar].n_lags - # covariates[covar] = _stride_lagged_features(df_col_name=covar, feature_dims=window) - # inputs["covariates"] = covariates - - # SEASONALITIES - if config_seasonality is not None: - seasonalities = OrderedDict({}) + def tabularize_univariate_datetime_single_index( + self, + df: pd.DataFrame, + origin_index: int, + predict_mode: bool = False, + n_lags: int = 0, + n_forecasts: int = 1, + predict_steps: int = 1, + config_seasonality: Optional[configure.ConfigSeasonality] = None, + config_events: Optional[configure.ConfigEvents] = None, + config_country_holidays=None, + config_lagged_regressors: Optional[configure.ConfigLaggedRegressors] = None, + config_regressors: 
Optional[configure.ConfigFutureRegressors] = None, + config_missing=None, + config_train=None, + prediction_frequency=None, + ): + """Create a tabular data sample from timeseries dataframe, used for mini-batch creation. + Note + ---- + Data must have no gaps for sample extracted at given index position. + ---------- + df : pd.DataFrame + Sequence of observations with original ``ds``, ``y`` and normalized ``t``, ``y_scaled`` columns + origin_index: int: + dataframe index position of last observed lag before forecast starts. + config_seasonality : configure.ConfigSeasonality + Configuration for seasonalities + n_lags : int + Number of lagged values of series to include as model inputs (aka AR-order) + n_forecasts : int + Number of steps to forecast into future + config_events : configure.ConfigEvents + User specified events, each with their upper, lower windows (int) and regularization + config_country_holidays : configure.ConfigCountryHolidays + Configurations (holiday_names, upper, lower windows, regularization) for country specific holidays + config_lagged_regressors : configure.ConfigLaggedRegressors + Configurations for lagged regressors + config_regressors : configure.ConfigFutureRegressors + Configuration for regressors + predict_mode : bool + Chooses the prediction mode + Options + * (default) ``False``: Includes target values + * ``True``: Does not include targets but includes entire dataset as input + Returns + ------- + OrderedDict + Model inputs, each of len(df) but with varying dimensions + Note + ---- + Contains the following data: + Model Inputs + * ``time`` (np.array, float), dims: (num_samples, 1) + * ``seasonalities`` (OrderedDict), named seasonalities + each with features (np.array, float) - dims: (num_samples, n_features[name]) + * ``lags`` (np.array, float), dims: (num_samples, n_lags) + * ``covariates`` (OrderedDict), named covariates, + each with features (np.array, float) of dims: (num_samples, n_lags) + * ``events`` (OrderedDict), events, + each with features (np.array, float) of dims: (num_samples, n_lags) + * ``regressors`` (OrderedDict), regressors, + each with features (np.array, float) of dims: (num_samples, n_lags) + np.array, float + Targets to be predicted of same length as each of the model inputs, dims: (num_samples, n_forecasts) + """ + max_lags = get_max_num_lags(config_lagged_regressors, n_lags) + n_samples = 1 if max_lags == 0: - dates = df.loc[origin_index, "ds"] + assert n_forecasts == 1 + + # OLD: previous workaround + # learning_rate = config_train.learning_rate + # if ( + # predict_mode + # or (learning_rate is None) + # or config_lagged_regressors + # or config_country_holidays + # or config_events + # or prediction_frequency + # ): + # n_samples = len(df) - max_lags + 1 - n_forecasts + + if predict_mode: + targets = np.zeros((1, n_forecasts)) + ## OLD + # # time is the time at each forecast step + # t = df.loc[:, "t"].values + # if max_lags == 0: + # time = np.expand_dims(t, 1) + # else: + # time = _stride_time_features_for_forecasts(t) + # inputs["time"] = time # contains n_lags + n_forecasts + # targets = np.empty_like(time[:, n_lags:]) + # targets = np.nan_to_num(targets) else: - dates = df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts + 1, "ds"] - assert len(dates.shape) == 1 - # Seasonality features - for name, period in config_seasonality.periods.items(): - if period.resolution > 0: - if config_seasonality.computation == "fourier": - # Compute Fourier series components with the specified frequency and order. 
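For orientation while this block is being moved: each Fourier order `i` contributes one sine and one cosine column, so the seasonality feature matrix has shape `(len(dates), 2 * resolution)`, matching the corrected dims comment. A standalone sketch of the same computation with illustrative values:

import numpy as np
import pandas as pd
from datetime import datetime

dates = pd.Series(pd.date_range("2024-01-01", periods=48, freq="H"))
period, resolution = 7.0, 3  # weekly seasonality, Fourier order 3

# days since epoch, as in the patch
t = np.array((dates - datetime(1970, 1, 1)).dt.total_seconds().astype(np.float32)) / (3600 * 24.0)
features = np.column_stack(
    [np.sin(2.0 * (i + 1) * np.pi * t / period) for i in range(resolution)]
    + [np.cos(2.0 * (i + 1) * np.pi * t / period) for i in range(resolution)]
)
assert features.shape == (len(dates), 2 * resolution)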
-                    # convert to days since epoch
-                    t = np.array((dates - datetime(1970, 1, 1)).dt.total_seconds().astype(np.float32)) / (3600 * 24.0)
-                    # features: Matrix with dims (length len(dates), 2*resolution)
-                    features = np.column_stack(
+            targets = df.loc[origin_index + 1 : origin_index + 1 + n_forecasts, "y_scaled"].values
+            targets = np.expand_dims(targets, axis=1)
+            ## Alternative
+            # x = df["y_scaled"].values
+            # targets = np.array([x[origin_index + 1 : origin_index + 1 + n_forecasts]], dtype=x.dtype)
+            ## OLD
+            # # time is the time at each forecast step
+            # t = df.loc[:, "t"].values
+            # if max_lags == 0:
+            #     time = np.expand_dims(t, 1)
+            # else:
+            #     time = _stride_time_features_for_forecasts(t)
+            # inputs["time"] = time  # contains n_lags + n_forecasts
+            # def _stride_future_time_features_for_forecasts(x):
+            #     return np.array([x[max_lags + i : max_lags + i + n_forecasts] for i in range(n_samples)], dtype=x.dtype)
+            # targets = _stride_future_time_features_for_forecasts(df["y_scaled"].values)
+
+        # data is stored in OrderedDict
+        inputs = OrderedDict({})
+
+        # TIME: the time at each sample's lags and forecasts
+        if max_lags == 0:
+            inputs["time"] = df.loc[origin_index, "t"].values
+            # TODO: Possibly need extra dim?
+            # inputs["time"] = np.expand_dims(inputs["time"], 1)
+        else:
+            # extract time value of n_lags steps before and including origin_index and n_forecasts steps after origin_index
+            inputs["time"] = df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts + 1, "t"].values
+            ## OLD: Time
+            # def _stride_time_features_for_forecasts(x):
+            #     window_size = n_lags + n_forecasts

+            #     if x.ndim == 1:
+            #         shape = (n_samples, window_size)
+            #     else:
+            #         shape = (n_samples, window_size) + x.shape[1:]

+            #     stride = x.strides[0]
+            #     strides = (stride, stride) + x.strides[1:]
+            #     start_index = max_lags - n_lags
+            #     return np.lib.stride_tricks.as_strided(x[start_index:], shape=shape, strides=strides)
+            # inputs["time"] = _stride_time_features_for_forecasts(df.loc[:, "t"].values)
+
+        # LAGS: From y-series, extract preceding n_lags steps up to and including origin_index
+        if n_lags >= 1 and "y" in df.columns:
+            # inputs["lags"] = np.array(df.loc[origin_index - n_lags + 1 : origin_index + 1, "y_scaled"].values, dtype=np.float32)
+            inputs["lags"] = df.loc[origin_index - n_lags + 1 : origin_index + 1, "y_scaled"].values
+            # OLD Lags
+            # def _stride_lagged_features(df_col_name, feature_dims):
+            #     # only for case where max_lags > 0
+            #     assert feature_dims >= 1
+            #     series = df.loc[:, df_col_name].values
+            #     # Added dtype=np.float64 to solve the problem with np.isnan for ubuntu test
+            #     return np.array(
+            #         [series[i + max_lags - feature_dims : i + max_lags] for i in range(n_samples)], dtype=np.float32
+            #     )
+            # inputs["lags"] = _stride_lagged_features(df_col_name="y_scaled", feature_dims=n_lags)
+
+        # COVARIATES / LAGGED REGRESSORS: Lagged regressor inputs: analogous to LAGS
+        if config_lagged_regressors is not None and max_lags > 0:
+            lagged_regressors = OrderedDict({})
+            # Future TODO: optimize this computation for many lagged_regressors
+            for lagged_reg in df.columns:
+                if lagged_reg in config_lagged_regressors:
+                    assert config_lagged_regressors[lagged_reg].n_lags > 0
+                    covar_lags = config_lagged_regressors[lagged_reg].n_lags
+                    lagged_regressors[lagged_reg] = df.loc[
+                        origin_index - covar_lags + 1 : origin_index + 1, 
lagged_reg + ].values + inputs["covariates"] = lagged_regressors + # OLD Covariates + # def _stride_lagged_features(df_col_name, feature_dims): + # # only for case where max_lags > 0 + # assert feature_dims >= 1 + # series = df.loc[:, df_col_name].values + # # Added dtype=np.float64 to solve the problem with np.isnan for ubuntu test + # return np.array( + # [series[i + max_lags - feature_dims : i + max_lags] for i in range(n_samples)], dtype=np.float32 + # ) + # for covar in df.columns: + # if covar in config_lagged_regressors: + # assert config_lagged_regressors[covar].n_lags > 0 + # window = config_lagged_regressors[covar].n_lags + # covariates[covar] = _stride_lagged_features(df_col_name=covar, feature_dims=window) + # inputs["covariates"] = covariates + + # SEASONALITIES + if config_seasonality is not None: + seasonalities = OrderedDict({}) + if max_lags == 0: + dates = df.loc[origin_index, "ds"] + else: + dates = df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts + 1, "ds"] + assert len(dates.shape) == 1 + # Seasonality features + for name, period in config_seasonality.periods.items(): + if period.resolution > 0: + if config_seasonality.computation == "fourier": + # Compute Fourier series components with the specified frequency and order. + # convert to days since epoch + t = np.array((dates - datetime(1970, 1, 1)).dt.total_seconds().astype(np.float32)) / ( + 3600 * 24.0 + ) + # features: Matrix with dims (length len(dates), 2*resolution) + features = np.column_stack( + [np.sin((2.0 * (i + 1) * np.pi * t / period.period)) for i in range(period.resolution)] + + [np.cos((2.0 * (i + 1) * np.pi * t / period.period)) for i in range(period.resolution)] + ) + # Single nested loop version: + # features = np.column_stack( + # [ + # fun((2.0 * (i + 1) * np.pi * t / period.period)) + # for i in range(period.resolution) + # for fun in (np.sin, np.cos) + # ] + # ) + else: + raise NotImplementedError + if period.condition_name is not None: + # multiply seasonality features with condition mask/values + features = features * df[period.condition_name].values[:, np.newaxis] + + seasonalities[name] = features + # TODO: Possibly need extra dim? + # seasonalities[name] = np.expand_dims(seasonalities[name], 1) + inputs["seasonalities"] = seasonalities + + ## OLD Seasonality + # def fourier_series_t(t, period, series_order): + # """Provides Fourier series components with the specified frequency and order. + # Note + # ---- + # This function is identical to Meta AI's Prophet Library + # Parameters + # ---------- + # t : pd.Series, float + # Containing time as floating point number of days + # period : float + # Number of days of the period + # series_order : int + # Number of fourier components + # Returns + # ------- + # np.array + # Matrix with seasonality features + # """ + # features = np.column_stack( + # [fun((2.0 * (i + 1) * np.pi * t / period)) for i in range(series_order) for fun in (np.sin, np.cos)] + # ) + # return features + + # def fourier_series(dates, period, series_order): + # """Provides Fourier series components with the specified frequency and order. + # Note + # ---- + # Identical to OG Prophet. 
+ # Parameters + # ---------- + # dates : pd.Series + # Containing time stamps + # period : float + # Number of days of the period + # series_order : int + # Number of fourier components + # Returns + # ------- + # np.array + # Matrix with seasonality features + # """ + # # convert to days since epoch + # t = np.array((dates - datetime(1970, 1, 1)).dt.total_seconds().astype(np.float32)) / (3600 * 24.0) + # return fourier_series_t(t, period, series_order) + + # def seasonal_features_from_dates(df, config_seasonality: configure.ConfigSeasonality): + # """Dataframe with seasonality features. + # Includes seasonality features + # Parameters + # ---------- + # df : pd.DataFrame + # Dataframe with all values + # config_seasonality : configure.ConfigSeasonality + # Configuration for seasonalities + # Returns + # ------- + # OrderedDict + # Dictionary with keys for each period name containing an np.array + # with the respective regression features. each with dims: (len(dates), 2*fourier_order) + # """ + # dates = df["ds"] + # assert len(dates.shape) == 1 + # seasonalities = OrderedDict({}) + # # Seasonality features + # for name, period in config_seasonality.periods.items(): + # if period.resolution > 0: + # if config_seasonality.computation == "fourier": + # # features: Matrix with dims (length len(dates), 2*resolution) + # features = fourier_series( + # dates=dates, + # period=period.period, + # series_order=period.resolution, + # ) + # else: + # raise NotImplementedError + # if period.condition_name is not None + # # multiply seasonality features with condition mask/values: + # features = features * df[period.condition_name].values[:, np.newaxis] + # seasonalities[name] = features + # return seasonalities + + # def _stride_time_features_for_seasonality(x): + # window_size = n_lags + n_forecasts + + # if x.ndim == 1: + # shape = (n_samples, window_size) + # else: + # shape = (n_samples, window_size) + x.shape[1:] + + # stride = x.strides[0] + # strides = (stride, stride) + x.strides[1:] + # start_index = max_lags - n_lags + # return np.lib.stride_tricks.as_strided(x[start_index:], shape=shape, strides=strides) + + # seasonalities = seasonal_features_from_dates(df, config_seasonality) + # for name, features in seasonalities.items(): + # if max_lags == 0: + # seasonalities[name] = np.expand_dims(features, axis=1) + # else: + # # stride into num_forecast at dim=1 for each sample, just like we did with time + # seasonalities[name] = _stride_time_features_for_seasonality(features) + # inputs["seasonalities"] = seasonalities + + # FUTURE REGRESSORS: get the future regressors features + if config_regressors is not None: + # sort and divide regressors into multiplicative and additive + additive_regressors_names = [] + multiplicative_regressors_names = [] + for reg in sorted(df.columns.tolist()): + if reg in config_regressors: + mode = config_regressors[reg].mode + if mode == "additive": + additive_regressors_names.append(reg) + else: + multiplicative_regressors_names.append(reg) + + # create numpy array of values of additive and multiplicative regressors, at correct indexes + # features dims: (n_samples/batch, n_forecasts, n_features/n_regressors) + regressors = OrderedDict({}) + regressors["additive"] = None + regressors["multiplicative"] = None + if max_lags == 0: + if len(additive_regressors_names) > 0: + regressors["additive"] = np.expand_dims( + df.loc[origin_index, additive_regressors_names].values, axis=0 ) - # Single nested loop version: - # features = np.column_stack( - # [ - # fun((2.0 * (i + 
1) * np.pi * t / period.period)) - # for i in range(period.resolution) - # for fun in (np.sin, np.cos) - # ] - # ) - else: - raise NotImplementedError - if period.condition_name is not None: - # multiply seasonality features with condition mask/values - features = features * df[period.condition_name].values[:, np.newaxis] - - seasonalities[name] = features - # TODO: Possibly need extra dim? - # seasonalities[name] = np.expand_dims(seasonalities[name], 1) - inputs["seasonalities"] = seasonalities - - ## OLD Seasonality - # def fourier_series_t(t, period, series_order): - # """Provides Fourier series components with the specified frequency and order. - # Note - # ---- - # This function is identical to Meta AI's Prophet Library - # Parameters - # ---------- - # t : pd.Series, float - # Containing time as floating point number of days - # period : float - # Number of days of the period - # series_order : int - # Number of fourier components - # Returns - # ------- - # np.array - # Matrix with seasonality features - # """ - # features = np.column_stack( - # [fun((2.0 * (i + 1) * np.pi * t / period)) for i in range(series_order) for fun in (np.sin, np.cos)] - # ) - # return features - - # def fourier_series(dates, period, series_order): - # """Provides Fourier series components with the specified frequency and order. - # Note - # ---- - # Identical to OG Prophet. - # Parameters - # ---------- - # dates : pd.Series - # Containing time stamps - # period : float - # Number of days of the period - # series_order : int - # Number of fourier components - # Returns - # ------- - # np.array - # Matrix with seasonality features - # """ - # # convert to days since epoch - # t = np.array((dates - datetime(1970, 1, 1)).dt.total_seconds().astype(np.float32)) / (3600 * 24.0) - # return fourier_series_t(t, period, series_order) - - # def seasonal_features_from_dates(df, config_seasonality: configure.ConfigSeasonality): - # """Dataframe with seasonality features. - # Includes seasonality features - # Parameters - # ---------- - # df : pd.DataFrame - # Dataframe with all values - # config_seasonality : configure.ConfigSeasonality - # Configuration for seasonalities - # Returns - # ------- - # OrderedDict - # Dictionary with keys for each period name containing an np.array - # with the respective regression features. 
each with dims: (len(dates), 2*fourier_order) - # """ - # dates = df["ds"] - # assert len(dates.shape) == 1 - # seasonalities = OrderedDict({}) - # # Seasonality features - # for name, period in config_seasonality.periods.items(): - # if period.resolution > 0: - # if config_seasonality.computation == "fourier": - # # features: Matrix with dims (length len(dates), 2*resolution) - # features = fourier_series( - # dates=dates, - # period=period.period, - # series_order=period.resolution, - # ) - # else: - # raise NotImplementedError - # if period.condition_name is not None - # # multiply seasonality features with condition mask/values: - # features = features * df[period.condition_name].values[:, np.newaxis] - # seasonalities[name] = features - # return seasonalities - - # def _stride_time_features_for_seasonality(x): - # window_size = n_lags + n_forecasts - - # if x.ndim == 1: - # shape = (n_samples, window_size) - # else: - # shape = (n_samples, window_size) + x.shape[1:] - - # stride = x.strides[0] - # strides = (stride, stride) + x.strides[1:] - # start_index = max_lags - n_lags - # return np.lib.stride_tricks.as_strided(x[start_index:], shape=shape, strides=strides) - - # seasonalities = seasonal_features_from_dates(df, config_seasonality) - # for name, features in seasonalities.items(): - # if max_lags == 0: - # seasonalities[name] = np.expand_dims(features, axis=1) - # else: - # # stride into num_forecast at dim=1 for each sample, just like we did with time - # seasonalities[name] = _stride_time_features_for_seasonality(features) - # inputs["seasonalities"] = seasonalities - - # FUTURE REGRESSORS: get the future regressors features - if config_regressors is not None: - # sort and divide regressors into multiplicative and additive - additive_regressors_names = [] - multiplicative_regressors_names = [] - for reg in sorted(df.columns.tolist()): - if reg in config_regressors: - mode = config_regressors[reg].mode - if mode == "additive": - additive_regressors_names.append(reg) - else: - multiplicative_regressors_names.append(reg) - - # create numpy array of values of additive and multiplicative regressors, at correct indexes - # features dims: (n_samples/batch, n_forecasts, n_features/n_regressors) - regressors = OrderedDict({}) - regressors["additive"] = None - regressors["multiplicative"] = None + if len(multiplicative_regressors_names) > 0: + regressors["multiplicative"] = np.expand_dims( + df.loc[origin_index, multiplicative_regressors_names].values, axis=0 + ) + else: + if len(additive_regressors_names) > 0: + regressors_add_future_window = df.loc[ + origin_index + 1 : origin_index + 1 + n_forecasts, additive_regressors_names + ].values + regressors["additive"] = np.expand_dims(regressors_add_future_window, axis=0) + ## OLD + # additive_regressor_feature_windows = [] + # # additive_regressor_feature_windows_lagged = [] + # for i in range(0, len(additive_regressors_names)): + # # stride into num_forecast at dim=1 for each sample, just like we did with time + # x = additive_regressors[:, i] + # window_size = n_lags + n_forecasts + + # if x.ndim == 1: + # shape = (n_samples, window_size) + # else: + # shape = (n_samples, window_size) + x.shape[1:] + + # stride = x.strides[0] + # strides = (stride, stride) + x.strides[1:] + # start_index = max_lags - n_lags + # stride = np.lib.stride_tricks.as_strided(x[start_index:], shape=shape, strides=strides) + # additive_regressor_feature_windows.append(stride) + # additive_regressors = np.dstack(additive_regressor_feature_windows) + # 
regressors["additive"] = additive_regressors + if len(multiplicative_regressors_names) > 0: + regressors_mul_future_window = df.loc[ + origin_index + 1 : origin_index + 1 + n_forecasts, multiplicative_regressors_names + ].values + regressors["multiplicative"] = np.expand_dims(regressors_mul_future_window, axis=0) + inputs["regressors"] = regressors + + ## OLD Future regressors + # additive_regressors, multiplicative_regressors = make_regressors_features(df, config_regressors) + # for max_lags == 0, see code before merge + # if max_lags > 0: + # def _stride_time_features_for_forecasts(x):additive_regressors + # window_size = n_lags + n_forecasts + + # if x.ndim == 1: + # shape = (n_samples, window_size) + # else: + # shape = (n_samples, window_size) + x.shape[1:] + + # stride = x.strides[0] + # strides = (stride, stride) + x.strides[1:] + # start_index = max_lags - n_lags + # return np.lib.stride_tricks.as_strided(x[start_index:], shape=shape, strides=strides) + # if additive_regressors is not None: + # additive_regressor_feature_windows = [] + # # additive_regressor_feature_windows_lagged = [] + # for i in range(0, additive_regressors.shape[1]): + # # stride into num_forecast at dim=1 for each sample, just like we did with time + # stride = _stride_time_features_for_forecasts(additive_regressors[:, i]) + # additive_regressor_feature_windows.append(stride) + # additive_regressors = np.dstack(additive_regressor_feature_windows) + # regressors["additive"] = additive_regressors + + # if multiplicative_regressors is not None: + # multiplicative_regressor_feature_windows = [] + # for i in range(0, multiplicative_regressors.shape[1]): + # stride = _stride_time_features_for_forecasts(multiplicative_regressors[:, i]) + # multiplicative_regressor_feature_windows.append(stride) + # multiplicative_regressors = np.dstack(multiplicative_regressor_feature_windows) + # regressors["multiplicative"] = multiplicative_regressors + # inputs["regressors"] = regressors + + # FUTURE EVENTS: get the events features + # create numpy array of values of additive and multiplicative events, at correct indexes + # features dims: (n_samples/batch, n_forecasts, n_features/n_events) + events = OrderedDict({}) + events["additive"] = None + events["multiplicative"] = None if max_lags == 0: - if len(additive_regressors_names) > 0: - regressors["additive"] = np.expand_dims(df.loc[origin_index, additive_regressors_names].values, axis=0) - if len(multiplicative_regressors_names) > 0: - regressors["multiplicative"] = np.expand_dims( - df.loc[origin_index, multiplicative_regressors_names].values, axis=0 + if len(self.additive_event_and_holiday_names) > 0: + events["additive"] = np.expand_dims( + df.loc[origin_index, self.additive_event_and_holiday_names].values, axis=0 + ) + if len(self.multiplicative_event_and_holiday_names) > 0: + events["multiplicative"] = np.expand_dims( + df.loc[origin_index, self.multiplicative_event_and_holiday_names].values, axis=0 ) else: - if len(additive_regressors_names) > 0: - regressors_add_future_window = df.loc[ - origin_index + 1 : origin_index + 1 + n_forecasts, additive_regressors_names + if len(self.additive_event_and_holiday_names) > 0: + events_add_future_window = df.loc[ + origin_index + 1 : origin_index + 1 + n_forecasts, self.additive_event_and_holiday_names ].values - regressors["additive"] = np.expand_dims(regressors_add_future_window, axis=0) - ## OLD - # additive_regressor_feature_windows = [] - # # additive_regressor_feature_windows_lagged = [] - # for i in range(0, 
len(additive_regressors_names)): - # # stride into num_forecast at dim=1 for each sample, just like we did with time - # x = additive_regressors[:, i] - # window_size = n_lags + n_forecasts - - # if x.ndim == 1: - # shape = (n_samples, window_size) - # else: - # shape = (n_samples, window_size) + x.shape[1:] - - # stride = x.strides[0] - # strides = (stride, stride) + x.strides[1:] - # start_index = max_lags - n_lags - # stride = np.lib.stride_tricks.as_strided(x[start_index:], shape=shape, strides=strides) - # additive_regressor_feature_windows.append(stride) - # additive_regressors = np.dstack(additive_regressor_feature_windows) - # regressors["additive"] = additive_regressors - if len(multiplicative_regressors_names) > 0: - regressors_mul_future_window = df.loc[ - origin_index + 1 : origin_index + 1 + n_forecasts, multiplicative_regressors_names + events["additive"] = np.expand_dims(events_add_future_window, axis=0) + if len(self.multiplicative_event_and_holiday_names) > 0: + events_mul_future_window = df.loc[ + origin_index + 1 : origin_index + 1 + n_forecasts, self.multiplicative_event_and_holiday_names ].values - regressors["multiplicative"] = np.expand_dims(regressors_mul_future_window, axis=0) - inputs["regressors"] = regressors - - ## OLD Future regressors - # additive_regressors, multiplicative_regressors = make_regressors_features(df, config_regressors) - # for max_lags == 0, see code before merge - # if max_lags > 0: - # def _stride_time_features_for_forecasts(x):additive_regressors - # window_size = n_lags + n_forecasts - - # if x.ndim == 1: - # shape = (n_samples, window_size) - # else: - # shape = (n_samples, window_size) + x.shape[1:] - - # stride = x.strides[0] - # strides = (stride, stride) + x.strides[1:] - # start_index = max_lags - n_lags - # return np.lib.stride_tricks.as_strided(x[start_index:], shape=shape, strides=strides) - # if additive_regressors is not None: - # additive_regressor_feature_windows = [] - # # additive_regressor_feature_windows_lagged = [] - # for i in range(0, additive_regressors.shape[1]): - # # stride into num_forecast at dim=1 for each sample, just like we did with time - # stride = _stride_time_features_for_forecasts(additive_regressors[:, i]) - # additive_regressor_feature_windows.append(stride) - # additive_regressors = np.dstack(additive_regressor_feature_windows) - # regressors["additive"] = additive_regressors - - # if multiplicative_regressors is not None: - # multiplicative_regressor_feature_windows = [] - # for i in range(0, multiplicative_regressors.shape[1]): - # stride = _stride_time_features_for_forecasts(multiplicative_regressors[:, i]) - # multiplicative_regressor_feature_windows.append(stride) - # multiplicative_regressors = np.dstack(multiplicative_regressor_feature_windows) - # regressors["multiplicative"] = multiplicative_regressors - # inputs["regressors"] = regressors - - # ----------- TODO convert to single sample version ---------------------- - # TODO: Events - # TODO: Postprocessing & Formatting - - # get the events features - if config_events is not None or config_country_holidays is not None: - additive_events, multiplicative_events = make_events_features(df, config_events, config_country_holidays) - - events = OrderedDict({}) - if max_lags == 0: - if additive_events is not None: - events["additive"] = np.expand_dims(additive_events, axis=1) - if multiplicative_events is not None: - events["multiplicative"] = np.expand_dims(multiplicative_events, axis=1) - else: - if additive_events is not None: - 
additive_event_feature_windows = [] - for i in range(0, additive_events.shape[1]): - # stride into num_forecast at dim=1 for each sample, just like we did with time - additive_event_feature_windows.append(_stride_time_features_for_forecasts(additive_events[:, i])) - additive_events = np.dstack(additive_event_feature_windows) - events["additive"] = additive_events - - if multiplicative_events is not None: - multiplicative_event_feature_windows = [] - # multiplicative_event_feature_windows_lagged = [] - for i in range(0, multiplicative_events.shape[1]): - # stride into num_forecast at dim=1 for each sample, just like we did with time - multiplicative_event_feature_windows.append( - _stride_time_features_for_forecasts(multiplicative_events[:, i]) - ) - multiplicative_events = np.dstack(multiplicative_event_feature_windows) - events["multiplicative"] = multiplicative_events + events["multiplicative"] = np.expand_dims(events_mul_future_window, axis=0) inputs["events"] = events - tabularized_input_shapes_str = "" - for key, value in inputs.items(): - if key in [ - "seasonalities", - "covariates", - "events", - "regressors", - ]: - for name, period_features in value.items(): - tabularized_input_shapes_str += f" {name} {key} {period_features}\n" - else: - tabularized_input_shapes_str += f" {key} {value.shape} \n" - log.debug(f"Tabularized inputs shapes: \n{tabularized_input_shapes_str}") + ## OLD + # # get the events features + # if config_events is not None or config_country_holidays is not None: + # additive_events, multiplicative_events = make_events_features(df, config_events, config_country_holidays) - return inputs, targets + # events = OrderedDict({}) + # if max_lags == 0: + # if additive_events is not None: + # events["additive"] = np.expand_dims(additive_events, axis=1) + # if multiplicative_events is not None: + # events["multiplicative"] = np.expand_dims(multiplicative_events, axis=1) + # else: + # if additive_events is not None: + # additive_event_feature_windows = [] + # for i in range(0, additive_events.shape[1]): + # # stride into num_forecast at dim=1 for each sample, just like we did with time + # additive_event_feature_windows.append(_stride_time_features_for_forecasts(additive_events[:, i])) + # additive_events = np.dstack(additive_event_feature_windows) + # events["additive"] = additive_events + + # if multiplicative_events is not None: + # multiplicative_event_feature_windows = [] + # # multiplicative_event_feature_windows_lagged = [] + # for i in range(0, multiplicative_events.shape[1]): + # # stride into num_forecast at dim=1 for each sample, just like we did with time + # multiplicative_event_feature_windows.append( + # _stride_time_features_for_forecasts(multiplicative_events[:, i]) + # ) + # multiplicative_events = np.dstack(multiplicative_event_feature_windows) + # events["multiplicative"] = multiplicative_events + # inputs["events"] = events + + # ----------- TODO convert to single sample version ---------------------- + # TODO: Postprocessing & Formatting + + tabularized_input_shapes_str = "" + for key, value in inputs.items(): + if key in [ + "seasonalities", + "covariates", + "events", + "regressors", + ]: + for name, period_features in value.items(): + tabularized_input_shapes_str += f" {name} {key} {period_features}\n" + else: + tabularized_input_shapes_str += f" {key} {value.shape} \n" + log.debug(f"Tabularized inputs shapes: \n{tabularized_input_shapes_str}") + + return inputs, targets def fourier_series(dates, period, series_order): @@ -871,7 +778,7 @@ def 
fourier_series_t(t, period, series_order):
     return features
 
 
-def make_country_specific_holidays_df(year_list, country):
+def make_country_specific_holidays_dict(year_list, country):
     """
     Make dict of country specific holidays for given years and countries
     Parameters
@@ -900,6 +807,32 @@ def make_country_specific_holidays_df(year_list, country):
     return country_specific_holidays_dict
 
 
+def get_event_offset_features(event, config, feature):
+    """
+    Create event offset features for the given event, config and feature
+    Parameters
+    ----------
+    event : str
+        Name of the event
+    config : configure.ConfigEvents
+        User specified events, holidays, and country specific holidays
+    feature : pd.Series
+        Feature for the event
+    Returns
+    -------
+    pd.DataFrame
+        Event features, one shifted column per offset in the event window
+    """
+    events = pd.DataFrame({})
+    lw = config.lower_window
+    uw = config.upper_window
+    for offset in range(lw, uw + 1):
+        key = utils.create_event_names_for_offsets(event, offset)
+        offset_feature = feature.shift(periods=offset, fill_value=0.0)
+        events[key] = offset_feature
+    return events
+
+
 def _create_event_offset_features(event, config, feature, additive_events, multiplicative_events):
     """
     Create event offset features for the given event, config and feature
@@ -932,6 +865,73 @@ def _create_event_offset_features(event, config, feature, additive_events, multi
         multiplicative_events[key] = offset_feature
 
 
+def add_event_features_to_df(
+    df,
+    config_events: Optional[configure.ConfigEvents] = None,
+    config_country_holidays: Optional[configure.ConfigCountryHolidays] = None,
+):
+    """
+    Construct columns containing the features of each event, added to df.
+    Parameters
+    ----------
+    df : pd.DataFrame
+        Dataframe with all values including the user specified events (provided by user)
+    config_events : configure.ConfigEvents
+        User specified events, each with their upper, lower windows (int), regularization
+    config_country_holidays : configure.ConfigCountryHolidays
+        Configurations (holiday_names, upper, lower windows, regularization) for country specific holidays
+    Returns
+    -------
+    pd.DataFrame
+        Input df with columns added for all event features (both user specified and country specific)
+    list of str
+        Names of all additive event and holiday features
+    list of str
+        Names of all multiplicative event and holiday features
+    """
+    # create all additional user specified offset events
+    additive_events_names = []
+    multiplicative_events_names = []
+    if config_events is not None:
+        for event in sorted(list(config_events.keys())):
+            feature = df[event]
+            config = config_events[event]
+            mode = config.mode
+            for offset in range(config.lower_window, config.upper_window + 1):
+                event_offset_name = utils.create_event_names_for_offsets(event, offset)
+                df[event_offset_name] = feature.shift(periods=offset, fill_value=0.0)
+                if mode == "additive":
+                    additive_events_names.append(event_offset_name)
+                else:
+                    multiplicative_events_names.append(event_offset_name)
+
+    # create all country specific holidays and their offsets.
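# As an aside, a minimal runnable sketch of the offset expansion performed in the
# loop above (and repeated for the holidays below): each 0/1 event column is turned
# into one shifted column per offset in [lower_window, upper_window]. The toy frame
# and the "promo" event name are assumptions for illustration only, and the column
# naming is simplified relative to utils.create_event_names_for_offsets.
import pandas as pd

toy = pd.DataFrame({"promo": [0.0, 0.0, 1.0, 0.0, 0.0]})
lower_window, upper_window = -1, 1
for offset in range(lower_window, upper_window + 1):
    toy[f"promo_{offset:+d}"] = toy["promo"].shift(periods=offset, fill_value=0.0)
# The event at row 2 now also marks row 1 (offset -1) and row 3 (offset +1).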
+    additive_holiday_names = []
+    multiplicative_holiday_names = []
+    if config_country_holidays is not None:
+        year_list = list({x.year for x in df.ds})
+        country_holidays_dict = make_country_specific_holidays_dict(year_list, config_country_holidays.country)
+        config = config_country_holidays
+        mode = config.mode
+        for holiday in config_country_holidays.holiday_names:
+            # feature = pd.Series([0.0] * df.shape[0])
+            feature = pd.Series(np.zeros(df.shape[0], dtype=np.float32))
+            if holiday in country_holidays_dict.keys():
+                dates = country_holidays_dict[holiday]
+                feature[df.ds.isin(dates)] = 1.0
+            else:
+                raise ValueError(f"Holiday {holiday} not found in country holidays")
+            for offset in range(config.lower_window, config.upper_window + 1):
+                holiday_offset_name = utils.create_event_names_for_offsets(holiday, offset)
+                df[holiday_offset_name] = feature.shift(periods=offset, fill_value=0.0)
+                if mode == "additive":
+                    additive_holiday_names.append(holiday_offset_name)
+                else:
+                    multiplicative_holiday_names.append(holiday_offset_name)
+    # Future TODO: possibly undo merge of events and holidays.
+    additive_event_and_holiday_names = sorted(additive_events_names + additive_holiday_names)
+    multiplicative_event_and_holiday_names = sorted(multiplicative_events_names + multiplicative_holiday_names)
+    return df, additive_event_and_holiday_names, multiplicative_event_and_holiday_names
+
+
 def make_events_features(df, config_events: Optional[configure.ConfigEvents] = None, config_country_holidays=None):
     """
     Construct arrays of all event features
@@ -963,7 +963,7 @@ def make_events_features(df, config_events: Optional[configure.ConfigEvents] = N
     # create all country specific holidays
     if config_country_holidays is not None:
         year_list = list({x.year for x in df.ds})
-        country_holidays_dict = make_country_specific_holidays_df(year_list, config_country_holidays.country)
+        country_holidays_dict = make_country_specific_holidays_dict(year_list, config_country_holidays.country)
         for holiday in config_country_holidays.holiday_names:
             feature = pd.Series([0.0] * df.shape[0])
             if holiday in country_holidays_dict.keys():
@@ -1061,3 +1061,133 @@ def make_events_features(df, config_events: Optional[configure.ConfigEvents] = N
 #             features = features * df[period.condition_name].values[:, np.newaxis]
 #         seasonalities[name] = features
 #     return seasonalities
+
+
+def create_origin_start_end_mask(df_length, max_lags, n_forecasts):
+    """Creates a boolean mask for valid prediction origin positions.
+    (based on limiting input lags and forecast targets at start and end of df)"""
+    if max_lags >= 1:
+        start_pad = np.zeros(max_lags - 1, dtype=bool)
+        valid_targets = np.ones(df_length - max_lags - n_forecasts + 1, dtype=bool)
+        end_pad = np.zeros(n_forecasts, dtype=bool)
+        target_start_end_mask = np.concatenate((start_pad, valid_targets, end_pad), axis=None)
+    elif max_lags == 0 and n_forecasts == 1:
+        # without lags, forecast targets and origins are identical
+        target_start_end_mask = np.ones(df_length, dtype=bool)
+    else:
+        raise ValueError(f"max_lags value of {max_lags} not supported for n_forecasts {n_forecasts}.")
+    return target_start_end_mask
+
+
+def create_prediction_frequency_filter_mask(df: pd.DataFrame, prediction_frequency=None):
+    """Filters prediction origin index from df based on the forecast frequency setting.
+
+    Filters based on the timestamp of the last lag, right before the targets start.
+
+    Parameters
+    ----------
+    prediction_frequency : dict
+        periodic interval in which forecasts should be made.
+    Note
+    ----
+    E.g.
if prediction_frequency=7, forecasts are only made on every 7th step (once in a week in case of daily + resolution). + + Returns boolean mask where prediction origin indexes to be included are True, and the rest False. + """ + # !! IMPORTANT + # TODO: Adjust top level documentation to specify that the filter is applied to prediction ORIGIN, not targets start. + # !! IMPORTANT + + mask = np.ones((len(df),), dtype=bool) + + # Basic case: no filter + if prediction_frequency is None or prediction_frequency == 1: + return mask + + # OLD: timestamps were created from "ds" column in tabularization and then re-converted here + # timestamps = pd.to_datetime([x["timestamps"][0] for x in df]) + # OR + # timestamps = df["timestamps"].apply(lambda x: pd.to_datetime(x[0])) + + timestamps = pd.to_datetime(df.loc[:, "ds"].values) + filter_masks = [] + for key, value in prediction_frequency.items(): + if key == "daily-hour": + mask = timestamps.hour == value + elif key == "weekly-day": + mask = timestamps.dayofweek == value + elif key == "monthly-day": + mask = timestamps.day == value + elif key == "yearly-month": + mask = timestamps.month == value + elif key == "hourly-minute": + mask = timestamps.minute == value + else: + raise ValueError(f"Invalid prediction frequency: {key}") + filter_masks.append(mask) + for m in filter_masks: + mask = np.logical_and(mask, m) + return mask + + +def create_nan_mask(df, predict_steps, drop_missing): + """Creates mask for each prediction origin, + accounting for corresponding input lags / forecast targets containing any NaN values. + + Parameters + ---------- + drop_missing : bool + whether to automatically drop missing samples from the data + predict_steps : int + number of steps to predict + """ + # IMPORTANT !! + # TODO implement actual filtering + return np.ones(len(df), dtype=bool) + + # Create index mapping of sample index to df index + # - Filter missing samples (does not actually drop, but creates indexmapping) + # -- drop nan analogous to `self.drop_nan_after_init(self.df, self.kwargs["predict_steps"], self.kwargs["config_missing"].drop_missing) + # Note: needs to also account for NANs in lagged inputs or in n_forecasts, not just first target. + # Implement a convolutional filter for targets and each lagged regressor. + # Also account for future regressors and events. + + # Rewrite to return mask instead of filtering df: + nan_idx = [] + # NaNs in inputs + for key, data in self.inputs.items(): + if isinstance(data, torch.Tensor): + nans = torch.where(torch.isnan(data))[0].tolist() + if len(nans) > 0: + nan_idx += nans + elif isinstance(data, dict): + for subkey, subdata in data.items(): + nans = torch.where(torch.isnan(subdata))[0].tolist() + if len(nans) > 0: + nan_idx += nans + + # NaNs in targets that are not inserted for prediction at the end + nans = torch.where(torch.isnan(self.targets))[0].tolist() + if len(nans) > 0: + for idx in nans: + if idx not in nan_idx and idx < len(self) - predict_steps: + nan_idx.append(idx) + + nan_idx = list(set(nan_idx)) + nan_idx.sort() + if drop_missing and len(nan_idx) > 0: + log.warning(f"{len(nan_idx)} samples with missing values were dropped from the data. 
") + for key, data in self.inputs.items(): + if key not in ["time", "lags"]: # "time_lagged" + for name, features in data.items(): + self.inputs[key][name] = np.delete(self.inputs[key][name], nan_idx, 0) + else: + self.inputs[key] = np.delete(self.inputs[key], nan_idx, 0) + self.targets = np.delete(self.targets, nan_idx, 0) + self.length = self.inputs["time"].shape[0] + if not drop_missing and len(nan_idx) > 0: + raise ValueError( + "Inputs/targets with missing values detected. " + "Please either adjust imputation parameters, or set 'drop_missing' to True to drop those samples." + ) From dfc60063ada5d5d01d03baa90816ce3904f6ffcb Mon Sep 17 00:00:00 2001 From: ourownstory Date: Wed, 24 Jan 2024 15:24:35 -0800 Subject: [PATCH 034/128] finish events and holidays conversion --- neuralprophet/time_dataset.py | 83 ++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 35 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 5a82a56f6..5b3451e30 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -66,21 +66,24 @@ def __init__(self, df, name, **kwargs): # Future TODO: integrate some of these preprocessing steps happening outside? self.df = df + self.df = self.df.reset_index(drop=True) # Future TODO: Is this still necessary post restructuring? self.name = name self.meta = OrderedDict({}) self.meta["df_name"] = self.name self.config_args = kwargs - # TODO: Preprocessing of features (added to self.df) - # - events and holidays: convert date-time occurence dictionary to a column of values in the self.df - # - These will then be later tabularized in __get_item___ - # add events based on configuration to df - self.df = self.df.reset_index(drop=True) + # Preprocessing of events and holidays features (added to self.df) ( self.df, self.additive_event_and_holiday_names, self.multiplicative_event_and_holiday_names, ) = add_event_features_to_df(self.df, self.config_args.config_events, self.config_args.config_country_holidays) + # pre-sort additive/multiplicative regressors + self.additive_regressors_names, self.multiplicative_regressors_names = sort_regressor_names( + self.config_args.config_regressors + ) + + # Construct index map self.sample2index_map, self.length = self.create_sample2index_map(df) def __getitem__(self, index): @@ -118,8 +121,13 @@ def __getitem__(self, index): df_index = self.sample_index_to_df_index(index) # Tabularize - extract features from dataframe at given target index position - inputs, target = tabularize_univariate_datetime_single_index(self.df, origin_index=df_index, **self.config_args) + inputs, target = self.tabularize_univariate_datetime_single_index( + self, self.df, origin_index=df_index, **self.config_args + ) + # ------------------ + # Important! 
TODO: integrate format_sample into tabularize_univariate_datetime_single_index sample, target = self.format_sample(inputs, target) + # -------------------------- return sample, target, self.meta def __len__(self): @@ -557,23 +565,14 @@ def tabularize_univariate_datetime_single_index( # inputs["seasonalities"] = seasonalities # FUTURE REGRESSORS: get the future regressors features + # create numpy array of values of additive and multiplicative regressors, at correct indexes + # features dims: (n_samples/batch, n_forecasts, n_features/n_regressors) if config_regressors is not None: - # sort and divide regressors into multiplicative and additive - additive_regressors_names = [] - multiplicative_regressors_names = [] - for reg in sorted(df.columns.tolist()): - if reg in config_regressors: - mode = config_regressors[reg].mode - if mode == "additive": - additive_regressors_names.append(reg) - else: - multiplicative_regressors_names.append(reg) - - # create numpy array of values of additive and multiplicative regressors, at correct indexes - # features dims: (n_samples/batch, n_forecasts, n_features/n_regressors) regressors = OrderedDict({}) regressors["additive"] = None regressors["multiplicative"] = None + additive_regressors_names = self.additive_regressors_names + multiplicative_regressors_names = self.multiplicative_regressors_names if max_lags == 0: if len(additive_regressors_names) > 0: regressors["additive"] = np.expand_dims( @@ -711,22 +710,20 @@ def tabularize_univariate_datetime_single_index( # events["multiplicative"] = multiplicative_events # inputs["events"] = events - # ----------- TODO convert to single sample version ---------------------- - # TODO: Postprocessing & Formatting - - tabularized_input_shapes_str = "" - for key, value in inputs.items(): - if key in [ - "seasonalities", - "covariates", - "events", - "regressors", - ]: - for name, period_features in value.items(): - tabularized_input_shapes_str += f" {name} {key} {period_features}\n" - else: - tabularized_input_shapes_str += f" {key} {value.shape} \n" - log.debug(f"Tabularized inputs shapes: \n{tabularized_input_shapes_str}") + # ONLY FOR DEBUGGING + # tabularized_input_shapes_str = "" + # for key, value in inputs.items(): + # if key in [ + # "seasonalities", + # "covariates", + # "events", + # "regressors", + # ]: + # for name, period_features in value.items(): + # tabularized_input_shapes_str += f" {name} {key} {period_features}\n" + # else: + # tabularized_input_shapes_str += f" {key} {value.shape} \n" + # log.debug(f"Tabularized inputs shapes: \n{tabularized_input_shapes_str}") return inputs, targets @@ -1191,3 +1188,19 @@ def create_nan_mask(df, predict_steps, drop_missing): "Inputs/targets with missing values detected. " "Please either adjust imputation parameters, or set 'drop_missing' to True to drop those samples." 
) + + +def sort_regressor_names(config): + additive_regressors_names = [] + multiplicative_regressors_names = [] + if config is not None: + # sort and divide regressors into multiplicative and additive + additive_regressors_names = [] + multiplicative_regressors_names = [] + for reg in sorted(list(config.keys())): + mode = config[reg].mode + if mode == "additive": + additive_regressors_names.append(reg) + else: + multiplicative_regressors_names.append(reg) + return additive_regressors_names, multiplicative_regressors_names From 62c4818497b5941882b83ff78c3bf925c7c1f7ce Mon Sep 17 00:00:00 2001 From: ourownstory Date: Thu, 25 Jan 2024 15:52:58 -0800 Subject: [PATCH 035/128] debug timedataset --- neuralprophet/time_dataset.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 5b3451e30..8927abc45 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -77,10 +77,12 @@ def __init__(self, df, name, **kwargs): self.df, self.additive_event_and_holiday_names, self.multiplicative_event_and_holiday_names, - ) = add_event_features_to_df(self.df, self.config_args.config_events, self.config_args.config_country_holidays) + ) = add_event_features_to_df( + self.df, self.config_args["config_events"], self.config_args["config_country_holidays"] + ) # pre-sort additive/multiplicative regressors self.additive_regressors_names, self.multiplicative_regressors_names = sort_regressor_names( - self.config_args.config_regressors + self.config_args["config_regressors"] ) # Construct index map @@ -122,7 +124,7 @@ def __getitem__(self, index): # Tabularize - extract features from dataframe at given target index position inputs, target = self.tabularize_univariate_datetime_single_index( - self, self.df, origin_index=df_index, **self.config_args + df=self.df, origin_index=df_index, **self.config_args ) # ------------------ # Important! 
TODO: integrate format_sample into tabularize_univariate_datetime_single_index @@ -164,7 +166,9 @@ def create_sample2index_map(self, df): # TODO Create NAN-free index mapping of sample index to df index # analogous to `self.drop_nan_after_init( # self.df, self.kwargs["predict_steps"], self.kwargs["config_missing"].drop_missing) - nan_mask = create_nan_mask(df) # boolean array where NAN are False + nan_mask = create_nan_mask( + df, self.config_args["predict_steps"], self.config_args["config_missing"].drop_missing + ) # boolean array where NAN are False # Combine masks mask = np.logical_and(prediction_frequency_mask, origin_start_end_mask) From e7b8f0c076e48d54517b361adb38a044d281198d Mon Sep 17 00:00:00 2001 From: ourownstory Date: Thu, 25 Jan 2024 16:22:39 -0800 Subject: [PATCH 036/128] debugging --- neuralprophet/time_dataset.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 8927abc45..72b49d12c 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -72,6 +72,13 @@ def __init__(self, df, name, **kwargs): self.meta["df_name"] = self.name self.config_args = kwargs + self.two_level_inputs = [ + "seasonalities", + "covariates", + "events", + "regressors", + ] + # Preprocessing of events and holidays features (added to self.df) ( self.df, From 235eea8043da2e46faad188bea4043f6c834abfd Mon Sep 17 00:00:00 2001 From: ourownstory Date: Thu, 25 Jan 2024 16:26:30 -0800 Subject: [PATCH 037/128] make_country_specific_holidays_df --- neuralprophet/time_dataset.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 72b49d12c..a1eaf1ce1 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -1215,3 +1215,33 @@ def sort_regressor_names(config): else: multiplicative_regressors_names.append(reg) return additive_regressors_names, multiplicative_regressors_names + + +## TODO: move - used elsewhere, not in this file. 
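# For reference, the shape of the mapping this helper builds: holiday name -> list
# of dates. A small sketch using the `holidays` package directly (assuming, as a
# simplification, that get_country_holidays wraps it; "US" and 2024 are arbitrary):
from collections import defaultdict
import pandas as pd
import holidays

by_name = defaultdict(list)
for date, name in holidays.country_holidays("US", years=[2024]).items():
    by_name[name].append(pd.to_datetime(date))
# e.g. by_name["Christmas Day"] == [Timestamp("2024-12-25")]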
+def make_country_specific_holidays_df(year_list, country): + """ + Make dataframe of country specific holidays for given years and countries + Parameters + ---------- + year_list : list + List of years + country : str, list + List of country names + Returns + ------- + pd.DataFrame + Containing country specific holidays df with columns 'ds' and 'holiday' + """ + # iterate over countries and get holidays for each country + # convert to list if not already + if isinstance(country, str): + country = [country] + country_specific_holidays = {} + for single_country in country: + single_country_specific_holidays = get_country_holidays(single_country, year_list) + # only add holiday if it is not already in the dict + country_specific_holidays.update(single_country_specific_holidays) + country_specific_holidays_dict = defaultdict(list) + for date, holiday in country_specific_holidays.items(): + country_specific_holidays_dict[holiday].append(pd.to_datetime(date)) + return country_specific_holidays_dict From 02ff9bb33bf7c49d1f02d7ed9b2bfb46ba4c88c4 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Thu, 25 Jan 2024 16:35:48 -0800 Subject: [PATCH 038/128] remove uses of df.loc[...].values --- neuralprophet/time_dataset.py | 63 +++++++++-------------------------- 1 file changed, 16 insertions(+), 47 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index a1eaf1ce1..f8902e331 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -344,7 +344,7 @@ def tabularize_univariate_datetime_single_index( # targets = np.empty_like(time[:, n_lags:]) # targets = np.nan_to_num(targets) else: - targets = df.loc[origin_index + 1 : origin_index + 1 + n_forecasts, "y_scaled"].values + targets = df.loc[origin_index + 1 : origin_index + 1 + n_forecasts, "y_scaled"] targets = np.expand_dims(targets, axis=1) ## Alternative # x = df["y_scaled"].values @@ -366,12 +366,12 @@ def tabularize_univariate_datetime_single_index( # TIME: the time at each sample's lags and forecasts if max_lags == 0: - inputs["time"] = df.loc[origin_index, "t"].values + inputs["time"] = df.loc[origin_index, "t"] # TODO: Possibly need extra dim? 
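# The "extra dim" question above concerns scalar vs. vector shapes: with
# max_lags == 0, a single origin yields a scalar t, whereas batched downstream
# code may expect at least one axis. A quick illustration of what np.expand_dims
# would do here (values are arbitrary):
import numpy as np

t_scalar = np.float32(0.5)
print(np.expand_dims(t_scalar, 0).shape)         # (1,)   scalar promoted to 1-d
print(np.expand_dims(np.array([0.5]), 1).shape)  # (1, 1) extra trailing axis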
# inputs["time"] = np.expand_dims(inputs["time"], 1) else: # extract time value of n_lags steps before and icluding origin_index and n_forecasts steps after origin_index - inputs["time"] = df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts + 1, "t"].values + inputs["time"] = df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts + 1, "t"] ## OLD: Time # def _stride_time_features_for_forecasts(x): # window_size = n_lags + n_forecasts @@ -390,7 +390,7 @@ def tabularize_univariate_datetime_single_index( # LAGS: From y-series, extract preceeding n_lags steps up to and including origin_index if n_lags >= 1 and "y" in df.columns: # inputs["lags"] = np.array(df.loc[origin_index - n_lags + 1 : origin_index + 1, "y_scaled"].values, dtype=np.float32) - inputs["lags"] = df.loc[origin_index - n_lags + 1 : origin_index + 1, "y_scaled"].values + inputs["lags"] = df.loc[origin_index - n_lags + 1 : origin_index + 1, "y_scaled"] # OLD Lags # def _stride_lagged_features(df_col_name, feature_dims): # # only for case where max_lags > 0 @@ -410,9 +410,7 @@ def tabularize_univariate_datetime_single_index( if lagged_reg in config_lagged_regressors: assert config_lagged_regressors[lagged_reg].n_lags > 0 covar_lags = config_lagged_regressors[lagged_reg].n_lags - lagged_regressors[lagged_reg] = df.loc[ - origin_index - covar_lags + 1 : origin_index + 1, lagged_reg - ].values + lagged_regressors[lagged_reg] = df.loc[origin_index - covar_lags + 1 : origin_index + 1, lagged_reg] inputs["covariates"] = lagged_regressors # OLD Covariates # def _stride_lagged_features(df_col_name, feature_dims): @@ -586,18 +584,16 @@ def tabularize_univariate_datetime_single_index( multiplicative_regressors_names = self.multiplicative_regressors_names if max_lags == 0: if len(additive_regressors_names) > 0: - regressors["additive"] = np.expand_dims( - df.loc[origin_index, additive_regressors_names].values, axis=0 - ) + regressors["additive"] = np.expand_dims(df.loc[origin_index, additive_regressors_names], axis=0) if len(multiplicative_regressors_names) > 0: regressors["multiplicative"] = np.expand_dims( - df.loc[origin_index, multiplicative_regressors_names].values, axis=0 + df.loc[origin_index, multiplicative_regressors_names], axis=0 ) else: if len(additive_regressors_names) > 0: regressors_add_future_window = df.loc[ origin_index + 1 : origin_index + 1 + n_forecasts, additive_regressors_names - ].values + ] regressors["additive"] = np.expand_dims(regressors_add_future_window, axis=0) ## OLD # additive_regressor_feature_windows = [] @@ -622,7 +618,7 @@ def tabularize_univariate_datetime_single_index( if len(multiplicative_regressors_names) > 0: regressors_mul_future_window = df.loc[ origin_index + 1 : origin_index + 1 + n_forecasts, multiplicative_regressors_names - ].values + ] regressors["multiplicative"] = np.expand_dims(regressors_mul_future_window, axis=0) inputs["regressors"] = regressors @@ -669,23 +665,21 @@ def tabularize_univariate_datetime_single_index( events["multiplicative"] = None if max_lags == 0: if len(self.additive_event_and_holiday_names) > 0: - events["additive"] = np.expand_dims( - df.loc[origin_index, self.additive_event_and_holiday_names].values, axis=0 - ) + events["additive"] = np.expand_dims(df.loc[origin_index, self.additive_event_and_holiday_names], axis=0) if len(self.multiplicative_event_and_holiday_names) > 0: events["multiplicative"] = np.expand_dims( - df.loc[origin_index, self.multiplicative_event_and_holiday_names].values, axis=0 + df.loc[origin_index, 
self.multiplicative_event_and_holiday_names], axis=0 ) else: if len(self.additive_event_and_holiday_names) > 0: events_add_future_window = df.loc[ origin_index + 1 : origin_index + 1 + n_forecasts, self.additive_event_and_holiday_names - ].values + ] events["additive"] = np.expand_dims(events_add_future_window, axis=0) if len(self.multiplicative_event_and_holiday_names) > 0: events_mul_future_window = df.loc[ origin_index + 1 : origin_index + 1 + n_forecasts, self.multiplicative_event_and_holiday_names - ].values + ] events["multiplicative"] = np.expand_dims(events_mul_future_window, axis=0) inputs["events"] = events @@ -1118,7 +1112,7 @@ def create_prediction_frequency_filter_mask(df: pd.DataFrame, prediction_frequen # OR # timestamps = df["timestamps"].apply(lambda x: pd.to_datetime(x[0])) - timestamps = pd.to_datetime(df.loc[:, "ds"].values) + timestamps = pd.to_datetime(df.loc[:, "ds"]) filter_masks = [] for key, value in prediction_frequency.items(): if key == "daily-hour": @@ -1217,31 +1211,6 @@ def sort_regressor_names(config): return additive_regressors_names, multiplicative_regressors_names -## TODO: move - used elsewhere, not in this file. +## TODO: rename - used elsewhere, not in this file. def make_country_specific_holidays_df(year_list, country): - """ - Make dataframe of country specific holidays for given years and countries - Parameters - ---------- - year_list : list - List of years - country : str, list - List of country names - Returns - ------- - pd.DataFrame - Containing country specific holidays df with columns 'ds' and 'holiday' - """ - # iterate over countries and get holidays for each country - # convert to list if not already - if isinstance(country, str): - country = [country] - country_specific_holidays = {} - for single_country in country: - single_country_specific_holidays = get_country_holidays(single_country, year_list) - # only add holiday if it is not already in the dict - country_specific_holidays.update(single_country_specific_holidays) - country_specific_holidays_dict = defaultdict(list) - for date, holiday in country_specific_holidays.items(): - country_specific_holidays_dict[holiday].append(pd.to_datetime(date)) - return country_specific_holidays_dict + return make_country_specific_holidays_dict(year_list, country) From 7fda18d55a42d795a67840b8b2b5027ea1d6c93a Mon Sep 17 00:00:00 2001 From: ourownstory Date: Thu, 25 Jan 2024 16:41:40 -0800 Subject: [PATCH 039/128] debug time --- neuralprophet/time_dataset.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index f8902e331..723426405 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -66,7 +66,7 @@ def __init__(self, df, name, **kwargs): # Future TODO: integrate some of these preprocessing steps happening outside? self.df = df - self.df = self.df.reset_index(drop=True) # Future TODO: Is this still necessary post restructuring? 
+ self.df = self.df.reset_index(drop=True) # Needed for index based operations in __get_item__ self.name = name self.meta = OrderedDict({}) self.meta["df_name"] = self.name @@ -435,7 +435,6 @@ def tabularize_univariate_datetime_single_index( dates = df.loc[origin_index, "ds"] else: dates = df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts + 1, "ds"] - assert len(dates.shape) == 1 # Seasonality features for name, period in config_seasonality.periods.items(): if period.resolution > 0: From 621e701ce4a27c655cd16904864cabbf95c72ef3 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Thu, 25 Jan 2024 16:46:40 -0800 Subject: [PATCH 040/128] debugging types --- neuralprophet/time_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 723426405..3c9b99556 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -344,7 +344,7 @@ def tabularize_univariate_datetime_single_index( # targets = np.empty_like(time[:, n_lags:]) # targets = np.nan_to_num(targets) else: - targets = df.loc[origin_index + 1 : origin_index + 1 + n_forecasts, "y_scaled"] + targets = df.loc[origin_index + 1 : origin_index + 1 + n_forecasts, "y_scaled"].values targets = np.expand_dims(targets, axis=1) ## Alternative # x = df["y_scaled"].values From c62f3320e8e5a572aa2a0c6f902baafca61f1df3 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Thu, 25 Jan 2024 21:16:47 -0800 Subject: [PATCH 041/128] debug timedata --- .gitignore | 1 + neuralprophet/time_dataset.py | 57 ++++++++++++++++++----------------- 2 files changed, 30 insertions(+), 28 deletions(-) diff --git a/.gitignore b/.gitignore index 0cb6e7b98..bab9645aa 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,7 @@ tests/metrics/*.svg .vscode/launch.json .vscode/settings.json source/ +debug* # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 3c9b99556..455aa9dde 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -366,9 +366,8 @@ def tabularize_univariate_datetime_single_index( # TIME: the time at each sample's lags and forecasts if max_lags == 0: - inputs["time"] = df.loc[origin_index, "t"] - # TODO: Possibly need extra dim? - # inputs["time"] = np.expand_dims(inputs["time"], 1) + # inputs["time"] = df.loc[origin_index, "t"] + inputs["time"] = np.expand_dims(df.loc[origin_index, "t"], 0) else: # extract time value of n_lags steps before and icluding origin_index and n_forecasts steps after origin_index inputs["time"] = df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts + 1, "t"] @@ -441,9 +440,7 @@ def tabularize_univariate_datetime_single_index( if config_seasonality.computation == "fourier": # Compute Fourier series components with the specified frequency and order. 
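# Self-contained sketch of this Fourier feature construction: for a period p (in
# days) and resolution k, stack sin and cos terms of increasing frequency over t
# expressed in days. The sample values and the yearly period/resolution are
# arbitrary examples; the exact column ordering in the real code may differ.
import numpy as np

t = np.array([0.0, 10.0, 100.0, 200.0])  # days since a reference epoch
period, resolution = 365.25, 3
features = np.column_stack(
    [np.sin(2.0 * (i + 1) * np.pi * t / period) for i in range(resolution)]
    + [np.cos(2.0 * (i + 1) * np.pi * t / period) for i in range(resolution)]
)
print(features.shape)  # (4, 6): 2 * resolution feature columns per timestamp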
# convert to days since epoch - t = np.array((dates - datetime(1970, 1, 1)).dt.total_seconds().astype(np.float32)) / ( - 3600 * 24.0 - ) + t = np.array((dates - datetime(1900, 1, 1)).total_seconds()) / (3600 * 24.0) # features: Matrix with dims (length len(dates), 2*resolution) features = np.column_stack( [np.sin((2.0 * (i + 1) * np.pi * t / period.period)) for i in range(period.resolution)] @@ -659,28 +656,32 @@ def tabularize_univariate_datetime_single_index( # FUTURE EVENTS: get the events features # create numpy array of values of additive and multiplicative events, at correct indexes # features dims: (n_samples/batch, n_forecasts, n_features/n_events) - events = OrderedDict({}) - events["additive"] = None - events["multiplicative"] = None - if max_lags == 0: - if len(self.additive_event_and_holiday_names) > 0: - events["additive"] = np.expand_dims(df.loc[origin_index, self.additive_event_and_holiday_names], axis=0) - if len(self.multiplicative_event_and_holiday_names) > 0: - events["multiplicative"] = np.expand_dims( - df.loc[origin_index, self.multiplicative_event_and_holiday_names], axis=0 - ) - else: - if len(self.additive_event_and_holiday_names) > 0: - events_add_future_window = df.loc[ - origin_index + 1 : origin_index + 1 + n_forecasts, self.additive_event_and_holiday_names - ] - events["additive"] = np.expand_dims(events_add_future_window, axis=0) - if len(self.multiplicative_event_and_holiday_names) > 0: - events_mul_future_window = df.loc[ - origin_index + 1 : origin_index + 1 + n_forecasts, self.multiplicative_event_and_holiday_names - ] - events["multiplicative"] = np.expand_dims(events_mul_future_window, axis=0) - inputs["events"] = events + any_events = 0 < len(self.additive_event_and_holiday_names + self.multiplicative_event_and_holiday_names) + if any_events: + events = OrderedDict({}) + events["additive"] = None + events["multiplicative"] = None + if max_lags == 0: + if len(self.additive_event_and_holiday_names) > 0: + events["additive"] = np.expand_dims( + df.loc[origin_index, self.additive_event_and_holiday_names], axis=0 + ) + if len(self.multiplicative_event_and_holiday_names) > 0: + events["multiplicative"] = np.expand_dims( + df.loc[origin_index, self.multiplicative_event_and_holiday_names], axis=0 + ) + else: + if len(self.additive_event_and_holiday_names) > 0: + events_add_future_window = df.loc[ + origin_index + 1 : origin_index + 1 + n_forecasts, self.additive_event_and_holiday_names + ] + events["additive"] = np.expand_dims(events_add_future_window, axis=0) + if len(self.multiplicative_event_and_holiday_names) > 0: + events_mul_future_window = df.loc[ + origin_index + 1 : origin_index + 1 + n_forecasts, self.multiplicative_event_and_holiday_names + ] + events["multiplicative"] = np.expand_dims(events_mul_future_window, axis=0) + inputs["events"] = events ## OLD # # get the events features From 54edbf491ff8a4ab600101c3e6aab431eb6d74cd Mon Sep 17 00:00:00 2001 From: ourownstory Date: Thu, 25 Jan 2024 21:50:42 -0800 Subject: [PATCH 042/128] debugging time_dataset variable shapes --- neuralprophet/time_dataset.py | 90 +++++++++++++++++++++------------ tests/test_model_performance.py | 7 ++- 2 files changed, 63 insertions(+), 34 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 455aa9dde..1c54ab606 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -232,16 +232,14 @@ def format_sample(self, inputs, targets=None): # if key == "timestamps": sample_input[key] = data # else: sample_input[key] 
= torch.from_numpy(data).type(inputs_dtype[key]) sample_input[key] = torch.from_numpy(data).type(inputs_dtype[key]) - sample_input = self._split_nested_dict(sample_input) # TODO Can this be skipped for a single sample? - # TODO Can this be optimized? + # Alternatively, Can this be optimized? # Split nested dict into list of dicts with same keys as sample_input. - def split_dict(sample_input, index): - return {k: v[index] if not isinstance(v, dict) else split_dict(v, index) for k, v in sample_input.items()} - - length = next(iter(sample_input.values())).shape[0] - sample_input = [split_dict(sample_input, i) for i in range(length)] + # def split_dict(sample_input, index): + # return {k: v[index] if not isinstance(v, dict) else split_dict(v, index) for k, v in sample_input.items()} + # length = next(iter(sample_input.values())).shape[0] + # sample_input = [split_dict(sample_input, i) for i in range(length)] ## timestamps should no longer be present here? # sample_input.pop("timestamps") # Exact timestamps are not needed anymore @@ -332,7 +330,7 @@ def tabularize_univariate_datetime_single_index( # n_samples = len(df) - max_lags + 1 - n_forecasts if predict_mode: - targets = np.zeros((1, n_forecasts)) + targets = np.zeros((1, n_forecasts), dtype=np.float32) ## OLD # # time is the time at each forecast step # t = df.loc[:, "t"].values @@ -345,7 +343,7 @@ def tabularize_univariate_datetime_single_index( # targets = np.nan_to_num(targets) else: targets = df.loc[origin_index + 1 : origin_index + 1 + n_forecasts, "y_scaled"].values - targets = np.expand_dims(targets, axis=1) + targets = np.expand_dims(np.array(targets, dtype=np.float32), axis=0) ## Alternative # x = df["y_scaled"].values # targets = np.array([x[origin_index + 1 : origin_index + 1 + n_forecasts]], dtype=x.dtype) @@ -370,7 +368,7 @@ def tabularize_univariate_datetime_single_index( inputs["time"] = np.expand_dims(df.loc[origin_index, "t"], 0) else: # extract time value of n_lags steps before and icluding origin_index and n_forecasts steps after origin_index - inputs["time"] = df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts + 1, "t"] + inputs["time"] = df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts + 1, "t"].values ## OLD: Time # def _stride_time_features_for_forecasts(x): # window_size = n_lags + n_forecasts @@ -389,7 +387,9 @@ def tabularize_univariate_datetime_single_index( # LAGS: From y-series, extract preceeding n_lags steps up to and including origin_index if n_lags >= 1 and "y" in df.columns: # inputs["lags"] = np.array(df.loc[origin_index - n_lags + 1 : origin_index + 1, "y_scaled"].values, dtype=np.float32) - inputs["lags"] = df.loc[origin_index - n_lags + 1 : origin_index + 1, "y_scaled"] + inputs["lags"] = np.array( + df.loc[origin_index - n_lags + 1 : origin_index + 1, "y_scaled"].values, dtype=np.float32 + ) # OLD Lags # def _stride_lagged_features(df_col_name, feature_dims): # # only for case where max_lags > 0 @@ -409,7 +409,9 @@ def tabularize_univariate_datetime_single_index( if lagged_reg in config_lagged_regressors: assert config_lagged_regressors[lagged_reg].n_lags > 0 covar_lags = config_lagged_regressors[lagged_reg].n_lags - lagged_regressors[lagged_reg] = df.loc[origin_index - covar_lags + 1 : origin_index + 1, lagged_reg] + lagged_regressors[lagged_reg] = df.loc[ + origin_index - covar_lags + 1 : origin_index + 1, lagged_reg + ].values inputs["covariates"] = lagged_regressors # OLD Covariates # def _stride_lagged_features(df_col_name, feature_dims): @@ -431,16 +433,18 @@ def 
tabularize_univariate_datetime_single_index( if config_seasonality is not None: seasonalities = OrderedDict({}) if max_lags == 0: - dates = df.loc[origin_index, "ds"] + dates = pd.Series(df.loc[origin_index, "ds"]) else: - dates = df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts + 1, "ds"] + dates = pd.Series(df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts + 1, "ds"]) # Seasonality features for name, period in config_seasonality.periods.items(): if period.resolution > 0: if config_seasonality.computation == "fourier": # Compute Fourier series components with the specified frequency and order. # convert to days since epoch - t = np.array((dates - datetime(1900, 1, 1)).total_seconds()) / (3600 * 24.0) + t = np.array((dates - datetime(1900, 1, 1)).dt.total_seconds().astype(np.float32)) / ( + 3600 * 24.0 + ) # features: Matrix with dims (length len(dates), 2*resolution) features = np.column_stack( [np.sin((2.0 * (i + 1) * np.pi * t / period.period)) for i in range(period.resolution)] @@ -572,23 +576,24 @@ def tabularize_univariate_datetime_single_index( # FUTURE REGRESSORS: get the future regressors features # create numpy array of values of additive and multiplicative regressors, at correct indexes # features dims: (n_samples/batch, n_forecasts, n_features/n_regressors) - if config_regressors is not None: + any_future_regressors = 0 < len(self.additive_regressors_names + self.multiplicative_regressors_names) + if any_future_regressors: # if config_regressors is not None: regressors = OrderedDict({}) - regressors["additive"] = None - regressors["multiplicative"] = None - additive_regressors_names = self.additive_regressors_names - multiplicative_regressors_names = self.multiplicative_regressors_names + # regressors["additive"] = None + # regressors["multiplicative"] = None if max_lags == 0: - if len(additive_regressors_names) > 0: - regressors["additive"] = np.expand_dims(df.loc[origin_index, additive_regressors_names], axis=0) - if len(multiplicative_regressors_names) > 0: + if len(self.additive_regressors_names) > 0: + regressors["additive"] = np.expand_dims( + df.loc[origin_index, self.additive_regressors_names], axis=0 + ) + if len(self.multiplicative_regressors_names) > 0: regressors["multiplicative"] = np.expand_dims( - df.loc[origin_index, multiplicative_regressors_names], axis=0 + df.loc[origin_index, self.multiplicative_regressors_names], axis=0 ) else: - if len(additive_regressors_names) > 0: + if len(self.additive_regressors_names) > 0: regressors_add_future_window = df.loc[ - origin_index + 1 : origin_index + 1 + n_forecasts, additive_regressors_names + origin_index + 1 : origin_index + 1 + n_forecasts, self.additive_regressors_names ] regressors["additive"] = np.expand_dims(regressors_add_future_window, axis=0) ## OLD @@ -611,9 +616,9 @@ def tabularize_univariate_datetime_single_index( # additive_regressor_feature_windows.append(stride) # additive_regressors = np.dstack(additive_regressor_feature_windows) # regressors["additive"] = additive_regressors - if len(multiplicative_regressors_names) > 0: + if len(self.multiplicative_regressors_names) > 0: regressors_mul_future_window = df.loc[ - origin_index + 1 : origin_index + 1 + n_forecasts, multiplicative_regressors_names + origin_index + 1 : origin_index + 1 + n_forecasts, self.multiplicative_regressors_names ] regressors["multiplicative"] = np.expand_dims(regressors_mul_future_window, axis=0) inputs["regressors"] = regressors @@ -659,8 +664,8 @@ def tabularize_univariate_datetime_single_index( any_events 
= 0 < len(self.additive_event_and_holiday_names + self.multiplicative_event_and_holiday_names) if any_events: events = OrderedDict({}) - events["additive"] = None - events["multiplicative"] = None + # events["additive"] = None + # events["multiplicative"] = None if max_lags == 0: if len(self.additive_event_and_holiday_names) > 0: events["additive"] = np.expand_dims( @@ -1211,6 +1216,25 @@ def sort_regressor_names(config): return additive_regressors_names, multiplicative_regressors_names -## TODO: rename - used elsewhere, not in this file. -def make_country_specific_holidays_df(year_list, country): - return make_country_specific_holidays_dict(year_list, country) +# ## TODO: rename - used elsewhere, not in this file. +# def make_country_specific_holidays_df(year_list, country): +# return make_country_specific_holidays_dict(year_list, country) + + +# def split_nested_dict(inputs): +# """Split nested dict into list of dicts. +# Parameters +# ---------- +# inputs : ordered dict +# Nested dict to be split. +# Returns +# ------- +# list of dicts +# List of dicts with same keys as inputs. +# """ + +# def split_dict(inputs, index): +# return {k: v[index] if not isinstance(v, dict) else split_dict(v, index) for k, v in inputs.items()} + +# length = next(iter(inputs.values())).shape[0] +# return [split_dict(inputs, i) for i in range(length)] diff --git a/tests/test_model_performance.py b/tests/test_model_performance.py index 6a519fe03..37d623c23 100644 --- a/tests/test_model_performance.py +++ b/tests/test_model_performance.py @@ -146,6 +146,7 @@ def test_PeytonManning(): def test_YosemiteTemps(): df = pd.read_csv(YOS_FILE) m = NeuralProphet( + learning_rate=0.01, n_lags=36, n_forecasts=12, changepoints_range=0.9, @@ -171,7 +172,10 @@ def test_YosemiteTemps(): def test_AirPassengers(): df = pd.read_csv(AIR_FILE) - m = NeuralProphet(seasonality_mode="multiplicative") + m = NeuralProphet( + learning_rate=0.01, + seasonality_mode="multiplicative", + ) df_train, df_test = m.split_df(df=df, freq="MS", valid_p=0.1) system_speed, std = get_system_speed() @@ -194,6 +198,7 @@ def test_EnergyPriceDaily(): df["temp"] = df["temperature"] m = NeuralProphet( + learning_rate=0.01, n_forecasts=7, n_changepoints=0, yearly_seasonality=True, From 4629bf41d72a47039bdc15bde6443e48b7cab4e2 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Fri, 26 Jan 2024 15:44:53 -0800 Subject: [PATCH 043/128] address indexing and slicing issues, .loc --- neuralprophet/configure.py | 7 +- neuralprophet/data/process.py | 7 +- neuralprophet/df_utils.py | 6 +- neuralprophet/time_dataset.py | 148 +++++++++++++++++++--------------- neuralprophet/time_net.py | 2 +- 5 files changed, 96 insertions(+), 74 deletions(-) diff --git a/neuralprophet/configure.py b/neuralprophet/configure.py index 57ef0c301..0c9c6458e 100644 --- a/neuralprophet/configure.py +++ b/neuralprophet/configure.py @@ -41,10 +41,9 @@ def init_data_params( config_events: Optional[ConfigEvents] = None, config_seasonality: Optional[ConfigSeasonality] = None, ): - if len(df["ID"].unique()) == 1: - if not self.global_normalization: - log.info("Setting normalization to global as only one dataframe provided for training.") - self.global_normalization = True + if len(df["ID"].unique()) == 1 and not self.global_normalization: + log.info("Setting normalization to global as only one dataframe provided for training.") + self.global_normalization = True self.local_data_params, self.global_data_params = df_utils.init_data_params( df=df, normalize=self.normalize, diff --git 
a/neuralprophet/data/process.py b/neuralprophet/data/process.py index f3e44f9bb..6899496fc 100644 --- a/neuralprophet/data/process.py +++ b/neuralprophet/data/process.py @@ -345,6 +345,11 @@ def _validate_column_name( "extra_regressors_multiplicative", "multiplicative_terms", "ID", + "y_scaled", + "ds", + "t", + "y", + "index", ] rn_l = [n + "_lower" for n in reserved_names] rn_u = [n + "_upper" for n in reserved_names] @@ -495,7 +500,7 @@ def _handle_missing_data( df_grouped = df.groupby("ID").apply(lambda x: x.set_index("ds").resample(freq).asfreq()).drop(columns=["ID"]) n_missing_dates = len(df_grouped) - len(df) if n_missing_dates > 0: - df = df_grouped.reset_index() + df = df_grouped.reset_index(drop=True) log.info(f"Added {n_missing_dates} missing dates.") if config_regressors is not None: diff --git a/neuralprophet/df_utils.py b/neuralprophet/df_utils.py index 8a4c4dcb5..fcd12d1f4 100644 --- a/neuralprophet/df_utils.py +++ b/neuralprophet/df_utils.py @@ -1053,7 +1053,7 @@ def add_missing_dates_nan(df, freq): df_resampled = df.resample(freq).asfreq() if "ID" in df.columns: df_resampled["ID"].fillna(df["ID"].iloc[0], inplace=True) - df_resampled.reset_index(inplace=True) + df_resampled.reset_index(drop=True, inplace=True) num_added = len(df_resampled) - len(df) return df_resampled, num_added @@ -1534,10 +1534,10 @@ def drop_missing_from_df(df, drop_missing, predict_steps, n_lags): if all_nan_idx[i + 1] - all_nan_idx[i] > 1: break # drop NaN window - df = df.drop(df.index[window[0] : window[-1] + 1]).reset_index().drop("index", axis=1) + df = df.drop(df.index[window[0] : window[-1] + 1]).reset_index(drop=True) # drop lagged values if window does not occur at the beginning of df if window[0] - (n_lags - 1) >= 0: - df = df.drop(df.index[(window[0] - (n_lags - 1)) : window[0]]).reset_index().drop("index", axis=1) + df = df.drop(df.index[(window[0] - (n_lags - 1)) : window[0]]).reset_index(drop=True) return df diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 1c54ab606..46ce99b5f 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -15,29 +15,6 @@ log = logging.getLogger("NP.time_dataset") -class GlobalTimeDataset(Dataset): - def __init__(self, df, **kwargs): - """Initialize Timedataset from time-series df. - Parameters - ---------- - df : pd.DataFrame - dataframe containing column ``ds``, ``y``, and optionally``ID`` and - normalized columns normalized columns ``ds``, ``y``, ``t``, ``y_scaled`` - **kwargs : dict - Identical to :meth:`tabularize_univariate_datetime` - """ - # # TODO (future): vectorize - timedatasets = [TimeDataset(df_i, df_name, **kwargs) for df_name, df_i in df.groupby("ID")] - self.combined_timedataset = [item for timedataset in timedatasets for item in timedataset] - self.length = sum(timedataset.length for timedataset in timedatasets) - - def __len__(self): - return self.length - - def __getitem__(self, idx): - return self.combined_timedataset[idx] - - class TimeDataset(Dataset): """Create a PyTorch dataset of a tabularized time-series""" @@ -65,11 +42,11 @@ def __init__(self, df, name, **kwargs): # ->_create_dataset calls prep_or_copy_df, then returns GlobalTimeDataset # Future TODO: integrate some of these preprocessing steps happening outside? 
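# Why the reset_index in the change below matters: __getitem__ addresses rows via
# an integer origin_index through df.loc / df.at, which select by *label*. A
# filtered DataFrame keeps its old labels, so label- and position-based access
# diverge unless the index is reset to a clean RangeIndex. A toy illustration
# (data and threshold are arbitrary):
import pandas as pd

df_toy = pd.DataFrame({"y": [10, 11, 12, 13]})
filtered = df_toy[df_toy["y"] > 11]   # keeps original labels 2 and 3
# filtered.loc[0] would raise a KeyError -- label 0 no longer exists
clean = filtered.reset_index(drop=True)
print(clean.loc[0, "y"])              # 12 -- labels align with positions again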
- self.df = df - self.df = self.df.reset_index(drop=True) # Needed for index based operations in __get_item__ - self.name = name + self.df = df.reset_index(drop=True) # Needed for index based operations in __get_item__ + if "index" in list(self.df.columns): # should not be the case + self.df = self.df.drop("index", axis=1) self.meta = OrderedDict({}) - self.meta["df_name"] = self.name + self.meta["df_name"] = name self.config_args = kwargs self.two_level_inputs = [ @@ -211,7 +188,7 @@ def format_sample(self, inputs, targets=None): } targets_dtype = torch.float - sample_target = torch.from_numpy(targets).type(targets_dtype).unsqueeze(dim=2) + sample_target = torch.from_numpy(targets).type(targets_dtype) for key, data in inputs.items(): if key in self.two_level_inputs: @@ -330,7 +307,9 @@ def tabularize_univariate_datetime_single_index( # n_samples = len(df) - max_lags + 1 - n_forecasts if predict_mode: - targets = np.zeros((1, n_forecasts), dtype=np.float32) + # targets = np.zeros((1, n_forecasts), dtype=np.float32) + targets = np.zeros(n_forecasts, dtype=np.float32) + ## OLD # # time is the time at each forecast step # t = df.loc[:, "t"].values @@ -342,42 +321,52 @@ def tabularize_univariate_datetime_single_index( # targets = np.empty_like(time[:, n_lags:]) # targets = np.nan_to_num(targets) else: - targets = df.loc[origin_index + 1 : origin_index + 1 + n_forecasts, "y_scaled"].values - targets = np.expand_dims(np.array(targets, dtype=np.float32), axis=0) - ## Alternative - # x = df["y_scaled"].values - # targets = np.array([x[origin_index + 1 : origin_index + 1 + n_forecasts]], dtype=x.dtype) - ## OLD - # # time is the time at each forecast step - # t = df.loc[:, "t"].values - # if max_lags == 0: - # time = np.expand_dims(t, 1) - # else: - # time = _stride_time_features_for_forecasts(t) - # inputs["time"] = time # contains n_lags + n_forecasts - # def _stride_future_time_features_for_forecasts(x): - # return np.array([x[max_lags + i : max_lags + i + n_forecasts] for i in range(n_samples)], dtype=x.dtype) - # targets = _stride_future_time_features_for_forecasts(df["y_scaled"].values) + if n_forecasts == 1: + if max_lags == 0: + targets = df.at[origin_index, "y_scaled"] + if max_lags > 0: + targets = df.at[origin_index + 1, "y_scaled"] + else: + # Note: df.loc is inclusive of slice end, while df.iloc is not. 
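# The inclusivity note above, made concrete on a RangeIndex (toy data):
import pandas as pd

frame = pd.DataFrame({"y_scaled": range(10)})
print(len(frame.loc[2:5, "y_scaled"]))  # 4 rows: a label slice includes its end
print(len(frame.iloc[2:5]))             # 3 rows: a positional slice excludes it
# Hence .loc[origin_index + 1 : origin_index + n_forecasts] selects exactly
# n_forecasts rows, where .iloc would need an end of origin_index + n_forecasts + 1.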
+ targets = df.loc[origin_index + 1 : origin_index + n_forecasts, "y_scaled"].values + # targets = np.array(targets, dtype=np.float32) # optional + + ## Alternative 1 + # targets = df.loc[:, "y_scaled"].iloc[origin_index + 1 : origin_index + 1 + n_forecasts].values + # targets = np.expand_dims(np.array(targets, dtype=np.float32), axis=0) + ## Alternative 2 + # x = df["y_scaled"].values + # targets = np.array([x[origin_index + 1 : origin_index + 1 + n_forecasts]], dtype=x.dtype) + ## OLD + # # time is the time at each forecast step + # t = df.loc[:, "t"].values + # if max_lags == 0: + # time = np.expand_dims(t, 1) + # else: + # time = _stride_time_features_for_forecasts(t) + # inputs["time"] = time # contains n_lags + n_forecasts + # def _stride_future_time_features_for_forecasts(x): + # return np.array([x[max_lags + i : max_lags + i + n_forecasts] for i in range(n_samples)], dtype=x.dtype) + # targets = _stride_future_time_features_for_forecasts(df["y_scaled"].values) # data is stored in OrderedDict inputs = OrderedDict({}) # TIME: the time at each sample's lags and forecasts if max_lags == 0: - # inputs["time"] = df.loc[origin_index, "t"] - inputs["time"] = np.expand_dims(df.loc[origin_index, "t"], 0) + # inputs["time"] = np.expand_dims(df.at[origin_index, "t"], 0) + inputs["time"] = df.at[origin_index, "t"] else: # extract time value of n_lags steps before and icluding origin_index and n_forecasts steps after origin_index - inputs["time"] = df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts + 1, "t"].values + # Note: df.loc is inclusive of slice end, while df.iloc is not. + inputs["time"] = df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts, "t"].values ## OLD: Time # def _stride_time_features_for_forecasts(x): # window_size = n_lags + n_forecasts - # if x.ndim == 1: # shape = (n_samples, window_size) # else: # shape = (n_samples, window_size) + x.shape[1:] - # stride = x.strides[0] # strides = (stride, stride) + x.strides[1:] # start_index = max_lags - n_lags @@ -385,11 +374,10 @@ def tabularize_univariate_datetime_single_index( # inputs["time"] = _stride_time_features_for_forecasts(df.loc[:, "t"].values) # LAGS: From y-series, extract preceeding n_lags steps up to and including origin_index - if n_lags >= 1 and "y" in df.columns: - # inputs["lags"] = np.array(df.loc[origin_index - n_lags + 1 : origin_index + 1, "y_scaled"].values, dtype=np.float32) - inputs["lags"] = np.array( - df.loc[origin_index - n_lags + 1 : origin_index + 1, "y_scaled"].values, dtype=np.float32 - ) + if n_lags >= 1 and "y_scaled" in df.columns: + # Note: df.loc is inclusive of slice end, while df.iloc is not. + # inputs["lags"] = np.array(df.loc[origin_index - n_lags + 1 : origin_index, "y_scaled"].values, dtype=np.float32) + inputs["lags"] = df.loc[origin_index - n_lags + 1 : origin_index, "y_scaled"].values # OLD Lags # def _stride_lagged_features(df_col_name, feature_dims): # # only for case where max_lags > 0 @@ -407,10 +395,11 @@ def tabularize_univariate_datetime_single_index( # Future TODO: optimize this computation for many lagged_regressors for lagged_reg in df.columns: if lagged_reg in config_lagged_regressors: - assert config_lagged_regressors[lagged_reg].n_lags > 0 covar_lags = config_lagged_regressors[lagged_reg].n_lags + assert covar_lags > 0 + # Note: df.loc is inclusive of slice end, while df.iloc is not. 
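# Index arithmetic for one prediction origin, shared by the lag and covariate
# windows here (n_lags=3, n_forecasts=2, origin_index=10 are arbitrary examples):
n_lags, n_forecasts, origin_index = 3, 2, 10
lag_rows = list(range(origin_index - n_lags + 1, origin_index + 1))           # [8, 9, 10]
target_rows = list(range(origin_index + 1, origin_index + n_forecasts + 1))   # [11, 12]
# Lags end at and include the origin; targets start right after it, so the two
# inclusive .loc slices never overlap.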
lagged_regressors[lagged_reg] = df.loc[ - origin_index - covar_lags + 1 : origin_index + 1, lagged_reg + origin_index - covar_lags + 1 : origin_index, lagged_reg ].values inputs["covariates"] = lagged_regressors # OLD Covariates @@ -433,9 +422,10 @@ def tabularize_univariate_datetime_single_index( if config_seasonality is not None: seasonalities = OrderedDict({}) if max_lags == 0: - dates = pd.Series(df.loc[origin_index, "ds"]) + dates = pd.Series(df.at[origin_index, "ds"]) else: - dates = pd.Series(df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts + 1, "ds"]) + # Note: df.loc is inclusive of slice end, while df.iloc is not. + dates = pd.Series(df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts, "ds"].values) # Seasonality features for name, period in config_seasonality.periods.items(): if period.resolution > 0: @@ -463,10 +453,9 @@ def tabularize_univariate_datetime_single_index( if period.condition_name is not None: # multiply seasonality features with condition mask/values features = features * df[period.condition_name].values[:, np.newaxis] - seasonalities[name] = features # TODO: Possibly need extra dim? - # seasonalities[name] = np.expand_dims(seasonalities[name], 1) + # seasonalities[name] = np.expand_dims(seasonalities[name], 0) inputs["seasonalities"] = seasonalities ## OLD Seasonality @@ -593,7 +582,7 @@ def tabularize_univariate_datetime_single_index( else: if len(self.additive_regressors_names) > 0: regressors_add_future_window = df.loc[ - origin_index + 1 : origin_index + 1 + n_forecasts, self.additive_regressors_names + origin_index + 1 : origin_index + n_forecasts, self.additive_regressors_names ] regressors["additive"] = np.expand_dims(regressors_add_future_window, axis=0) ## OLD @@ -618,7 +607,7 @@ def tabularize_univariate_datetime_single_index( # regressors["additive"] = additive_regressors if len(self.multiplicative_regressors_names) > 0: regressors_mul_future_window = df.loc[ - origin_index + 1 : origin_index + 1 + n_forecasts, self.multiplicative_regressors_names + origin_index + 1 : origin_index + n_forecasts, self.multiplicative_regressors_names ] regressors["multiplicative"] = np.expand_dims(regressors_mul_future_window, axis=0) inputs["regressors"] = regressors @@ -678,12 +667,12 @@ def tabularize_univariate_datetime_single_index( else: if len(self.additive_event_and_holiday_names) > 0: events_add_future_window = df.loc[ - origin_index + 1 : origin_index + 1 + n_forecasts, self.additive_event_and_holiday_names + origin_index + 1 : origin_index + n_forecasts, self.additive_event_and_holiday_names ] events["additive"] = np.expand_dims(events_add_future_window, axis=0) if len(self.multiplicative_event_and_holiday_names) > 0: events_mul_future_window = df.loc[ - origin_index + 1 : origin_index + 1 + n_forecasts, self.multiplicative_event_and_holiday_names + origin_index + 1 : origin_index + n_forecasts, self.multiplicative_event_and_holiday_names ] events["multiplicative"] = np.expand_dims(events_mul_future_window, axis=0) inputs["events"] = events @@ -738,6 +727,35 @@ def tabularize_univariate_datetime_single_index( return inputs, targets +class GlobalTimeDataset(TimeDataset): + def __init__(self, df, **kwargs): + """Initialize Timedataset from time-series df. 
+        Parameters
+        ----------
+        df : pd.DataFrame
+            dataframe containing column ``ds``, ``y``, and optionally ``ID``, plus
+            normalized columns ``ds``, ``y``, ``t``, ``y_scaled``
+        **kwargs : dict
+            Identical to :meth:`tabularize_univariate_datetime`
+        """
+        df_names = list(np.unique(df.loc[:, "ID"].values))
+        if len(df_names) == 1:
+            super().__init__(df, df_names[0], **kwargs)
+        else:
+            raise NotImplementedError
+            # TODO: re-implement with JIT sample computation in TimeDataset
+            # # TODO (future): vectorize
+            # timedatasets = [TimeDataset(df_i, df_name, **kwargs) for df_name, df_i in df.groupby("ID")]
+            # self.combined_timedataset = [item for timedataset in timedatasets for item in timedataset]
+            # self.length = sum(timedataset.length for timedataset in timedatasets)
+
+    # def __len__(self):
+    #     return self.length
+
+    # def __getitem__(self, idx):
+    #     return self.combined_timedataset[idx]
+
+
 def fourier_series(dates, period, series_order):
     """Provides Fourier series components with the specified frequency and order.
     Note
diff --git a/neuralprophet/time_net.py b/neuralprophet/time_net.py
index f2fcbeb80..0379844cf 100644
--- a/neuralprophet/time_net.py
+++ b/neuralprophet/time_net.py
@@ -801,7 +801,7 @@ def training_step(self, batch, batch_idx):
         # Metrics
         if self.metrics_enabled:
             predicted_denorm = self.denormalize(predicted[:, :, 0])
-            target_denorm = self.denormalize(targets.squeeze(dim=2))
+            target_denorm = self.denormalize(targets)
             self.log_dict(self.metrics_train(predicted_denorm, target_denorm), **self.log_args)
             self.log("Loss", loss, **self.log_args)
             self.log("RegLoss", reg_loss, **self.log_args)

From b2f89ed602a592681b9e8f0e41cbba1908e64a7f Mon Sep 17 00:00:00 2001
From: ourownstory
Date: Fri, 26 Jan 2024 16:13:59 -0800
Subject: [PATCH 044/128] fix dimensions except nonstationary components

---
 neuralprophet/time_dataset.py | 47 +++++++++++++++++------------------
 1 file changed, 23 insertions(+), 24 deletions(-)

diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py
index 46ce99b5f..f1673c0b0 100644
--- a/neuralprophet/time_dataset.py
+++ b/neuralprophet/time_dataset.py
@@ -572,19 +572,18 @@ def tabularize_univariate_datetime_single_index(
         #     regressors["multiplicative"] = None
         if max_lags == 0:
             if len(self.additive_regressors_names) > 0:
-                regressors["additive"] = np.expand_dims(
-                    df.loc[origin_index, self.additive_regressors_names], axis=0
-                )
+                regressors["additive"] = df.loc[origin_index, self.additive_regressors_names].values
+                # regressors["additive"] = np.expand_dims(regressors["additive"], axis=0)
             if len(self.multiplicative_regressors_names) > 0:
-                regressors["multiplicative"] = np.expand_dims(
-                    df.loc[origin_index, self.multiplicative_regressors_names], axis=0
-                )
+                regressors["multiplicative"] = df.loc[origin_index, self.multiplicative_regressors_names].values
+                # regressors["multiplicative"] = np.expand_dims(regressors["multiplicative"], axis=0)
         else:
             if len(self.additive_regressors_names) > 0:
-                regressors_add_future_window = df.loc[
+                regressors["additive"] = df.loc[
                     origin_index + 1 : origin_index + n_forecasts, self.additive_regressors_names
-                ]
-                regressors["additive"] = np.expand_dims(regressors_add_future_window, axis=0)
+                ].values
+                # regressors["additive"] = np.expand_dims(regressors["additive"], axis=0)
+
             ## OLD
             # additive_regressor_feature_windows = []
             # # additive_regressor_feature_windows_lagged = []
             # for i in range(0, len(additive_regressors_names)):
             #     # stride into num_forecast at dim=1 for each sample, just like we did with time
             #     x = additive_regressors[:, i]
             #     window_size = n_lags + n_forecasts

             #     if x.ndim == 1:
             #         shape = (n_samples, window_size)
             #     else:
             #         shape = (n_samples, window_size) + x.shape[1:]

             #     stride = x.strides[0]
             #     strides = (stride, stride) + x.strides[1:]
             #     start_index = max_lags - n_lags
             #     stride = np.lib.stride_tricks.as_strided(x[start_index:], shape=shape, strides=strides)
             #     additive_regressor_feature_windows.append(stride)
             #
additive_regressors = np.dstack(additive_regressor_feature_windows) # regressors["additive"] = additive_regressors + if len(self.multiplicative_regressors_names) > 0: - regressors_mul_future_window = df.loc[ + regressors["multiplicative"] = df.loc[ origin_index + 1 : origin_index + n_forecasts, self.multiplicative_regressors_names - ] - regressors["multiplicative"] = np.expand_dims(regressors_mul_future_window, axis=0) + ].values + # regressors["multiplicative"] = np.expand_dims(regressors["multiplicative"], axis=0) + inputs["regressors"] = regressors ## OLD Future regressors @@ -657,24 +658,22 @@ def tabularize_univariate_datetime_single_index( # events["multiplicative"] = None if max_lags == 0: if len(self.additive_event_and_holiday_names) > 0: - events["additive"] = np.expand_dims( - df.loc[origin_index, self.additive_event_and_holiday_names], axis=0 - ) + events["additive"] = df.loc[origin_index, self.additive_event_and_holiday_names].values + # events["additive"] = np.expand_dims( events["additive"], axis=0) if len(self.multiplicative_event_and_holiday_names) > 0: - events["multiplicative"] = np.expand_dims( - df.loc[origin_index, self.multiplicative_event_and_holiday_names], axis=0 - ) + events["multiplicative"] = df.loc[origin_index, self.multiplicative_event_and_holiday_names].values + # events["multiplicative"] = np.expand_dims(events["multiplicative"], axis=0) else: if len(self.additive_event_and_holiday_names) > 0: - events_add_future_window = df.loc[ + events["additive"] = df.loc[ origin_index + 1 : origin_index + n_forecasts, self.additive_event_and_holiday_names - ] - events["additive"] = np.expand_dims(events_add_future_window, axis=0) + ].values + # events["additive"] = np.expand_dims(events["additive"], axis=0) if len(self.multiplicative_event_and_holiday_names) > 0: - events_mul_future_window = df.loc[ + events["multiplicative"] = df.loc[ origin_index + 1 : origin_index + n_forecasts, self.multiplicative_event_and_holiday_names - ] - events["multiplicative"] = np.expand_dims(events_mul_future_window, axis=0) + ].values + # events["multiplicative"] = np.expand_dims(events["multiplicative"], axis=0) inputs["events"] = events ## OLD From c65a10701fe018adbf00cefbf296f9b1e256858d Mon Sep 17 00:00:00 2001 From: ourownstory Date: Fri, 26 Jan 2024 16:51:52 -0800 Subject: [PATCH 045/128] integrate torch formatting into tabularize --- neuralprophet/time_dataset.py | 145 +++++++++++++++++++--------------- 1 file changed, 82 insertions(+), 63 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index f1673c0b0..7967927f3 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -111,10 +111,10 @@ def __getitem__(self, index): df=self.df, origin_index=df_index, **self.config_args ) # ------------------ - # Important! TODO: integrate format_sample into tabularize_univariate_datetime_single_index - sample, target = self.format_sample(inputs, target) + # DONE: integrate format_sample into tabularize_univariate_datetime_single_index + # sample, target = self.format_sample(inputs, target) # -------------------------- - return sample, target, self.meta + return inputs, target, self.meta def __len__(self): """Overrides Parent class method to get data length.""" @@ -167,61 +167,64 @@ def create_sample2index_map(self, df): return sample_index_2_df_origin_index, num_samples - def format_sample(self, inputs, targets=None): - """Convert tabularized sample to correct formats. 
- Parameters - ---------- - inputs : ordered dict - Identical to returns from :meth:`tabularize_univariate_datetime` - targets : np.array, float - Identical to returns from :meth:`tabularize_univariate_datetime` - """ - sample_input = OrderedDict({}) - inputs_dtype = { - "time": torch.float, - # "timestamps": np.datetime64, - "seasonalities": torch.float, - "events": torch.float, - "lags": torch.float, - "covariates": torch.float, - "regressors": torch.float, - } - targets_dtype = torch.float - - sample_target = torch.from_numpy(targets).type(targets_dtype) - - for key, data in inputs.items(): - if key in self.two_level_inputs: - sample_input[key] = OrderedDict({}) - for name, features in data.items(): - if features.dtype != np.float32: - features = features.astype(np.float32, copy=False) - - tensor = torch.from_numpy(features) - - if tensor.dtype != inputs_dtype[key]: - sample_input[key][name] = tensor.to( - dtype=inputs_dtype[key] - ) # this can probably be removed, but was included in the previous code - else: - sample_input[key][name] = tensor - else: - # if key == "timestamps": sample_input[key] = data - # else: sample_input[key] = torch.from_numpy(data).type(inputs_dtype[key]) - sample_input[key] = torch.from_numpy(data).type(inputs_dtype[key]) - - # TODO Can this be skipped for a single sample? - # Alternatively, Can this be optimized? - # Split nested dict into list of dicts with same keys as sample_input. - # def split_dict(sample_input, index): - # return {k: v[index] if not isinstance(v, dict) else split_dict(v, index) for k, v in sample_input.items()} - # length = next(iter(sample_input.values())).shape[0] - # sample_input = [split_dict(sample_input, i) for i in range(length)] - - ## timestamps should no longer be present here? - # sample_input.pop("timestamps") # Exact timestamps are not needed anymore - - return sample_input, sample_target + # def format_sample(self, inputs, targets=None): + # """Convert tabularized sample to correct formats. + # Parameters + # ---------- + # inputs : ordered dict + # Identical to returns from :meth:`tabularize_univariate_datetime` + # targets : np.array, float + # Identical to returns from :meth:`tabularize_univariate_datetime` + # """ + # sample_input = OrderedDict({}) + # sample_input["time"] = inputs["time"] + # if "lags" in inputs.keys(): + # sample_input["lags"] = inputs["lags"] + # inputs_dtype = { + # # "time": torch.float, + # # "timestamps": np.datetime64, + # # "lags": torch.float, + # "seasonalities": torch.float, + # "events": torch.float, + # "covariates": torch.float, + # "regressors": torch.float, + # } + + # for key, data in inputs.items(): + # if key in self.two_level_inputs: + # sample_input[key] = OrderedDict({}) + # for name, features in data.items(): + # if features.dtype != np.float32: + # features = features.astype(np.float32, copy=False) + + # tensor = torch.from_numpy(features) + + # if tensor.dtype != inputs_dtype[key]: + # sample_input[key][name] = tensor.to( + # dtype=inputs_dtype[key] + # ) # this can probably be removed, but was included in the previous code + # else: + # sample_input[key][name] = tensor + + # # No longer needed as - now directly casting to torch in tabularize + # # else: # single_level items + # # sample_input[key] = torch.from_numpy(data).type(inputs_dtype[key]) + # # ## OLD + # # # if key == "timestamps": sample_input[key] = data + # # # else: sample_input[key] = torch.from_numpy(data).type(inputs_dtype[key]) + + # # TODO Can this be skipped for a single sample? 
+ # # Alternatively, Can this be optimized? + # # Split nested dict into list of dicts with same keys as sample_input. + # # def split_dict(sample_input, index): + # # return {k: v[index] if not isinstance(v, dict) else split_dict(v, index) for k, v in sample_input.items()} + # # length = next(iter(sample_input.values())).shape[0] + # # sample_input = [split_dict(sample_input, i) for i in range(length)] + + # ## timestamps should no longer be present here? + # # sample_input.pop("timestamps") # Exact timestamps are not needed anymore + + # return sample_input, targets def tabularize_univariate_datetime_single_index( self, @@ -289,6 +292,9 @@ def tabularize_univariate_datetime_single_index( np.array, float Targets to be predicted of same length as each of the model inputs, dims: (num_samples, n_forecasts) """ + # data is stored in OrderedDict + inputs = OrderedDict({}) + max_lags = get_max_num_lags(config_lagged_regressors, n_lags) n_samples = 1 if max_lags == 0: @@ -308,7 +314,8 @@ def tabularize_univariate_datetime_single_index( if predict_mode: # targets = np.zeros((1, n_forecasts), dtype=np.float32) - targets = np.zeros(n_forecasts, dtype=np.float32) + # targets = np.zeros(n_forecasts, dtype=np.float32) + targets = torch.zeros(n_forecasts, dtype=torch.float32) ## OLD # # time is the time at each forecast step @@ -326,10 +333,12 @@ def tabularize_univariate_datetime_single_index( targets = df.at[origin_index, "y_scaled"] if max_lags > 0: targets = df.at[origin_index + 1, "y_scaled"] + targets = torch.tensor(targets, dtype=torch.float32) else: # Note: df.loc is inclusive of slice end, while df.iloc is not. targets = df.loc[origin_index + 1 : origin_index + n_forecasts, "y_scaled"].values # targets = np.array(targets, dtype=np.float32) # optional + targets = torch.as_tensor(targets, dtype=torch.float32) ## Alternative 1 # targets = df.loc[:, "y_scaled"].iloc[origin_index + 1 : origin_index + 1 + n_forecasts].values @@ -349,17 +358,16 @@ def tabularize_univariate_datetime_single_index( # return np.array([x[max_lags + i : max_lags + i + n_forecasts] for i in range(n_samples)], dtype=x.dtype) # targets = _stride_future_time_features_for_forecasts(df["y_scaled"].values) - # data is stored in OrderedDict - inputs = OrderedDict({}) - # TIME: the time at each sample's lags and forecasts if max_lags == 0: # inputs["time"] = np.expand_dims(df.at[origin_index, "t"], 0) inputs["time"] = df.at[origin_index, "t"] + inputs["time"] = torch.tensor(inputs["time"], dtype=torch.float32) else: # extract time value of n_lags steps before and icluding origin_index and n_forecasts steps after origin_index # Note: df.loc is inclusive of slice end, while df.iloc is not. inputs["time"] = df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts, "t"].values + inputs["time"] = torch.as_tensor(inputs["time"], dtype=torch.float32) ## OLD: Time # def _stride_time_features_for_forecasts(x): # window_size = n_lags + n_forecasts @@ -378,6 +386,7 @@ def tabularize_univariate_datetime_single_index( # Note: df.loc is inclusive of slice end, while df.iloc is not. 
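
# Aside (toy sketch, assuming standard NumPy/PyTorch semantics): torch.as_tensor,
# as used throughout this patch, reuses the NumPy buffer when dtype and device
# already match, whereas torch.tensor always copies the data.
import numpy as np
import torch

arr = np.ones(3, dtype=np.float32)
shared = torch.as_tensor(arr)  # no copy: views the same memory
copied = torch.tensor(arr)     # independent copy
arr[0] = 5.0
assert shared[0].item() == 5.0 and copied[0].item() == 1.0
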
# inputs["lags"] = np.array(df.loc[origin_index - n_lags + 1 : origin_index, "y_scaled"].values, dtype=np.float32) inputs["lags"] = df.loc[origin_index - n_lags + 1 : origin_index, "y_scaled"].values + inputs["lags"] = torch.as_tensor(inputs["lags"], dtype=torch.float32) # OLD Lags # def _stride_lagged_features(df_col_name, feature_dims): # # only for case where max_lags > 0 @@ -401,6 +410,7 @@ def tabularize_univariate_datetime_single_index( lagged_regressors[lagged_reg] = df.loc[ origin_index - covar_lags + 1 : origin_index, lagged_reg ].values + lagged_regressors[lagged_reg] = torch.as_tensor(lagged_regressors[lagged_reg], dtype=torch.float32) inputs["covariates"] = lagged_regressors # OLD Covariates # def _stride_lagged_features(df_col_name, feature_dims): @@ -453,7 +463,7 @@ def tabularize_univariate_datetime_single_index( if period.condition_name is not None: # multiply seasonality features with condition mask/values features = features * df[period.condition_name].values[:, np.newaxis] - seasonalities[name] = features + seasonalities[name] = torch.as_tensor(features, dtype=torch.float32) # TODO: Possibly need extra dim? # seasonalities[name] = np.expand_dims(seasonalities[name], 0) inputs["seasonalities"] = seasonalities @@ -574,15 +584,18 @@ def tabularize_univariate_datetime_single_index( if len(self.additive_regressors_names) > 0: regressors["additive"] = df.loc[origin_index, self.additive_regressors_names].values # regressors["additive"] = np.expand_dims(regressors["additive"], axis=0) + regressors["additive"] = torch.as_tensor(regressors["additive"], dtype=torch.float32) if len(self.multiplicative_regressors_names) > 0: regressors["multiplicative"] = df.loc[origin_index, self.multiplicative_regressors_names].values # regressors["multiplicative"] = np.expand_dims(regressors["multiplicative"], axis=0) + regressors["multiplicative"] = torch.as_tensor(regressors["multiplicative"], dtype=torch.float32) else: if len(self.additive_regressors_names) > 0: regressors["additive"] = df.loc[ origin_index + 1 : origin_index + n_forecasts, self.additive_regressors_names ].values # regressors["additive"] = np.expand_dims(regressors["additive"], axis=0) + regressors["additive"] = torch.as_tensor(regressors["additive"], dtype=torch.float32) ## OLD # additive_regressor_feature_windows = [] @@ -610,6 +623,7 @@ def tabularize_univariate_datetime_single_index( origin_index + 1 : origin_index + n_forecasts, self.multiplicative_regressors_names ].values # regressors["multiplicative"] = np.expand_dims(regressors["multiplicative"], axis=0) + regressors["multiplicative"] = torch.as_tensor(regressors["multiplicative"], dtype=torch.float32) inputs["regressors"] = regressors @@ -660,20 +674,25 @@ def tabularize_univariate_datetime_single_index( if len(self.additive_event_and_holiday_names) > 0: events["additive"] = df.loc[origin_index, self.additive_event_and_holiday_names].values # events["additive"] = np.expand_dims( events["additive"], axis=0) + events["additive"] = torch.as_tensor(events["additive"], dtype=torch.float32) if len(self.multiplicative_event_and_holiday_names) > 0: events["multiplicative"] = df.loc[origin_index, self.multiplicative_event_and_holiday_names].values # events["multiplicative"] = np.expand_dims(events["multiplicative"], axis=0) + events["multiplicative"] = torch.as_tensor(events["multiplicative"], dtype=torch.float32) else: if len(self.additive_event_and_holiday_names) > 0: events["additive"] = df.loc[ origin_index + 1 : origin_index + n_forecasts, 
self.additive_event_and_holiday_names ].values # events["additive"] = np.expand_dims(events["additive"], axis=0) + events["additive"] = torch.as_tensor(events["additive"], dtype=torch.float32) + if len(self.multiplicative_event_and_holiday_names) > 0: events["multiplicative"] = df.loc[ origin_index + 1 : origin_index + n_forecasts, self.multiplicative_event_and_holiday_names ].values # events["multiplicative"] = np.expand_dims(events["multiplicative"], axis=0) + events["multiplicative"] = torch.as_tensor(events["multiplicative"], dtype=torch.float32) inputs["events"] = events ## OLD From af5524ae4db463ae490ae25c9ea7abdf4b5b00ba Mon Sep 17 00:00:00 2001 From: ourownstory Date: Fri, 26 Jan 2024 19:07:03 -0800 Subject: [PATCH 046/128] check shapes --- neuralprophet/time_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 7967927f3..36e21a671 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -42,7 +42,7 @@ def __init__(self, df, name, **kwargs): # ->_create_dataset calls prep_or_copy_df, then returns GlobalTimeDataset # Future TODO: integrate some of these preprocessing steps happening outside? - self.df = df.reset_index(drop=True) # Needed for index based operations in __get_item__ + self.df = df.reset_index(drop=True) # Needed for index based operations in __getitem__ if "index" in list(self.df.columns): # should not be the case self.df = self.df.drop("index", axis=1) self.meta = OrderedDict({}) From 404e3072f8df67ff1d13efc8bb0c67ecccf48cc6 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Fri, 26 Jan 2024 20:58:49 -0800 Subject: [PATCH 047/128] AirPassengers test working! --- neuralprophet/time_dataset.py | 51 ++++++++++++++++++----------------- neuralprophet/time_net.py | 2 +- 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 36e21a671..e811ae5d4 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -313,9 +313,8 @@ def tabularize_univariate_datetime_single_index( # n_samples = len(df) - max_lags + 1 - n_forecasts if predict_mode: - # targets = np.zeros((1, n_forecasts), dtype=np.float32) - # targets = np.zeros(n_forecasts, dtype=np.float32) - targets = torch.zeros(n_forecasts, dtype=torch.float32) + targets = torch.zeros((1, 1, n_forecasts), dtype=torch.float32) + # targets = torch.zeros(n_forecasts, dtype=torch.float32) ## OLD # # time is the time at each forecast step @@ -333,40 +332,42 @@ def tabularize_univariate_datetime_single_index( targets = df.at[origin_index, "y_scaled"] if max_lags > 0: targets = df.at[origin_index + 1, "y_scaled"] - targets = torch.tensor(targets, dtype=torch.float32) else: # Note: df.loc is inclusive of slice end, while df.iloc is not. 
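
# Aside (toy shapes, assumed): the double expand_dims used below turns an
# n_forecasts vector into shape (1, 1, n_forecasts), matching the predict-mode
# placeholder torch.zeros((1, 1, n_forecasts)) introduced above.
import numpy as np

vec = np.zeros(12, dtype=np.float32)             # e.g. n_forecasts = 12
out = np.expand_dims(np.expand_dims(vec, 0), 0)
assert out.shape == (1, 1, 12)
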
targets = df.loc[origin_index + 1 : origin_index + n_forecasts, "y_scaled"].values
-            # targets = np.array(targets, dtype=np.float32)  # optional
-            targets = torch.as_tensor(targets, dtype=torch.float32)
-
-        ## Alternative 1
-        # targets = df.loc[:, "y_scaled"].iloc[origin_index + 1 : origin_index + 1 + n_forecasts].values
-        # targets = np.expand_dims(np.array(targets, dtype=np.float32), axis=0)
-        ## Alternative 2
-        # x = df["y_scaled"].values
-        # targets = np.array([x[origin_index + 1 : origin_index + 1 + n_forecasts]], dtype=x.dtype)
-        ## OLD
-        # # time is the time at each forecast step
-        # t = df.loc[:, "t"].values
-        # if max_lags == 0:
-        #     time = np.expand_dims(t, 1)
-        # else:
-        #     time = _stride_time_features_for_forecasts(t)
-        # inputs["time"] = time  # contains n_lags + n_forecasts
-        # def _stride_future_time_features_for_forecasts(x):
-        #     return np.array([x[max_lags + i : max_lags + i + n_forecasts] for i in range(n_samples)], dtype=x.dtype)
-        # targets = _stride_future_time_features_for_forecasts(df["y_scaled"].values)
+            targets = np.expand_dims(np.expand_dims(targets, 0), 0)
+            targets = torch.as_tensor(targets, dtype=torch.float32)
+
+            ## Alternative 1
+            # targets = df.loc[:, "y_scaled"].iloc[origin_index + 1 : origin_index + 1 + n_forecasts].values
+            # targets = np.expand_dims(np.array(targets, dtype=np.float32), axis=0)
+            ## Alternative 2
+            # x = df["y_scaled"].values
+            # targets = np.array([x[origin_index + 1 : origin_index + 1 + n_forecasts]], dtype=x.dtype)
+            ## OLD
+            # # time is the time at each forecast step
+            # t = df.loc[:, "t"].values
+            # if max_lags == 0:
+            #     time = np.expand_dims(t, 1)
+            # else:
+            #     time = _stride_time_features_for_forecasts(t)
+            # inputs["time"] = time  # contains n_lags + n_forecasts
+            # def _stride_future_time_features_for_forecasts(x):
+            #     return np.array([x[max_lags + i : max_lags + i + n_forecasts] for i in range(n_samples)], dtype=x.dtype)
+            # targets = _stride_future_time_features_for_forecasts(df["y_scaled"].values)

     # TIME: the time at each sample's lags and forecasts
     if max_lags == 0:
-        # inputs["time"] = np.expand_dims(df.at[origin_index, "t"], 0)
         inputs["time"] = df.at[origin_index, "t"]
+        inputs["time"] = np.expand_dims(inputs["time"], 0)
         inputs["time"] = torch.tensor(inputs["time"], dtype=torch.float32)
+
     else:
         # extract time value of n_lags steps before and including origin_index and n_forecasts steps after origin_index
         # Note: df.loc is inclusive of slice end, while df.iloc is not.
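
# Aside (toy frame, assumed): .at, used above, is the scalar counterpart of .loc;
# it fetches a single cell by label and is faster than .loc for one value.
import pandas as pd

df_toy = pd.DataFrame({"t": [0.0, 0.5, 1.0]})
assert df_toy.at[1, "t"] == 0.5
assert df_toy.loc[1, "t"] == df_toy.at[1, "t"]
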
inputs["time"] = df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts, "t"].values + if n_forecasts == 1: + inputs["time"] = np.expand_dims(inputs["time"], 0) inputs["time"] = torch.as_tensor(inputs["time"], dtype=torch.float32) ## OLD: Time # def _stride_time_features_for_forecasts(x): diff --git a/neuralprophet/time_net.py b/neuralprophet/time_net.py index 0379844cf..f2fcbeb80 100644 --- a/neuralprophet/time_net.py +++ b/neuralprophet/time_net.py @@ -801,7 +801,7 @@ def training_step(self, batch, batch_idx): # Metrics if self.metrics_enabled: predicted_denorm = self.denormalize(predicted[:, :, 0]) - target_denorm = self.denormalize(targets) + target_denorm = self.denormalize(targets.squeeze(dim=2)) self.log_dict(self.metrics_train(predicted_denorm, target_denorm), **self.log_args) self.log("Loss", loss, **self.log_args) self.log("RegLoss", reg_loss, **self.log_args) From 6075074e8dfdb48322e7be6915f5863a127d1995 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Fri, 26 Jan 2024 21:03:10 -0800 Subject: [PATCH 048/128] fix dataset generator --- tests/utils/dataset_generators.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/utils/dataset_generators.py b/tests/utils/dataset_generators.py index 4a0440e12..065b91162 100644 --- a/tests/utils/dataset_generators.py +++ b/tests/utils/dataset_generators.py @@ -1,7 +1,7 @@ import numpy as np import pandas as pd -from neuralprophet.time_dataset import make_country_specific_holidays_df +from neuralprophet.time_dataset import make_country_specific_holidays_dict def generate_holiday_dataset(country="US", years=[2022], y_default=1, y_holiday=100, y_holidays_override={}): @@ -11,7 +11,7 @@ def generate_holiday_dataset(country="US", years=[2022], y_default=1, y_holiday= dates = pd.date_range("%i-01-01" % (years[0]), periods=periods, freq="D") df = pd.DataFrame({"ds": dates, "y": y_default}, index=dates) - holidays = make_country_specific_holidays_df(years, country) + holidays = make_country_specific_holidays_dict(years, country) for holiday_name, timestamps in holidays.items(): df.loc[timestamps[0], "y"] = y_holidays_override.get(holiday_name, y_holiday) From d6242a28722cee139b43adcba5e0812454631234 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Mon, 29 Jan 2024 16:56:26 -0800 Subject: [PATCH 049/128] fixed all performance tests but Energy due to nonstationary components --- neuralprophet/time_dataset.py | 11 ++++++----- tests/test_model_performance.py | 14 ++++++++++---- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index e811ae5d4..dd62e3a0b 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -290,7 +290,7 @@ def tabularize_univariate_datetime_single_index( * ``regressors`` (OrderedDict), regressors, each with features (np.array, float) of dims: (num_samples, n_lags) np.array, float - Targets to be predicted of same length as each of the model inputs, dims: (num_samples, n_forecasts) + Targets to be predicted of same length as each of the model inputs, dims: (n_forecasts, 1) """ # data is stored in OrderedDict inputs = OrderedDict({}) @@ -313,7 +313,7 @@ def tabularize_univariate_datetime_single_index( # n_samples = len(df) - max_lags + 1 - n_forecasts if predict_mode: - targets = torch.zeros((1, 1, n_forecasts), dtype=torch.float32) + targets = torch.zeros((n_forecasts, 1), dtype=torch.float32) # targets = torch.zeros(n_forecasts, dtype=torch.float32) ## OLD @@ -332,10 +332,12 @@ def 
tabularize_univariate_datetime_single_index( targets = df.at[origin_index, "y_scaled"] if max_lags > 0: targets = df.at[origin_index + 1, "y_scaled"] + targets = np.expand_dims(targets, 0) + targets = np.expand_dims(targets, 1) # extra dimension at end for quantiles:median else: # Note: df.loc is inclusive of slice end, while df.iloc is not. targets = df.loc[origin_index + 1 : origin_index + n_forecasts, "y_scaled"].values - targets = np.expand_dims(np.expand_dims(targets, 0), 0) + targets = np.expand_dims(targets, 1) # extra dimension at end for quantiles:median targets = torch.as_tensor(targets, dtype=torch.float32) ## Alternative 1 @@ -366,8 +368,6 @@ def tabularize_univariate_datetime_single_index( # extract time value of n_lags steps before and icluding origin_index and n_forecasts steps after origin_index # Note: df.loc is inclusive of slice end, while df.iloc is not. inputs["time"] = df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts, "t"].values - if n_forecasts == 1: - inputs["time"] = np.expand_dims(inputs["time"], 0) inputs["time"] = torch.as_tensor(inputs["time"], dtype=torch.float32) ## OLD: Time # def _stride_time_features_for_forecasts(x): @@ -431,6 +431,7 @@ def tabularize_univariate_datetime_single_index( # SEASONALITIES if config_seasonality is not None: + # TODO: precompute and save fourier features and only tabularize / slide windows when calling __getitem__ seasonalities = OrderedDict({}) if max_lags == 0: dates = pd.Series(df.at[origin_index, "ds"]) diff --git a/tests/test_model_performance.py b/tests/test_model_performance.py index 37d623c23..3c097d2a3 100644 --- a/tests/test_model_performance.py +++ b/tests/test_model_performance.py @@ -125,7 +125,10 @@ def create_metrics_plot(metrics): def test_PeytonManning(): df = pd.read_csv(PEYTON_FILE) - m = NeuralProphet() + m = NeuralProphet( + # learning_rate=0.01, + # epochs=3, + ) df_train, df_test = m.split_df(df=df, freq="D", valid_p=0.1) system_speed, std = get_system_speed() @@ -146,7 +149,8 @@ def test_PeytonManning(): def test_YosemiteTemps(): df = pd.read_csv(YOS_FILE) m = NeuralProphet( - learning_rate=0.01, + # learning_rate=0.01, + # epochs=3, n_lags=36, n_forecasts=12, changepoints_range=0.9, @@ -173,7 +177,8 @@ def test_YosemiteTemps(): def test_AirPassengers(): df = pd.read_csv(AIR_FILE) m = NeuralProphet( - learning_rate=0.01, + # learning_rate=0.01, + # epochs=3, seasonality_mode="multiplicative", ) df_train, df_test = m.split_df(df=df, freq="MS", valid_p=0.1) @@ -198,7 +203,8 @@ def test_EnergyPriceDaily(): df["temp"] = df["temperature"] m = NeuralProphet( - learning_rate=0.01, + # learning_rate=0.01, + # epochs=3, n_forecasts=7, n_changepoints=0, yearly_seasonality=True, From a5ebff9b8440bce435ecfe8a134b1a103f38a20c Mon Sep 17 00:00:00 2001 From: ourownstory Date: Mon, 29 Jan 2024 17:07:47 -0800 Subject: [PATCH 050/128] fixed nonstationary issue. 
all performance tests running

---
 neuralprophet/time_dataset.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py
index dd62e3a0b..766a44387 100644
--- a/neuralprophet/time_dataset.py
+++ b/neuralprophet/time_dataset.py
@@ -594,7 +594,7 @@ def tabularize_univariate_datetime_single_index(
         else:
             if len(self.additive_regressors_names) > 0:
                 regressors["additive"] = df.loc[
-                    origin_index + 1 : origin_index + n_forecasts, self.additive_regressors_names
+                    origin_index + 1 - n_lags : origin_index + n_forecasts, self.additive_regressors_names
                 ].values
@@ -622,7 +622,7 @@ def tabularize_univariate_datetime_single_index(

         if len(self.multiplicative_regressors_names) > 0:
             regressors["multiplicative"] = df.loc[
-                origin_index + 1 : origin_index + n_forecasts, self.multiplicative_regressors_names
+                origin_index + 1 - n_lags : origin_index + n_forecasts, self.multiplicative_regressors_names
             ].values

From a4152e6248e3b3a315f36c07d891c92422e5a64b Mon Sep 17 00:00:00 2001
From: ourownstory
Date: Tue, 30 Jan 2024 11:53:05 -0800
Subject: [PATCH 051/128] refactor tabularize function

---
 neuralprophet/time_dataset.py | 759 +++++++++++-----------------------
 1 file changed, 235 insertions(+), 524 deletions(-)

diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py
index 766a44387..d3dbffd6e 100644
--- a/neuralprophet/time_dataset.py
+++ b/neuralprophet/time_dataset.py
@@ -47,6 +47,11 @@ def __init__(self, df, name, **kwargs):
         self.df = self.df.drop("index", axis=1)
         self.meta = OrderedDict({})
         self.meta["df_name"] = name
+
+        self.predict_mode = kwargs["predict_mode"]
+        self.n_lags = kwargs["n_lags"]
+        self.n_forecasts = kwargs["n_forecasts"]
+        self.max_lags = get_max_num_lags(kwargs["config_lagged_regressors"], self.n_lags)
         self.config_args = kwargs
         self.two_level_inputs = [
@@ -62,7 +67,9 @@ def __init__(self, df, name, **kwargs):
             self.additive_event_and_holiday_names,
             self.multiplicative_event_and_holiday_names,
         ) = add_event_features_to_df(
-            self.df, self.config_args["config_events"], self.config_args["config_country_holidays"]
+            self.df,
+            self.config_args["config_events"],
+            self.config_args["config_country_holidays"],
         )
         # pre-sort additive/multiplicative regressors
         self.additive_regressors_names, self.multiplicative_regressors_names = sort_regressor_names(
@@ -107,13 +114,20 @@ def __getitem__(self, index):
         df_index = self.sample_index_to_df_index(index)

         # Tabularize - extract features from dataframe at given target index position
-        inputs, target = self.tabularize_univariate_datetime_single_index(
-            df=self.df, origin_index=df_index, **self.config_args
+        inputs, target = tabularize_univariate_datetime_single_index(
+            df=self.df,
+            origin_index=df_index,
+            predict_mode=self.predict_mode,
+            n_lags=self.n_lags,
+            max_lags=self.max_lags,
+            n_forecasts=self.n_forecasts,
+            config_seasonality=self.config_args["config_seasonality"],
+            config_lagged_regressors=self.config_args["config_lagged_regressors"],
+            additive_event_and_holiday_names=self.additive_event_and_holiday_names,
+            multiplicative_event_and_holiday_names=self.multiplicative_event_and_holiday_names,
+
additive_regressors_names=self.additive_regressors_names, + multiplicative_regressors_names=self.multiplicative_regressors_names, ) - # ------------------ - # DONE: integrate format_sample into tabularize_univariate_datetime_single_index - # sample, target = self.format_sample(inputs, target) - # -------------------------- return inputs, target, self.meta def __len__(self): @@ -133,16 +147,14 @@ def create_sample2index_map(self, df): # Limit target range due to input lags and number of forecasts df_length = len(df) - max_lags = get_max_num_lags(self.config_args["config_lagged_regressors"], self.config_args["n_lags"]) n_forecasts = self.config_args["n_forecasts"] origin_start_end_mask = create_origin_start_end_mask( - df_length=df_length, max_lags=max_lags, n_forecasts=n_forecasts + df_length=df_length, max_lags=self.max_lags, n_forecasts=n_forecasts ) # Prediction Frequency # Filter missing samples and prediction frequency (does not actually drop, but creates indexmapping) - # analogous to `self.filter_samples_after_init( - # self.kwargs["prediction_frequency"])` + # analogous to `self.filter_samples_after_init(self.kwargs["prediction_frequency"])` prediction_frequency_mask = create_prediction_frequency_filter_mask( df, self.config_args["prediction_frequency"] ) @@ -226,525 +238,224 @@ def create_sample2index_map(self, df): # return sample_input, targets - def tabularize_univariate_datetime_single_index( - self, - df: pd.DataFrame, - origin_index: int, - predict_mode: bool = False, - n_lags: int = 0, - n_forecasts: int = 1, - predict_steps: int = 1, - config_seasonality: Optional[configure.ConfigSeasonality] = None, - config_events: Optional[configure.ConfigEvents] = None, - config_country_holidays=None, - config_lagged_regressors: Optional[configure.ConfigLaggedRegressors] = None, - config_regressors: Optional[configure.ConfigFutureRegressors] = None, - config_missing=None, - config_train=None, - prediction_frequency=None, - ): - """Create a tabular data sample from timeseries dataframe, used for mini-batch creation. - Note - ---- - Data must have no gaps for sample extracted at given index position. - ---------- - df : pd.DataFrame - Sequence of observations with original ``ds``, ``y`` and normalized ``t``, ``y_scaled`` columns - origin_index: int: - dataframe index position of last observed lag before forecast starts. 
- config_seasonality : configure.ConfigSeasonality - Configuration for seasonalities - n_lags : int - Number of lagged values of series to include as model inputs (aka AR-order) - n_forecasts : int - Number of steps to forecast into future - config_events : configure.ConfigEvents - User specified events, each with their upper, lower windows (int) and regularization - config_country_holidays : configure.ConfigCountryHolidays - Configurations (holiday_names, upper, lower windows, regularization) for country specific holidays - config_lagged_regressors : configure.ConfigLaggedRegressors - Configurations for lagged regressors - config_regressors : configure.ConfigFutureRegressors - Configuration for regressors - predict_mode : bool - Chooses the prediction mode - Options - * (default) ``False``: Includes target values - * ``True``: Does not include targets but includes entire dataset as input - Returns - ------- - OrderedDict - Model inputs, each of len(df) but with varying dimensions - Note - ---- - Contains the following data: - Model Inputs - * ``time`` (np.array, float), dims: (num_samples, 1) - * ``seasonalities`` (OrderedDict), named seasonalities - each with features (np.array, float) - dims: (num_samples, n_features[name]) - * ``lags`` (np.array, float), dims: (num_samples, n_lags) - * ``covariates`` (OrderedDict), named covariates, - each with features (np.array, float) of dims: (num_samples, n_lags) - * ``events`` (OrderedDict), events, - each with features (np.array, float) of dims: (num_samples, n_lags) - * ``regressors`` (OrderedDict), regressors, - each with features (np.array, float) of dims: (num_samples, n_lags) - np.array, float - Targets to be predicted of same length as each of the model inputs, dims: (n_forecasts, 1) - """ - # data is stored in OrderedDict - inputs = OrderedDict({}) - max_lags = get_max_num_lags(config_lagged_regressors, n_lags) - n_samples = 1 - if max_lags == 0: - assert n_forecasts == 1 - - # OLD: previous workaround - # learning_rate = config_train.learning_rate - # if ( - # predict_mode - # or (learning_rate is None) - # or config_lagged_regressors - # or config_country_holidays - # or config_events - # or prediction_frequency - # ): - # n_samples = len(df) - max_lags + 1 - n_forecasts - - if predict_mode: - targets = torch.zeros((n_forecasts, 1), dtype=torch.float32) - # targets = torch.zeros(n_forecasts, dtype=torch.float32) - - ## OLD - # # time is the time at each forecast step - # t = df.loc[:, "t"].values - # if max_lags == 0: - # time = np.expand_dims(t, 1) - # else: - # time = _stride_time_features_for_forecasts(t) - # inputs["time"] = time # contains n_lags + n_forecasts - # targets = np.empty_like(time[:, n_lags:]) - # targets = np.nan_to_num(targets) +def tabularize_univariate_datetime_single_index( + df: pd.DataFrame, + origin_index: int, + predict_mode: bool = False, + n_lags: int = 0, + max_lags: int = 0, + n_forecasts: int = 1, + config_seasonality: Optional[configure.ConfigSeasonality] = None, + config_lagged_regressors: Optional[configure.ConfigLaggedRegressors] = None, + # config_events: Optional[configure.ConfigEvents] = None, + # config_country_holidays=None, + additive_event_and_holiday_names: list[str] = [], + multiplicative_event_and_holiday_names: list[str] = [], + # config_regressors: Optional[configure.ConfigFutureRegressors] = None, + additive_regressors_names: list[str] = [], + multiplicative_regressors_names: list[str] = [], +): + """Create a tabular data sample from timeseries dataframe, used for mini-batch creation. 
+    Note
+    ----
+    Data must have no gaps for sample extracted at given index position.
+    Parameters
+    ----------
+    df : pd.DataFrame
+        Sequence of observations with original ``ds``, ``y`` and normalized ``t``, ``y_scaled`` columns
+    origin_index : int
+        dataframe index position of last observed lag before forecast starts.
+    n_forecasts : int
+        Number of steps to forecast into future
+    n_lags : int
+        Number of lagged values of series to include as model inputs (aka AR-order)
+    config_seasonality : configure.ConfigSeasonality
+        Configuration for seasonalities
+    config_lagged_regressors : configure.ConfigLaggedRegressors
+        Configurations for lagged regressors
+    config_events : configure.ConfigEvents
+        User specified events, each with their upper, lower windows (int) and regularization
+    config_country_holidays : configure.ConfigCountryHolidays
+        Configurations (holiday_names, upper, lower windows, regularization) for country specific holidays
+    config_regressors : configure.ConfigFutureRegressors
+        Configuration for regressors
+    predict_mode : bool
+        Chooses the prediction mode
+        Options
+            * (default) ``False``: Includes target values
+            * ``True``: Does not include targets but includes entire dataset as input
+    Returns
+    -------
+    OrderedDict
+        Model inputs, each of len(df) but with varying dimensions
+        Note
+        ----
+        Contains the following data:
+        Model Inputs
+            * ``time`` (np.array, float), dims: (num_samples, 1)
+            * ``seasonalities`` (OrderedDict), named seasonalities
+            each with features (np.array, float) - dims: (num_samples, n_features[name])
+            * ``lags`` (np.array, float), dims: (num_samples, n_lags)
+            * ``covariates`` (OrderedDict), named covariates,
+            each with features (np.array, float) of dims: (num_samples, n_lags)
+            * ``events`` (OrderedDict), events,
+            each with features (np.array, float) of dims: (num_samples, n_lags)
+            * ``regressors`` (OrderedDict), regressors,
+            each with features (np.array, float) of dims: (num_samples, n_lags)
+    np.array, float
+        Targets to be predicted of same length as each of the model inputs, dims: (n_forecasts, 1)
+    """
+    # sample features are stored and returned in OrderedDict
+    inputs = OrderedDict({})
+
+    if max_lags == 0:
+        assert n_forecasts == 1
+
+    if predict_mode:
+        targets = torch.zeros((n_forecasts, 1), dtype=torch.float32)
+    else:
+        if n_forecasts == 1:
+            if max_lags == 0:
+                targets = df.at[origin_index, "y_scaled"]
+            if max_lags > 0:
+                targets = df.at[origin_index + 1, "y_scaled"]
+            targets = np.expand_dims(targets, 0)
+            targets = np.expand_dims(targets, 1)  # extra dimension at end for quantiles:median
+        else:
+            # Note: df.loc is inclusive of slice end, while df.iloc is not.
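
# Aside (toy frame, assumed): the label slice below picks exactly the
# n_forecasts values that follow origin_index, i.e. labels
# origin_index + 1 .. origin_index + n_forecasts, because .loc is end-inclusive.
import pandas as pd

df_toy = pd.DataFrame({"y_scaled": [0.1, 0.2, 0.3, 0.4, 0.5]})
origin_index, n_forecasts = 1, 2
future = df_toy.loc[origin_index + 1 : origin_index + n_forecasts, "y_scaled"].values
assert future.tolist() == [0.3, 0.4]
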
+            targets = df.loc[origin_index + 1 : origin_index + n_forecasts, "y_scaled"].values
+            targets = np.expand_dims(targets, 1)  # extra dimension at end for quantiles:median
+        targets = torch.as_tensor(targets, dtype=torch.float32)
+
+    # TIME: the time at each sample's lags and forecasts
+    if max_lags == 0:
+        inputs["time"] = df.at[origin_index, "t"]
+        inputs["time"] = np.expand_dims(inputs["time"], 0)
+        inputs["time"] = torch.tensor(inputs["time"], dtype=torch.float32)
+
+    else:
+        # extract time value of n_lags steps before and including origin_index and
n_forecasts steps after origin_index
+        # Note: df.loc is inclusive of slice end, while df.iloc is not.
+        inputs["time"] = df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts, "t"].values
+        inputs["time"] = torch.as_tensor(inputs["time"], dtype=torch.float32)
+
+    # LAGS: From y-series, extract preceding n_lags steps up to and including origin_index
+    if n_lags >= 1 and "y_scaled" in df.columns:
+        # Note: df.loc is inclusive of slice end, while df.iloc is not.
+        inputs["lags"] = df.loc[origin_index - n_lags + 1 : origin_index, "y_scaled"].values
+        inputs["lags"] = torch.as_tensor(inputs["lags"], dtype=torch.float32)
+
+    # COVARIATES / LAGGED REGRESSORS: Lagged regressor inputs: analogous to LAGS
+    if config_lagged_regressors is not None and max_lags > 0:
+        lagged_regressors = OrderedDict({})
+        # Future TODO: optimize this computation for many lagged_regressors
+        for lagged_reg in df.columns:
+            if lagged_reg in config_lagged_regressors:
+                covar_lags = config_lagged_regressors[lagged_reg].n_lags
+                assert covar_lags > 0
+                # Note: df.loc is inclusive of slice end, while df.iloc is not.
+                lagged_regressors[lagged_reg] = df.loc[origin_index - covar_lags + 1 : origin_index, lagged_reg].values
+                lagged_regressors[lagged_reg] = torch.as_tensor(lagged_regressors[lagged_reg], dtype=torch.float32)
+        inputs["covariates"] = lagged_regressors
+
+    # SEASONALITIES
+    # TODO: precompute and save fourier features and only tabularize / slide windows when calling __getitem__
+    if config_seasonality is not None:
+        seasonalities = OrderedDict({})
+        if max_lags == 0:
+            dates = pd.Series(df.at[origin_index, "ds"])
+        else:
+            # Note: df.loc is inclusive of slice end, while df.iloc is not.
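
# Aside (toy window, assumed): the dates pulled below span the same
# n_lags + n_forecasts steps as inputs["time"], so each seasonality feature row
# lines up with exactly one lag or forecast position of the sample.
import pandas as pd

dates = pd.date_range("2024-01-01", periods=10, freq="D")
n_lags, n_forecasts, origin_index = 3, 2, 5
window = dates[origin_index - n_lags + 1 : origin_index + n_forecasts + 1]
assert len(window) == n_lags + n_forecasts
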
- dates = pd.Series(df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts, "ds"].values) - # Seasonality features - for name, period in config_seasonality.periods.items(): - if period.resolution > 0: - if config_seasonality.computation == "fourier": - # Compute Fourier series components with the specified frequency and order. - # convert to days since epoch - t = np.array((dates - datetime(1900, 1, 1)).dt.total_seconds().astype(np.float32)) / ( - 3600 * 24.0 - ) - # features: Matrix with dims (length len(dates), 2*resolution) - features = np.column_stack( - [np.sin((2.0 * (i + 1) * np.pi * t / period.period)) for i in range(period.resolution)] - + [np.cos((2.0 * (i + 1) * np.pi * t / period.period)) for i in range(period.resolution)] - ) - # Single nested loop version: - # features = np.column_stack( - # [ - # fun((2.0 * (i + 1) * np.pi * t / period.period)) - # for i in range(period.resolution) - # for fun in (np.sin, np.cos) - # ] - # ) - else: - raise NotImplementedError - if period.condition_name is not None: - # multiply seasonality features with condition mask/values - features = features * df[period.condition_name].values[:, np.newaxis] - seasonalities[name] = torch.as_tensor(features, dtype=torch.float32) - # TODO: Possibly need extra dim? - # seasonalities[name] = np.expand_dims(seasonalities[name], 0) - inputs["seasonalities"] = seasonalities - - ## OLD Seasonality - # def fourier_series_t(t, period, series_order): - # """Provides Fourier series components with the specified frequency and order. - # Note - # ---- - # This function is identical to Meta AI's Prophet Library - # Parameters - # ---------- - # t : pd.Series, float - # Containing time as floating point number of days - # period : float - # Number of days of the period - # series_order : int - # Number of fourier components - # Returns - # ------- - # np.array - # Matrix with seasonality features - # """ - # features = np.column_stack( - # [fun((2.0 * (i + 1) * np.pi * t / period)) for i in range(series_order) for fun in (np.sin, np.cos)] - # ) - # return features - - # def fourier_series(dates, period, series_order): - # """Provides Fourier series components with the specified frequency and order. - # Note - # ---- - # Identical to OG Prophet. - # Parameters - # ---------- - # dates : pd.Series - # Containing time stamps - # period : float - # Number of days of the period - # series_order : int - # Number of fourier components - # Returns - # ------- - # np.array - # Matrix with seasonality features - # """ - # # convert to days since epoch - # t = np.array((dates - datetime(1970, 1, 1)).dt.total_seconds().astype(np.float32)) / (3600 * 24.0) - # return fourier_series_t(t, period, series_order) - - # def seasonal_features_from_dates(df, config_seasonality: configure.ConfigSeasonality): - # """Dataframe with seasonality features. - # Includes seasonality features - # Parameters - # ---------- - # df : pd.DataFrame - # Dataframe with all values - # config_seasonality : configure.ConfigSeasonality - # Configuration for seasonalities - # Returns - # ------- - # OrderedDict - # Dictionary with keys for each period name containing an np.array - # with the respective regression features. 
each with dims: (len(dates), 2*fourier_order) - # """ - # dates = df["ds"] - # assert len(dates.shape) == 1 - # seasonalities = OrderedDict({}) - # # Seasonality features - # for name, period in config_seasonality.periods.items(): - # if period.resolution > 0: - # if config_seasonality.computation == "fourier": - # # features: Matrix with dims (length len(dates), 2*resolution) - # features = fourier_series( - # dates=dates, - # period=period.period, - # series_order=period.resolution, - # ) - # else: - # raise NotImplementedError - # if period.condition_name is not None - # # multiply seasonality features with condition mask/values: - # features = features * df[period.condition_name].values[:, np.newaxis] - # seasonalities[name] = features - # return seasonalities - - # def _stride_time_features_for_seasonality(x): - # window_size = n_lags + n_forecasts - - # if x.ndim == 1: - # shape = (n_samples, window_size) - # else: - # shape = (n_samples, window_size) + x.shape[1:] - - # stride = x.strides[0] - # strides = (stride, stride) + x.strides[1:] - # start_index = max_lags - n_lags - # return np.lib.stride_tricks.as_strided(x[start_index:], shape=shape, strides=strides) - - # seasonalities = seasonal_features_from_dates(df, config_seasonality) - # for name, features in seasonalities.items(): - # if max_lags == 0: - # seasonalities[name] = np.expand_dims(features, axis=1) - # else: - # # stride into num_forecast at dim=1 for each sample, just like we did with time - # seasonalities[name] = _stride_time_features_for_seasonality(features) - # inputs["seasonalities"] = seasonalities - - # FUTURE REGRESSORS: get the future regressors features - # create numpy array of values of additive and multiplicative regressors, at correct indexes - # features dims: (n_samples/batch, n_forecasts, n_features/n_regressors) - any_future_regressors = 0 < len(self.additive_regressors_names + self.multiplicative_regressors_names) - if any_future_regressors: # if config_regressors is not None: - regressors = OrderedDict({}) - # regressors["additive"] = None - # regressors["multiplicative"] = None - if max_lags == 0: - if len(self.additive_regressors_names) > 0: - regressors["additive"] = df.loc[origin_index, self.additive_regressors_names].values - # regressors["additive"] = np.expand_dims(regressors["additive"], axis=0) - regressors["additive"] = torch.as_tensor(regressors["additive"], dtype=torch.float32) - if len(self.multiplicative_regressors_names) > 0: - regressors["multiplicative"] = df.loc[origin_index, self.multiplicative_regressors_names].values - # regressors["multiplicative"] = np.expand_dims(regressors["multiplicative"], axis=0) - regressors["multiplicative"] = torch.as_tensor(regressors["multiplicative"], dtype=torch.float32) - else: - if len(self.additive_regressors_names) > 0: - regressors["additive"] = df.loc[ - origin_index + 1 - n_lags : origin_index + n_forecasts, self.additive_regressors_names - ].values - # regressors["additive"] = np.expand_dims(regressors["additive"], axis=0) - regressors["additive"] = torch.as_tensor(regressors["additive"], dtype=torch.float32) - - ## OLD - # additive_regressor_feature_windows = [] - # # additive_regressor_feature_windows_lagged = [] - # for i in range(0, len(additive_regressors_names)): - # # stride into num_forecast at dim=1 for each sample, just like we did with time - # x = additive_regressors[:, i] - # window_size = n_lags + n_forecasts - - # if x.ndim == 1: - # shape = (n_samples, window_size) - # else: - # shape = (n_samples, window_size) + 
x.shape[1:] - - # stride = x.strides[0] - # strides = (stride, stride) + x.strides[1:] - # start_index = max_lags - n_lags - # stride = np.lib.stride_tricks.as_strided(x[start_index:], shape=shape, strides=strides) - # additive_regressor_feature_windows.append(stride) - # additive_regressors = np.dstack(additive_regressor_feature_windows) - # regressors["additive"] = additive_regressors - - if len(self.multiplicative_regressors_names) > 0: - regressors["multiplicative"] = df.loc[ - origin_index + 1 - n_lags : origin_index + n_forecasts, self.multiplicative_regressors_names - ].values - # regressors["multiplicative"] = np.expand_dims(regressors["multiplicative"], axis=0) - regressors["multiplicative"] = torch.as_tensor(regressors["multiplicative"], dtype=torch.float32) - - inputs["regressors"] = regressors - - ## OLD Future regressors - # additive_regressors, multiplicative_regressors = make_regressors_features(df, config_regressors) - # for max_lags == 0, see code before merge - # if max_lags > 0: - # def _stride_time_features_for_forecasts(x):additive_regressors - # window_size = n_lags + n_forecasts - - # if x.ndim == 1: - # shape = (n_samples, window_size) - # else: - # shape = (n_samples, window_size) + x.shape[1:] - - # stride = x.strides[0] - # strides = (stride, stride) + x.strides[1:] - # start_index = max_lags - n_lags - # return np.lib.stride_tricks.as_strided(x[start_index:], shape=shape, strides=strides) - # if additive_regressors is not None: - # additive_regressor_feature_windows = [] - # # additive_regressor_feature_windows_lagged = [] - # for i in range(0, additive_regressors.shape[1]): - # # stride into num_forecast at dim=1 for each sample, just like we did with time - # stride = _stride_time_features_for_forecasts(additive_regressors[:, i]) - # additive_regressor_feature_windows.append(stride) - # additive_regressors = np.dstack(additive_regressor_feature_windows) - # regressors["additive"] = additive_regressors - - # if multiplicative_regressors is not None: - # multiplicative_regressor_feature_windows = [] - # for i in range(0, multiplicative_regressors.shape[1]): - # stride = _stride_time_features_for_forecasts(multiplicative_regressors[:, i]) - # multiplicative_regressor_feature_windows.append(stride) - # multiplicative_regressors = np.dstack(multiplicative_regressor_feature_windows) - # regressors["multiplicative"] = multiplicative_regressors - # inputs["regressors"] = regressors - - # FUTURE EVENTS: get the events features - # create numpy array of values of additive and multiplicative events, at correct indexes - # features dims: (n_samples/batch, n_forecasts, n_features/n_events) - any_events = 0 < len(self.additive_event_and_holiday_names + self.multiplicative_event_and_holiday_names) - if any_events: - events = OrderedDict({}) - # events["additive"] = None - # events["multiplicative"] = None - if max_lags == 0: - if len(self.additive_event_and_holiday_names) > 0: - events["additive"] = df.loc[origin_index, self.additive_event_and_holiday_names].values - # events["additive"] = np.expand_dims( events["additive"], axis=0) - events["additive"] = torch.as_tensor(events["additive"], dtype=torch.float32) - if len(self.multiplicative_event_and_holiday_names) > 0: - events["multiplicative"] = df.loc[origin_index, self.multiplicative_event_and_holiday_names].values - # events["multiplicative"] = np.expand_dims(events["multiplicative"], axis=0) - events["multiplicative"] = torch.as_tensor(events["multiplicative"], dtype=torch.float32) - else: - if 
len(self.additive_event_and_holiday_names) > 0: - events["additive"] = df.loc[ - origin_index + 1 : origin_index + n_forecasts, self.additive_event_and_holiday_names - ].values - # events["additive"] = np.expand_dims(events["additive"], axis=0) - events["additive"] = torch.as_tensor(events["additive"], dtype=torch.float32) - - if len(self.multiplicative_event_and_holiday_names) > 0: - events["multiplicative"] = df.loc[ - origin_index + 1 : origin_index + n_forecasts, self.multiplicative_event_and_holiday_names - ].values - # events["multiplicative"] = np.expand_dims(events["multiplicative"], axis=0) - events["multiplicative"] = torch.as_tensor(events["multiplicative"], dtype=torch.float32) - inputs["events"] = events - - ## OLD - # # get the events features - # if config_events is not None or config_country_holidays is not None: - # additive_events, multiplicative_events = make_events_features(df, config_events, config_country_holidays) - - # events = OrderedDict({}) - # if max_lags == 0: - # if additive_events is not None: - # events["additive"] = np.expand_dims(additive_events, axis=1) - # if multiplicative_events is not None: - # events["multiplicative"] = np.expand_dims(multiplicative_events, axis=1) - # else: - # if additive_events is not None: - # additive_event_feature_windows = [] - # for i in range(0, additive_events.shape[1]): - # # stride into num_forecast at dim=1 for each sample, just like we did with time - # additive_event_feature_windows.append(_stride_time_features_for_forecasts(additive_events[:, i])) - # additive_events = np.dstack(additive_event_feature_windows) - # events["additive"] = additive_events - - # if multiplicative_events is not None: - # multiplicative_event_feature_windows = [] - # # multiplicative_event_feature_windows_lagged = [] - # for i in range(0, multiplicative_events.shape[1]): - # # stride into num_forecast at dim=1 for each sample, just like we did with time - # multiplicative_event_feature_windows.append( - # _stride_time_features_for_forecasts(multiplicative_events[:, i]) - # ) - # multiplicative_events = np.dstack(multiplicative_event_feature_windows) - # events["multiplicative"] = multiplicative_events - # inputs["events"] = events - - # ONLY FOR DEBUGGING - # tabularized_input_shapes_str = "" - # for key, value in inputs.items(): - # if key in [ - # "seasonalities", - # "covariates", - # "events", - # "regressors", - # ]: - # for name, period_features in value.items(): - # tabularized_input_shapes_str += f" {name} {key} {period_features}\n" - # else: - # tabularized_input_shapes_str += f" {key} {value.shape} \n" - # log.debug(f"Tabularized inputs shapes: \n{tabularized_input_shapes_str}") - - return inputs, targets + dates = pd.Series(df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts, "ds"].values) + # Seasonality features + for name, period in config_seasonality.periods.items(): + if period.resolution > 0: + if config_seasonality.computation == "fourier": + # Compute Fourier series components with the specified frequency and order. 
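+                    # A hedged worked example of the two steps below (values are illustrative,
+                    # not from the data): for a single date one day after the 1900-01-01
+                    # reference, t = 1.0; with period=1 and resolution=2 the feature row is
+                    # [sin(2*pi*t), sin(4*pi*t), cos(2*pi*t), cos(4*pi*t)] = [0, 0, 1, 1],
+                    # matching the documented dims (len(dates), 2*resolution).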
+ # convert to days since epoch + t = np.array((dates - datetime(1900, 1, 1)).dt.total_seconds().astype(np.float32)) / (3600 * 24.0) + # features: Matrix with dims (length len(dates), 2*resolution) + features = np.column_stack( + [np.sin((2.0 * (i + 1) * np.pi * t / period.period)) for i in range(period.resolution)] + + [np.cos((2.0 * (i + 1) * np.pi * t / period.period)) for i in range(period.resolution)] + ) + else: + raise NotImplementedError + if period.condition_name is not None: + # multiply seasonality features with condition mask/values + features = features * df[period.condition_name].values[:, np.newaxis] + seasonalities[name] = torch.as_tensor(features, dtype=torch.float32) + inputs["seasonalities"] = seasonalities + + # FUTURE REGRESSORS: get the future regressors features + # create numpy array of values of additive and multiplicative regressors, at correct indexes + # features dims: (n_samples/batch, n_forecasts, n_features/n_regressors) + any_future_regressors = 0 < len(additive_regressors_names + multiplicative_regressors_names) + if any_future_regressors: # if config_regressors is not None: + regressors = OrderedDict({}) + if max_lags == 0: + if len(additive_regressors_names) > 0: + regressors["additive"] = df.loc[origin_index, additive_regressors_names].values + regressors["additive"] = torch.as_tensor(regressors["additive"], dtype=torch.float32) + if len(multiplicative_regressors_names) > 0: + regressors["multiplicative"] = df.loc[origin_index, multiplicative_regressors_names].values + regressors["multiplicative"] = torch.as_tensor(regressors["multiplicative"], dtype=torch.float32) + else: + if len(additive_regressors_names) > 0: + regressors["additive"] = df.loc[ + origin_index + 1 - n_lags : origin_index + n_forecasts, additive_regressors_names + ].values + regressors["additive"] = torch.as_tensor(regressors["additive"], dtype=torch.float32) + + if len(multiplicative_regressors_names) > 0: + regressors["multiplicative"] = df.loc[ + origin_index + 1 - n_lags : origin_index + n_forecasts, multiplicative_regressors_names + ].values + regressors["multiplicative"] = torch.as_tensor(regressors["multiplicative"], dtype=torch.float32) + inputs["regressors"] = regressors + + # FUTURE EVENTS: get the events features + # create numpy array of values of additive and multiplicative events, at correct indexes + # features dims: (n_samples/batch, n_forecasts, n_features/n_events) + any_events = 0 < len(additive_event_and_holiday_names + multiplicative_event_and_holiday_names) + if any_events: + events = OrderedDict({}) + if max_lags == 0: + if len(additive_event_and_holiday_names) > 0: + events["additive"] = df.loc[origin_index, additive_event_and_holiday_names].values + events["additive"] = torch.as_tensor(events["additive"], dtype=torch.float32) + if len(multiplicative_event_and_holiday_names) > 0: + events["multiplicative"] = df.loc[origin_index, multiplicative_event_and_holiday_names].values + events["multiplicative"] = torch.as_tensor(events["multiplicative"], dtype=torch.float32) + else: + if len(additive_event_and_holiday_names) > 0: + events["additive"] = df.loc[ + origin_index + 1 : origin_index + n_forecasts, additive_event_and_holiday_names + ].values + events["additive"] = torch.as_tensor(events["additive"], dtype=torch.float32) + + if len(multiplicative_event_and_holiday_names) > 0: + events["multiplicative"] = df.loc[ + origin_index + 1 : origin_index + n_forecasts, multiplicative_event_and_holiday_names + ].values + events["multiplicative"] = 
torch.as_tensor(events["multiplicative"], dtype=torch.float32) + inputs["events"] = events + + # ONLY FOR DEBUGGING + # tabularized_input_shapes_str = "" + # for key, value in inputs.items(): + # if key in [ + # "seasonalities", + # "covariates", + # "events", + # "regressors", + # ]: + # for name, period_features in value.items(): + # tabularized_input_shapes_str += f" {name} {key} {period_features}\n" + # else: + # tabularized_input_shapes_str += f" {key} {value.shape} \n" + # log.debug(f"Tabularized inputs shapes: \n{tabularized_input_shapes_str}") + return inputs, targets class GlobalTimeDataset(TimeDataset): From fba0d0db14fbce9f14c5b008ffadd522505ea36d Mon Sep 17 00:00:00 2001 From: ourownstory Date: Tue, 30 Jan 2024 14:10:46 -0800 Subject: [PATCH 052/128] fix bug --- neuralprophet/df_utils.py | 15 +++++++-------- neuralprophet/forecaster.py | 4 +++- neuralprophet/time_dataset.py | 10 ++++++---- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/neuralprophet/df_utils.py b/neuralprophet/df_utils.py index fcd12d1f4..7d569af98 100644 --- a/neuralprophet/df_utils.py +++ b/neuralprophet/df_utils.py @@ -88,15 +88,15 @@ def return_df_in_original_format(df, received_ID_col=False, received_single_time return new_df -def get_max_num_lags(config_lagged_regressors: Optional[ConfigLaggedRegressors], n_lags: int) -> int: +def get_max_num_lags(n_lags: int, config_lagged_regressors: Optional[ConfigLaggedRegressors]) -> int: """Get the greatest number of lags between the autoregression lags and the covariates lags. Parameters ---------- - config_lagged_regressors : configure.ConfigLaggedRegressors - Configurations for lagged regressors n_lags : int number of lagged values of series to include as model inputs + config_lagged_regressors : configure.ConfigLaggedRegressors + Configurations for lagged regressors Returns ------- @@ -104,12 +104,11 @@ def get_max_num_lags(config_lagged_regressors: Optional[ConfigLaggedRegressors], Maximum number of lags between the autoregression lags and the covariates lags. """ if config_lagged_regressors is not None: - log.debug("config_lagged_regressors exists") - max_n_lags = max([n_lags] + [val.n_lags for key, val in config_lagged_regressors.items()]) + # log.debug("config_lagged_regressors exists") + return max([n_lags] + [val.n_lags for key, val in config_lagged_regressors.items()]) else: - log.debug("config_lagged_regressors does not exist") - max_n_lags = n_lags - return max_n_lags + # log.debug("config_lagged_regressors does not exist") + return n_lags def merge_dataframes(df: pd.DataFrame) -> pd.DataFrame: diff --git a/neuralprophet/forecaster.py b/neuralprophet/forecaster.py index 58137abe4..47897221f 100644 --- a/neuralprophet/forecaster.py +++ b/neuralprophet/forecaster.py @@ -951,7 +951,9 @@ def fit( if self.fitted is True and not continue_training: log.error("Model has already been fitted. 
Re-fitting may break or produce different results.") - self.max_lags = df_utils.get_max_num_lags(self.config_lagged_regressors, self.n_lags) + self.max_lags = df_utils.get_max_num_lags( + n_lags=self.n_lags, config_lagged_regressors=self.config_lagged_regressors + ) if self.max_lags == 0 and self.n_forecasts > 1: self.n_forecasts = 1 self.predict_steps = 1 diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index d3dbffd6e..e9c299e47 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -48,10 +48,12 @@ def __init__(self, df, name, **kwargs): self.meta = OrderedDict({}) self.meta["df_name"] = name - self.predict_mode = (kwargs["predict_mode"],) - self.n_lags = (kwargs["n_lags"],) - self.n_forecasts = (kwargs["n_forecasts"],) - self.max_lags = get_max_num_lags(kwargs["config_lagged_regressors"], self.n_lags) + self.predict_mode = kwargs["predict_mode"] + self.n_lags = kwargs["n_lags"] + self.n_forecasts = kwargs["n_forecasts"] + self.max_lags = get_max_num_lags( + n_lags=self.n_lags, config_lagged_regressors=kwargs["config_lagged_regressors"] + ) self.config_args = kwargs self.two_level_inputs = [ From 3493d8abab63136f8eab1197021169ae51061ea2 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Tue, 30 Jan 2024 14:59:21 -0800 Subject: [PATCH 053/128] initial build of GlobalTimeDataset --- neuralprophet/data/process.py | 2 +- neuralprophet/time_dataset.py | 41 +++++++++++++++++++++++++++-------- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/neuralprophet/data/process.py b/neuralprophet/data/process.py index 6899496fc..1b1be0b1c 100644 --- a/neuralprophet/data/process.py +++ b/neuralprophet/data/process.py @@ -500,7 +500,7 @@ def _handle_missing_data( df_grouped = df.groupby("ID").apply(lambda x: x.set_index("ds").resample(freq).asfreq()).drop(columns=["ID"]) n_missing_dates = len(df_grouped) - len(df) if n_missing_dates > 0: - df = df_grouped.reset_index(drop=True) + df = df_grouped.reset_index() log.info(f"Added {n_missing_dates} missing dates.") if config_regressors is not None: diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index e9c299e47..f3d55308e 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -86,7 +86,7 @@ def __getitem__(self, index): Parameters ---------- index : int - Sample location in dataset + Sample location in dataset, starting at 0, maximum at length-1 Returns ------- OrderedDict @@ -471,22 +471,45 @@ def __init__(self, df, **kwargs): **kwargs : dict Identical to :meth:`tabularize_univariate_datetime` """ - df_names = list(np.unique(df.loc[:, "ID"].values)) - if len(df_names) == 1: - super().__init__(df, df_names[0], **kwargs) + self.df_names = sorted(list(np.unique(df.loc[:, "ID"].values))) + if len(self.df_names) == 1: + super().__init__(df, self.df_names[0], **kwargs) else: - raise NotImplementedError + self.datasets = OrderedDict({}) + for df_name in self.df_names: + self.datasets[df_name] = TimeDataset(df[df["ID"] == df_name], df_name, **kwargs) + self.length = sum(dataset.length for (name, dataset) in self.datasets.items()) + self.global_sample_to_local_ID = np.full(shape=self.length, fill_value="__df__", dtype=str) + self.global_sample_to_local_sample = np.full(shape=self.length, fill_value=0, dtype=int) + global_position = 0 + for name, dataset in self.datasets.items(): + local_length = dataset.length + self.global_sample_to_local_ID[global_position : global_position + local_length] = name + self.global_sample_to_local_sample[global_position 
: global_position + local_length] = np.arange( + local_length, dtype=int + ) + global_position += local_length + + # raise NotImplementedError # TODO: re-implement with JIT sample computation in TimeDatase # # TODO (future): vectorize # timedatasets = [TimeDataset(df_i, df_name, **kwargs) for df_name, df_i in df.groupby("ID")] # self.combined_timedataset = [item for timedataset in timedatasets for item in timedataset] # self.length = sum(timedataset.length for timedataset in timedatasets) - # def __len__(self): - # return self.length + def __len__(self): + return self.length - # def __getitem__(self, idx): - # return self.combined_timedataset[idx] + def __getitem__(self, idx): + """Overrides parent class method to get an item at index. + Parameters + ---------- + index : int + Sample location in dataset, starting at 0 + """ + df_name = self.global_sample_to_local_ID[idx] + local_pos = self.global_sample_to_local_sample[idx] + return self.datasets[df_name].__getitem__(local_pos) def fourier_series(dates, period, series_order): From dbec862a53cbae96f8dea29a5df069f750765419 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Tue, 30 Jan 2024 15:44:29 -0800 Subject: [PATCH 054/128] refactor TimeDataset not to use kwargs passthrough --- neuralprophet/forecaster.py | 4 +- neuralprophet/time_dataset.py | 218 ++++++++++++++-------------------- 2 files changed, 92 insertions(+), 130 deletions(-) diff --git a/neuralprophet/forecaster.py b/neuralprophet/forecaster.py index 47897221f..8af79b2f2 100644 --- a/neuralprophet/forecaster.py +++ b/neuralprophet/forecaster.py @@ -1770,8 +1770,8 @@ def predict_seasonal_components(self, df: pd.DataFrame, quantile: float = 0.5): df_i, name=df_name, config_seasonality=self.config_seasonality, - # n_lags=0, - # n_forecasts=1, + n_lags=0, + n_forecasts=1, predict_steps=self.predict_steps, predict_mode=True, config_missing=self.config_missing, diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index f3d55308e..afe354be6 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -18,7 +18,22 @@ class TimeDataset(Dataset): """Create a PyTorch dataset of a tabularized time-series""" - def __init__(self, df, name, **kwargs): + def __init__( + self, + df, + name, + predict_mode, + n_lags, + n_forecasts, + prediction_frequency, + predict_steps, + config_seasonality, + config_events, + config_country_holidays, + config_regressors, + config_lagged_regressors, + config_missing, + ): """Initialize Timedataset from time-series df. 
Parameters ---------- @@ -48,13 +63,27 @@ def __init__(self, df, name, **kwargs): self.meta = OrderedDict({}) self.meta["df_name"] = name - self.predict_mode = kwargs["predict_mode"] - self.n_lags = kwargs["n_lags"] - self.n_forecasts = kwargs["n_forecasts"] - self.max_lags = get_max_num_lags( - n_lags=self.n_lags, config_lagged_regressors=kwargs["config_lagged_regressors"] - ) - self.config_args = kwargs + self.predict_mode = predict_mode + self.n_lags = n_lags + self.n_forecasts = n_forecasts + self.prediction_frequency = prediction_frequency + self.predict_steps = predict_steps + self.config_seasonality = config_seasonality + self.config_events = config_events + self.config_country_holidays = config_country_holidays + self.config_regressors = config_regressors + self.config_lagged_regressors = config_lagged_regressors + self.config_missing = config_missing + + # self.config_args = kwargs + # self.predict_mode = kwargs["predict_mode"] + # self.n_lags = kwargs["n_lags"] + # self.n_forecasts = kwargs["n_forecasts"] + # self.config_events = kwargs["config_events"] + # self.config_country_holidays = kwargs["config_country_holidays"] + # self.config_lagged_regressors = kwargs["config_lagged_regressors"] + + self.max_lags = get_max_num_lags(n_lags=self.n_lags, config_lagged_regressors=self.config_lagged_regressors) self.two_level_inputs = [ "seasonalities", @@ -70,12 +99,12 @@ def __init__(self, df, name, **kwargs): self.multiplicative_event_and_holiday_names, ) = add_event_features_to_df( self.df, - self.config_args["config_events"], - self.config_args["config_country_holidays"], + self.config_events, + self.config_country_holidays, ) # pre-sort additive/multiplicative regressors self.additive_regressors_names, self.multiplicative_regressors_names = sort_regressor_names( - self.config_args["config_regressors"] + self.config_regressors ) # Construct index map @@ -123,8 +152,8 @@ def __getitem__(self, index): n_lags=self.n_lags, max_lags=self.max_lags, n_forecasts=self.n_forecasts, - config_seasonality=self.config_args["config_seasonality"], - config_lagged_regressors=self.config_args["config_lagged_regressors"], + config_seasonality=self.config_seasonality, + config_lagged_regressors=self.config_lagged_regressors, additive_event_and_holiday_names=self.additive_event_and_holiday_names, multiplicative_event_and_holiday_names=self.multiplicative_event_and_holiday_names, additive_regressors_names=self.additive_regressors_names, @@ -149,7 +178,7 @@ def create_sample2index_map(self, df): # Limit target range due to input lags and number of forecasts df_length = len(df) - n_forecasts = self.config_args["n_forecasts"] + n_forecasts = selfn_forecasts origin_start_end_mask = create_origin_start_end_mask( df_length=df_length, max_lags=self.max_lags, n_forecasts=n_forecasts ) @@ -157,15 +186,13 @@ def create_sample2index_map(self, df): # Prediction Frequency # Filter missing samples and prediction frequency (does not actually drop, but creates indexmapping) # analogous to `self.filter_samples_after_init(self.kwargs["prediction_frequency"])` - prediction_frequency_mask = create_prediction_frequency_filter_mask( - df, self.config_args["prediction_frequency"] - ) + prediction_frequency_mask = create_prediction_frequency_filter_mask(df, self.prediction_frequency) # TODO Create NAN-free index mapping of sample index to df index # analogous to `self.drop_nan_after_init( # self.df, self.kwargs["predict_steps"], self.kwargs["config_missing"].drop_missing) nan_mask = create_nan_mask( - df, 
self.config_args["predict_steps"], self.config_args["config_missing"].drop_missing + df, self.predict_steps, self.config_missing.drop_missing ) # boolean array where NAN are False # Combine masks @@ -181,64 +208,51 @@ def create_sample2index_map(self, df): return sample_index_2_df_origin_index, num_samples - # def format_sample(self, inputs, targets=None): - # """Convert tabularized sample to correct formats. - # Parameters - # ---------- - # inputs : ordered dict - # Identical to returns from :meth:`tabularize_univariate_datetime` - # targets : np.array, float - # Identical to returns from :meth:`tabularize_univariate_datetime` - # """ - # sample_input = OrderedDict({}) - # sample_input["time"] = inputs["time"] - # if "lags" in inputs.keys(): - # sample_input["lags"] = inputs["lags"] - # inputs_dtype = { - # # "time": torch.float, - # # "timestamps": np.datetime64, - # # "lags": torch.float, - # "seasonalities": torch.float, - # "events": torch.float, - # "covariates": torch.float, - # "regressors": torch.float, - # } - - # for key, data in inputs.items(): - # if key in self.two_level_inputs: - # sample_input[key] = OrderedDict({}) - # for name, features in data.items(): - # if features.dtype != np.float32: - # features = features.astype(np.float32, copy=False) - - # tensor = torch.from_numpy(features) - - # if tensor.dtype != inputs_dtype[key]: - # sample_input[key][name] = tensor.to( - # dtype=inputs_dtype[key] - # ) # this can probably be removed, but was included in the previous code - # else: - # sample_input[key][name] = tensor - - # # No longer needed as - now directly casting to torch in tabularize - # # else: # single_level items - # # sample_input[key] = torch.from_numpy(data).type(inputs_dtype[key]) - # # ## OLD - # # # if key == "timestamps": sample_input[key] = data - # # # else: sample_input[key] = torch.from_numpy(data).type(inputs_dtype[key]) - - # # TODO Can this be skipped for a single sample? - # # Alternatively, Can this be optimized? - # # Split nested dict into list of dicts with same keys as sample_input. - # # def split_dict(sample_input, index): - # # return {k: v[index] if not isinstance(v, dict) else split_dict(v, index) for k, v in sample_input.items()} - # # length = next(iter(sample_input.values())).shape[0] - # # sample_input = [split_dict(sample_input, i) for i in range(length)] - - # ## timestamps should no longer be present here? - # # sample_input.pop("timestamps") # Exact timestamps are not needed anymore - - # return sample_input, targets + +class GlobalTimeDataset(TimeDataset): + def __init__(self, df, **kwargs): + """Initialize Timedataset from time-series df. 
+ Parameters + ---------- + df : pd.DataFrame + dataframe containing column ``ds``, ``y``, and optionally``ID`` and + normalized columns normalized columns ``ds``, ``y``, ``t``, ``y_scaled`` + **kwargs : dict + Identical to :meth:`tabularize_univariate_datetime` + """ + self.df_names = sorted(list(np.unique(df.loc[:, "ID"].values))) + # if len(self.df_names) == 1: + # super().__init__(df, self.df_names[0], **kwargs) + # else: + # raise NotImplementedError + # timedatasets = [TimeDataset(df_i, df_name, **kwargs) for df_name, df_i in df.groupby("ID")] + # self.combined_timedataset = [item for timedataset in timedatasets for item in timedataset] + # self.length = sum(timedataset.length for timedataset in timedatasets) + self.datasets = OrderedDict({}) + for df_name in self.df_names: + self.datasets[df_name] = TimeDataset(df[df["ID"] == df_name], df_name, **kwargs) + self.length = sum(dataset.length for (name, dataset) in self.datasets.items()) + global_sample_to_local_ID = [] + global_sample_to_local_sample = [] + for name, dataset in self.datasets.items(): + global_sample_to_local_ID.append(np.full(shape=dataset.length, fill_value=name)) + global_sample_to_local_sample.append(np.arange(dataset.length)) + self.global_sample_to_local_ID = np.concatenate(global_sample_to_local_ID) + self.global_sample_to_local_sample = np.concatenate(global_sample_to_local_sample) + + def __len__(self): + return self.length + + def __getitem__(self, idx): + """Overrides parent class method to get an item at index. + Parameters + ---------- + index : int + Sample location in dataset, starting at 0 + """ + df_name = self.global_sample_to_local_ID[idx] + local_pos = self.global_sample_to_local_sample[idx] + return self.datasets[df_name].__getitem__(local_pos) def tabularize_univariate_datetime_single_index( @@ -460,58 +474,6 @@ def tabularize_univariate_datetime_single_index( return inputs, targets -class GlobalTimeDataset(TimeDataset): - def __init__(self, df, **kwargs): - """Initialize Timedataset from time-series df. 
- Parameters - ---------- - df : pd.DataFrame - dataframe containing column ``ds``, ``y``, and optionally``ID`` and - normalized columns normalized columns ``ds``, ``y``, ``t``, ``y_scaled`` - **kwargs : dict - Identical to :meth:`tabularize_univariate_datetime` - """ - self.df_names = sorted(list(np.unique(df.loc[:, "ID"].values))) - if len(self.df_names) == 1: - super().__init__(df, self.df_names[0], **kwargs) - else: - self.datasets = OrderedDict({}) - for df_name in self.df_names: - self.datasets[df_name] = TimeDataset(df[df["ID"] == df_name], df_name, **kwargs) - self.length = sum(dataset.length for (name, dataset) in self.datasets.items()) - self.global_sample_to_local_ID = np.full(shape=self.length, fill_value="__df__", dtype=str) - self.global_sample_to_local_sample = np.full(shape=self.length, fill_value=0, dtype=int) - global_position = 0 - for name, dataset in self.datasets.items(): - local_length = dataset.length - self.global_sample_to_local_ID[global_position : global_position + local_length] = name - self.global_sample_to_local_sample[global_position : global_position + local_length] = np.arange( - local_length, dtype=int - ) - global_position += local_length - - # raise NotImplementedError - # TODO: re-implement with JIT sample computation in TimeDatase - # # TODO (future): vectorize - # timedatasets = [TimeDataset(df_i, df_name, **kwargs) for df_name, df_i in df.groupby("ID")] - # self.combined_timedataset = [item for timedataset in timedatasets for item in timedataset] - # self.length = sum(timedataset.length for timedataset in timedatasets) - - def __len__(self): - return self.length - - def __getitem__(self, idx): - """Overrides parent class method to get an item at index. - Parameters - ---------- - index : int - Sample location in dataset, starting at 0 - """ - df_name = self.global_sample_to_local_ID[idx] - local_pos = self.global_sample_to_local_sample[idx] - return self.datasets[df_name].__getitem__(local_pos) - - def fourier_series(dates, period, series_order): """Provides Fourier series components with the specified frequency and order. Note From 254cb236ddf7e855e422ecd47aeecf97439231f7 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Tue, 30 Jan 2024 15:52:57 -0800 Subject: [PATCH 055/128] debugged seasonal components call of TimeDataset --- neuralprophet/data/process.py | 6 +++--- neuralprophet/forecaster.py | 14 +++++++++----- neuralprophet/time_dataset.py | 3 +-- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/neuralprophet/data/process.py b/neuralprophet/data/process.py index 1b1be0b1c..85e59d0ab 100644 --- a/neuralprophet/data/process.py +++ b/neuralprophet/data/process.py @@ -620,13 +620,13 @@ def _create_dataset(model, df, predict_mode, prediction_frequency=None): predict_mode=predict_mode, n_lags=model.n_lags, n_forecasts=model.n_forecasts, + prediction_frequency=prediction_frequency, predict_steps=model.predict_steps, config_seasonality=model.config_seasonality, config_events=model.config_events, config_country_holidays=model.config_country_holidays, - config_lagged_regressors=model.config_lagged_regressors, config_regressors=model.config_regressors, + config_lagged_regressors=model.config_lagged_regressors, config_missing=model.config_missing, - prediction_frequency=prediction_frequency, - config_train=model.config_train, + # config_train=model.config_train, # no longer needed since JIT tabularization. 
)


diff --git a/neuralprophet/forecaster.py b/neuralprophet/forecaster.py
index 8af79b2f2..4193a9ccc 100644
--- a/neuralprophet/forecaster.py
+++ b/neuralprophet/forecaster.py
@@ -1767,16 +1767,20 @@ def predict_seasonal_components(self, df: pd.DataFrame, quantile: float = 0.5):
         df_seasonal = pd.DataFrame()
         for df_name, df_i in df.groupby("ID"):
             dataset = time_dataset.TimeDataset(
-                df_i,
+                df=df_i,
                 name=df_name,
-                config_seasonality=self.config_seasonality,
+                predict_mode=True,
                 n_lags=0,
                 n_forecasts=1,
+                prediction_frequency=self.prediction_frequency,
                 predict_steps=self.predict_steps,
-                predict_mode=True,
+                config_seasonality=self.config_seasonality,
+                config_events=self.config_events,
+                config_country_holidays=self.config_country_holidays,
+                config_regressors=self.config_regressors,
+                config_lagged_regressors=self.config_lagged_regressors,
                 config_missing=self.config_missing,
-                prediction_frequency=self.prediction_frequency,
-                config_train=self.config_train,
+                # config_train=self.config_train,  # no longer needed since JIT tabularization.
             )
         loader = DataLoader(dataset, batch_size=min(4096, len(df)), shuffle=False, drop_last=False)
         predicted = {}
diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py
index afe354be6..2955be88e 100644
--- a/neuralprophet/time_dataset.py
+++ b/neuralprophet/time_dataset.py
@@ -178,9 +178,8 @@ def create_sample2index_map(self, df):

         # Limit target range due to input lags and number of forecasts
         df_length = len(df)
-        n_forecasts = selfn_forecasts
         origin_start_end_mask = create_origin_start_end_mask(
-            df_length=df_length, max_lags=self.max_lags, n_forecasts=n_forecasts
+            df_length=df_length, max_lags=self.max_lags, n_forecasts=self.n_forecasts
         )

         # Prediction Frequency

From 1b6940a4b81070ff8ff502cfa0c532c101ef5be1 Mon Sep 17 00:00:00 2001
From: ourownstory
Date: Tue, 30 Jan 2024 16:17:12 -0800
Subject: [PATCH 056/128] fix numpy object type error

---
 neuralprophet/time_dataset.py | 38 ++++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 10 deletions(-)

diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py
index 2955be88e..dfe5e18cc 100644
--- a/neuralprophet/time_dataset.py
+++ b/neuralprophet/time_dataset.py
@@ -320,6 +320,8 @@ def tabularize_univariate_datetime_single_index(
         np.array, float
             Targets to be predicted of same length as each of the model inputs, dims: (n_forecasts, 1)
     """
+    # TODO: pre-process all type conversions (e.g.
torch.float32) in __init__ + # sample features are stored and returned in OrderedDict inputs = OrderedDict({}) @@ -411,22 +413,31 @@ def tabularize_univariate_datetime_single_index( if max_lags == 0: if len(additive_regressors_names) > 0: regressors["additive"] = df.loc[origin_index, additive_regressors_names].values - regressors["additive"] = torch.as_tensor(regressors["additive"], dtype=torch.float32) + regressors["additive"] = torch.as_tensor( + np.array(regressors["additive"], dtype=np.float32), dtype=torch.float32 + ) if len(multiplicative_regressors_names) > 0: regressors["multiplicative"] = df.loc[origin_index, multiplicative_regressors_names].values - regressors["multiplicative"] = torch.as_tensor(regressors["multiplicative"], dtype=torch.float32) + regressors["multiplicative"] = torch.as_tensor( + np.array(regressors["multiplicative"], dtype=np.float32), dtype=torch.float32 + ) else: if len(additive_regressors_names) > 0: regressors["additive"] = df.loc[ origin_index + 1 - n_lags : origin_index + n_forecasts, additive_regressors_names ].values - regressors["additive"] = torch.as_tensor(regressors["additive"], dtype=torch.float32) - + # regressors["additive"] = torch.as_tensor(regressors["additive"], dtype=torch.float32) + regressors["additive"] = torch.as_tensor( + np.array(regressors["additive"], dtype=np.float32), dtype=torch.float32 + ) if len(multiplicative_regressors_names) > 0: regressors["multiplicative"] = df.loc[ origin_index + 1 - n_lags : origin_index + n_forecasts, multiplicative_regressors_names ].values - regressors["multiplicative"] = torch.as_tensor(regressors["multiplicative"], dtype=torch.float32) + # regressors["multiplicative"] = torch.as_tensor(regressors["multiplicative"], dtype=torch.float32) + regressors["multiplicative"] = torch.as_tensor( + np.array(regressors["multiplicative"], dtype=np.float32), dtype=torch.float32 + ) inputs["regressors"] = regressors # FUTURE EVENTS: get the events features @@ -438,22 +449,29 @@ def tabularize_univariate_datetime_single_index( if max_lags == 0: if len(additive_event_and_holiday_names) > 0: events["additive"] = df.loc[origin_index, additive_event_and_holiday_names].values - events["additive"] = torch.as_tensor(events["additive"], dtype=torch.float32) + events["additive"] = torch.as_tensor( + np.array(events["additive"], dtype=np.float32), dtype=torch.float32 + ) if len(multiplicative_event_and_holiday_names) > 0: events["multiplicative"] = df.loc[origin_index, multiplicative_event_and_holiday_names].values - events["multiplicative"] = torch.as_tensor(events["multiplicative"], dtype=torch.float32) - else: + events["multiplicative"] = torch.as_tensor( + np.array(events["multiplicative"], dtype=np.float32), dtype=torch.float32 + ) if len(additive_event_and_holiday_names) > 0: events["additive"] = df.loc[ origin_index + 1 : origin_index + n_forecasts, additive_event_and_holiday_names ].values - events["additive"] = torch.as_tensor(events["additive"], dtype=torch.float32) + events["additive"] = torch.as_tensor( + np.array(events["additive"], dtype=np.float32), dtype=torch.float32 + ) if len(multiplicative_event_and_holiday_names) > 0: events["multiplicative"] = df.loc[ origin_index + 1 : origin_index + n_forecasts, multiplicative_event_and_holiday_names ].values - events["multiplicative"] = torch.as_tensor(events["multiplicative"], dtype=torch.float32) + events["multiplicative"] = torch.as_tensor( + np.array(events["multiplicative"], dtype=np.float32), dtype=torch.float32 + ) inputs["events"] = events # ONLY FOR DEBUGGING 
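A minimal sketch of the object-dtype failure mode that the np.array(..., dtype=np.float32)
casts in this patch guard against; the column names and values are illustrative, not from
the library's test data:

    import numpy as np
    import pandas as pd
    import torch

    df = pd.DataFrame({"y": [1.0, 2.0]})
    df["event"] = pd.Series([0.0, 1.0], dtype="object")       # feature column stored as object
    row = df.loc[0, ["y", "event"]].values                    # -> object-dtype ndarray
    # torch.as_tensor(row, dtype=torch.float32)               # raises TypeError on object arrays
    safe = torch.as_tensor(np.array(row, dtype=np.float32))   # cast to float32 first, then wrap

The same pattern applies to the event and regressor selections above, where `df.loc`
row/column selections can silently come back with dtype=object.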
From edec3443a01c79de839a08c9aa0f78924ad4c771 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Wed, 31 Jan 2024 15:30:52 -0800 Subject: [PATCH 057/128] fix seasonality condition bugs --- neuralprophet/time_dataset.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index dfe5e18cc..9e6195239 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -393,14 +393,20 @@ def tabularize_univariate_datetime_single_index( t = np.array((dates - datetime(1900, 1, 1)).dt.total_seconds().astype(np.float32)) / (3600 * 24.0) # features: Matrix with dims (length len(dates), 2*resolution) features = np.column_stack( - [np.sin((2.0 * (i + 1) * np.pi * t / period.period)) for i in range(period.resolution)] - + [np.cos((2.0 * (i + 1) * np.pi * t / period.period)) for i in range(period.resolution)] + [np.sin(2.0 * (i + 1) * np.pi * t / period.period) for i in range(period.resolution)] + + [np.cos(2.0 * (i + 1) * np.pi * t / period.period) for i in range(period.resolution)] ) else: raise NotImplementedError if period.condition_name is not None: # multiply seasonality features with condition mask/values - features = features * df[period.condition_name].values[:, np.newaxis] + if max_lags == 0: + condition_values = pd.Series(df.at[origin_index, period.condition_name]).values[:, np.newaxis] + else: + condition_values = df.loc[ + origin_index - n_lags + 1 : origin_index + n_forecasts, period.condition_name + ].values[:, np.newaxis] + features = features * condition_values seasonalities[name] = torch.as_tensor(features, dtype=torch.float32) inputs["seasonalities"] = seasonalities From 5eef5f9c266b6f9817a1f8430ba7cb1488954b7c Mon Sep 17 00:00:00 2001 From: ourownstory Date: Wed, 31 Jan 2024 16:39:06 -0800 Subject: [PATCH 058/128] fix events and future regressor cases --- .../components/future_regressors/linear.py | 6 ++- neuralprophet/time_dataset.py | 53 +++++++++---------- neuralprophet/time_net.py | 10 ++-- 3 files changed, 35 insertions(+), 34 deletions(-) diff --git a/neuralprophet/components/future_regressors/linear.py b/neuralprophet/components/future_regressors/linear.py index e8434384c..7b7685b83 100644 --- a/neuralprophet/components/future_regressors/linear.py +++ b/neuralprophet/components/future_regressors/linear.py @@ -51,8 +51,10 @@ def scalar_features_effects(self, features, params, indices=None): if indices is not None: features = features[:, :, indices] params = params[:, indices] - - return torch.sum(features.unsqueeze(dim=2) * params.unsqueeze(dim=0).unsqueeze(dim=0), dim=-1) + # features dims: (batch, n_forecasts, n_features) -> (batch, n_forecasts, 1, n_features) + # params dims: (n_quantiles, n_features) -> (batch, 1, n_quantiles, n_features) + out = torch.sum(features.unsqueeze(dim=2) * params.unsqueeze(dim=0).unsqueeze(dim=0), dim=-1) + return out # dims (batch, n_forecasts, n_quantiles) def get_reg_weights(self, name): """ diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 9e6195239..e90f4f764 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -412,72 +412,69 @@ def tabularize_univariate_datetime_single_index( # FUTURE REGRESSORS: get the future regressors features # create numpy array of values of additive and multiplicative regressors, at correct indexes - # features dims: (n_samples/batch, n_forecasts, n_features/n_regressors) + # features dims: (n_forecasts, n_features) any_future_regressors = 0 < 
len(additive_regressors_names + multiplicative_regressors_names) if any_future_regressors: # if config_regressors is not None: regressors = OrderedDict({}) if max_lags == 0: if len(additive_regressors_names) > 0: - regressors["additive"] = df.loc[origin_index, additive_regressors_names].values + features = df.loc[origin_index, additive_regressors_names].values regressors["additive"] = torch.as_tensor( - np.array(regressors["additive"], dtype=np.float32), dtype=torch.float32 + np.expand_dims(np.array(features, dtype=np.float32), axis=0), dtype=torch.float32 ) if len(multiplicative_regressors_names) > 0: - regressors["multiplicative"] = df.loc[origin_index, multiplicative_regressors_names].values + features = df.loc[origin_index, multiplicative_regressors_names].values regressors["multiplicative"] = torch.as_tensor( - np.array(regressors["multiplicative"], dtype=np.float32), dtype=torch.float32 + np.expand_dims(np.array(features, dtype=np.float32), axis=0), dtype=torch.float32 ) else: if len(additive_regressors_names) > 0: - regressors["additive"] = df.loc[ + features = df.loc[ origin_index + 1 - n_lags : origin_index + n_forecasts, additive_regressors_names ].values - # regressors["additive"] = torch.as_tensor(regressors["additive"], dtype=torch.float32) - regressors["additive"] = torch.as_tensor( - np.array(regressors["additive"], dtype=np.float32), dtype=torch.float32 - ) + # regressors["additive"] = torch.as_tensor(features, dtype=torch.float32) + regressors["additive"] = torch.as_tensor(np.array(features, dtype=np.float32), dtype=torch.float32) if len(multiplicative_regressors_names) > 0: - regressors["multiplicative"] = df.loc[ + features = df.loc[ origin_index + 1 - n_lags : origin_index + n_forecasts, multiplicative_regressors_names ].values - # regressors["multiplicative"] = torch.as_tensor(regressors["multiplicative"], dtype=torch.float32) + # regressors["multiplicative"] = torch.as_tensor(features, dtype=torch.float32) regressors["multiplicative"] = torch.as_tensor( - np.array(regressors["multiplicative"], dtype=np.float32), dtype=torch.float32 + np.array(features, dtype=np.float32), dtype=torch.float32 ) inputs["regressors"] = regressors # FUTURE EVENTS: get the events features # create numpy array of values of additive and multiplicative events, at correct indexes - # features dims: (n_samples/batch, n_forecasts, n_features/n_events) + # features dims: (n_forecasts, n_features) any_events = 0 < len(additive_event_and_holiday_names + multiplicative_event_and_holiday_names) if any_events: events = OrderedDict({}) if max_lags == 0: + # forecasts are at origin_index if len(additive_event_and_holiday_names) > 0: - events["additive"] = df.loc[origin_index, additive_event_and_holiday_names].values + features = df.loc[origin_index, additive_event_and_holiday_names].values events["additive"] = torch.as_tensor( - np.array(events["additive"], dtype=np.float32), dtype=torch.float32 + np.expand_dims(np.array(features, dtype=np.float32), axis=0), dtype=torch.float32 ) if len(multiplicative_event_and_holiday_names) > 0: - events["multiplicative"] = df.loc[origin_index, multiplicative_event_and_holiday_names].values + features = df.loc[origin_index, multiplicative_event_and_holiday_names].values events["multiplicative"] = torch.as_tensor( - np.array(events["multiplicative"], dtype=np.float32), dtype=torch.float32 + np.expand_dims(np.array(features, dtype=np.float32), axis=0), dtype=torch.float32 ) + else: + # forecasts are at origin_index + 1 up to origin_index + n_forecasts if 
len(additive_event_and_holiday_names) > 0: - events["additive"] = df.loc[ - origin_index + 1 : origin_index + n_forecasts, additive_event_and_holiday_names + features = df.loc[ + origin_index + 1 - n_lags : origin_index + n_forecasts, additive_event_and_holiday_names ].values - events["additive"] = torch.as_tensor( - np.array(events["additive"], dtype=np.float32), dtype=torch.float32 - ) + events["additive"] = torch.as_tensor(np.array(features, dtype=np.float32), dtype=torch.float32) if len(multiplicative_event_and_holiday_names) > 0: - events["multiplicative"] = df.loc[ - origin_index + 1 : origin_index + n_forecasts, multiplicative_event_and_holiday_names + features = df.loc[ + origin_index + 1 - n_lags : origin_index + n_forecasts, multiplicative_event_and_holiday_names ].values - events["multiplicative"] = torch.as_tensor( - np.array(events["multiplicative"], dtype=np.float32), dtype=torch.float32 - ) + events["multiplicative"] = torch.as_tensor(np.array(features, dtype=np.float32), dtype=torch.float32) inputs["events"] = events # ONLY FOR DEBUGGING diff --git a/neuralprophet/time_net.py b/neuralprophet/time_net.py index f2fcbeb80..8674a6482 100644 --- a/neuralprophet/time_net.py +++ b/neuralprophet/time_net.py @@ -442,19 +442,21 @@ def scalar_features_effects(self, features: torch.Tensor, params: nn.Parameter, Features (either additive or multiplicative) related to event component dims (batch, n_forecasts, n_features) params : nn.Parameter - Params (either additive or multiplicative) related to events + Params (either additive or multiplicative) related to events dims (n_quantiles, n_features) indices : list of int Indices in the feature tensors related to a particular event Returns ------- torch.Tensor - Forecast component of dims (batch, n_forecasts) + Forecast component of dims (batch, n_forecasts, n_quantiles) """ if indices is not None: features = features[:, :, indices] params = params[:, indices] - - return torch.sum(features.unsqueeze(dim=2) * params.unsqueeze(dim=0).unsqueeze(dim=0), dim=-1) + # features dims: (batch, n_forecasts, n_features) -> (batch, n_forecasts, 1, n_features) + # params dims: (n_quantiles, n_features) -> (batch, 1, n_quantiles, n_features) + out = torch.sum(features.unsqueeze(dim=2) * params.unsqueeze(dim=0).unsqueeze(dim=0), dim=-1) + return out # dims (batch, n_forecasts, n_quantiles) def auto_regression(self, lags: Union[torch.Tensor, float]) -> torch.Tensor: """Computes auto-regessive model component AR-Net. From f88e55014293b2ed50d456430cb07e6d1d9fc286 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Wed, 31 Jan 2024 16:53:07 -0800 Subject: [PATCH 059/128] fixing prediction frequency filter --- neuralprophet/time_dataset.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index e90f4f764..72c892ab3 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -686,9 +686,9 @@ def add_event_features_to_df( holiday_offset_name = utils.create_event_names_for_offsets(holiday, offset) df[holiday_offset_name] = feature.shift(periods=offset, fill_value=0.0) if mode == "additive": - additive_holiday_names.append(event_offset_name) + additive_holiday_names.append(holiday_offset_name) else: - multiplicative_holiday_names.append(event_offset_name) + multiplicative_holiday_names.append(holiday_offset_name) # Future TODO: possibly undo merge of events and holidays. 
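     # Hedged illustration of the offset columns created above (the dates and the exact
     # column-name format from utils.create_event_names_for_offsets are assumptions):
     # with lower_window=-1 and upper_window=1, a holiday at index 2 of
     #   feature                                  = [0, 0, 1, 0, 0]
     # contributes three shifted 0/1 columns:
     #   feature.shift(periods=-1, fill_value=0)  = [0, 1, 0, 0, 0]   # day before
     #   feature.shift(periods=0,  fill_value=0)  = [0, 0, 1, 0, 0]   # holiday itself
     #   feature.shift(periods=1,  fill_value=0)  = [0, 0, 0, 1, 0]   # day after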
additive_event_and_holiday_names = sorted(additive_events_names + additive_holiday_names) multiplicative_event_and_holiday_names = sorted(multiplicative_events_names + multiplicative_holiday_names) @@ -877,15 +877,15 @@ def create_prediction_frequency_filter_mask(df: pd.DataFrame, prediction_frequen filter_masks = [] for key, value in prediction_frequency.items(): if key == "daily-hour": - mask = timestamps.hour == value + mask = timestamps.dt.hour == value elif key == "weekly-day": - mask = timestamps.dayofweek == value + mask = timestamps.dt.dayofweek == value elif key == "monthly-day": - mask = timestamps.day == value + mask = timestamps.dt.day == value elif key == "yearly-month": - mask = timestamps.month == value + mask = timestamps.dt.month == value elif key == "hourly-minute": - mask = timestamps.minute == value + mask = timestamps.dt.minute == value else: raise ValueError(f"Invalid prediction frequency: {key}") filter_masks.append(mask) From 61aad2a0c2e36e32322d6eb8dd4ff7ffcb6265d4 Mon Sep 17 00:00:00 2001 From: Simon W Date: Thu, 1 Feb 2024 17:14:19 -0800 Subject: [PATCH 060/128] performance_test_energy --- tests/test_model_performance.py | 92 +++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/tests/test_model_performance.py b/tests/test_model_performance.py index 3c097d2a3..f6c30cc5a 100644 --- a/tests/test_model_performance.py +++ b/tests/test_model_performance.py @@ -5,6 +5,7 @@ import os import pathlib import time +import torch import numpy as np import pandas as pd @@ -230,3 +231,94 @@ def test_EnergyPriceDaily(): json.dump(accuracy_metrics, outfile) create_metrics_plot(metrics).write_image(os.path.join(DIR, "tests", "metrics", "EnergyPriceDaily.svg")) + + +def test_EnergyPerformance(): + ### Temporary Test for on-the-fly sampling - very time consuming! 
+ + df = pd.read_csv(ENERGY_PRICE_DAILY_FILE) + df = df[df["ds"] < "2018-01-01"] + df["temp"] = df["temperature"] + df["ds"] = pd.to_datetime(df["ds"]) + df["y"] = pd.to_numeric(df["y"], errors="coerce") + df["ID"] = "test" + + # Conditional Seasonality + df["winter"] = np.where( + df["ds"].dt.month.isin( + [ + 10, + 11, + 12, + 1, + 2, + 3, + ] + ), + 1, + 0, + ) + df["summer"] = np.where(df["ds"].dt.month.isin([4, 5, 6, 7, 8, 9]), 1, 0) + df["winter"] = pd.to_numeric(df["winter"], errors="coerce") + df["summer"] = pd.to_numeric(df["summer"], errors="coerce") + + # Normalize Temperature + df["temp"] = (df["temp"] - 65.0) / 50.0 + + # df + df = df[["ID", "ds", "y", "temp", "winter", "summer"]] + + # Hyperparameter + tuned_params = { + "n_lags": 24 * 15, + "newer_samples_weight": 2.0, + "n_changepoints": 0, + "yearly_seasonality": 10, + "weekly_seasonality": True, + "daily_seasonality": False, # due to conditional daily seasonality + "batch_size": 128, + "ar_layers": [32, 64, 32, 16], + "lagged_reg_layers": [32, 32], + # not tuned + "n_forecasts": 33, + "learning_rate": 0.001, + "epochs": 30, + "trend_global_local": "global", + "season_global_local": "global", + "drop_missing": True, + "normalize": "standardize", + } + + # Uncertainty Quantification + confidence_lv = 0.98 + quantile_list = [round(((1 - confidence_lv) / 2), 2), round((confidence_lv + (1 - confidence_lv) / 2), 2)] + + # Check if GPU is available + use_gpu = torch.cuda.is_available() + + # Set trainer configuration + trainer_configs = { + "accelerator": "gpu" if use_gpu else "cpu", + } + print(f"Using {'GPU' if use_gpu else 'CPU'}") + + # Model + m = NeuralProphet(**tuned_params, **trainer_configs, quantiles=quantile_list) + + # Lagged Regressor + m.add_lagged_regressor(names="temp", n_lags=33, normalize="standardize") + + # Conditional Seasonality + m.add_seasonality(name="winter", period=1, fourier_order=6, condition_name="winter") + m.add_seasonality(name="summer", period=1, fourier_order=6, condition_name="summer") + + # Holidays + m.add_country_holidays(country_name="US", lower_window=-1, upper_window=1) + + # Split + df_train = df[df["ds"] < "2016-05-01"] + df_test = df[df["ds"] >= "2016-05-01"] + + # Training & Predict + _ = m.fit(df=df_train, freq="H", num_workers=4, early_stopping=True) + _ = m.predict(df_test) From 661b5b754cdba7e2a84be343f87ce50cc20db0ac Mon Sep 17 00:00:00 2001 From: ourownstory Date: Thu, 1 Feb 2024 17:41:45 -0800 Subject: [PATCH 061/128] debug events --- docs/source/code/forecaster.rst | 2 +- docs/source/code/hdays_utils.rst | 5 - neuralprophet/configure.py | 3 +- neuralprophet/event_utils.py | 90 +++++++++++ neuralprophet/hdays_utils.py | 31 ---- neuralprophet/time_dataset.py | 141 +++++++----------- neuralprophet/utils.py | 36 ----- pyproject.toml | 2 +- ...est_hdays_utils.py => test_event_utils.py} | 8 +- tests/test_unit.py | 34 ++--- 10 files changed, 171 insertions(+), 181 deletions(-) delete mode 100644 docs/source/code/hdays_utils.rst create mode 100644 neuralprophet/event_utils.py delete mode 100644 neuralprophet/hdays_utils.py rename tests/{test_hdays_utils.py => test_event_utils.py} (62%) diff --git a/docs/source/code/forecaster.rst b/docs/source/code/forecaster.rst index 26eb0b12e..d48d700f7 100644 --- a/docs/source/code/forecaster.rst +++ b/docs/source/code/forecaster.rst @@ -7,7 +7,7 @@ Core Module Documentation configure.py df_utils.py - hdays_utils.py + event_utils.py plot_forecast_plotly.py plot_forecast_matplotlib.py plot_model_parameters_plotly.py diff --git 
a/docs/source/code/hdays_utils.rst b/docs/source/code/hdays_utils.rst deleted file mode 100644 index 0b2c83a12..000000000 --- a/docs/source/code/hdays_utils.rst +++ /dev/null @@ -1,5 +0,0 @@ -Core Module Documentation -========================== - -.. automodule:: neuralprophet.hdays_utils - :members: \ No newline at end of file diff --git a/neuralprophet/configure.py b/neuralprophet/configure.py index 0c9c6458e..52b8b3f0a 100644 --- a/neuralprophet/configure.py +++ b/neuralprophet/configure.py @@ -15,6 +15,7 @@ from neuralprophet import df_utils, np_types, utils, utils_torch from neuralprophet.custom_loss_metrics import PinballLoss +from neuralprophet.event_utils import get_holiday_names log = logging.getLogger("NP.config") @@ -429,7 +430,7 @@ class Holidays: holiday_names: set = field(init=False) def init_holidays(self, df=None): - self.holiday_names = utils.get_holidays_from_country(self.country, df) + self.holiday_names = get_holiday_names(self.country, df) ConfigCountryHolidays = Holidays diff --git a/neuralprophet/event_utils.py b/neuralprophet/event_utils.py new file mode 100644 index 000000000..1633cc16c --- /dev/null +++ b/neuralprophet/event_utils.py @@ -0,0 +1,90 @@ +from collections import defaultdict +from typing import Iterable, Optional, Union + +import numpy as np +import pandas as pd +from holidays import country_holidays + +# def get_country_holidays(country: str, years: Optional[Union[int, Iterable[int]]] = None): +# """ +# Helper function to get holidays for a country. + +# Parameters +# ---------- +# country : str +# Country name to retrieve country specific holidays +# years : int, list +# Year or list of years to retrieve holidays for + +# Returns +# ------- +# set +# All possible holiday dates and names of given country + +# """ +# # For compatibility with Turkey as "TU" cases. +# country = "TUR" if country == "TU" else country +# holiday_dict = country_holidays(country=country, years=years, expand=True, observed=False) +# return holiday_dict + + +def get_holiday_names(country: Union[str, Iterable[str]], df=None): + """ + Return all possible holiday names for a list of countries over time period in df + + Parameters + ---------- + country : str, list + List of country names to retrieve country specific holidays + df : pd.Dataframe + Dataframe from which datestamps will be retrieved from + + Returns + ------- + set + All possible holiday names of given country + """ + if df is None: + years = np.arange(1995, 2045) + else: + dates = df["ds"].copy(deep=True) + years = pd.unique(dates.apply(lambda x: x.year)) + # years = list({x.year for x in dates}) + # support multiple countries, convert to list if not already + if isinstance(country, str): + country = [country] + + all_holidays = get_all_holidays(years=years, country=country) + return set(all_holidays.keys()) + + +def get_all_holidays(years, country): + """ + Make dataframe of country specific holidays for given years and countries + Parameters + ---------- + year_list : list + List of years + country : str, list + List of country names + Returns + ------- + pd.DataFrame + Containing country specific holidays df with columns 'ds' and 'holiday' + """ + # convert to list if not already + if isinstance(country, str): + country = [country] + all_holidays = defaultdict(list) + # iterate over countries and get holidays for each country + for single_country in country: + # For compatibility with Turkey as "TU" cases. 
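+        # A hedged sketch of what the country_holidays call below returns (the example
+        # entry is illustrative): a dict-like mapping of datetime.date to holiday name,
+        # e.g. {date(2019, 7, 4): "Independence Day", ...}; the loop further below then
+        # inverts it into name -> [dates], so the same holiday aggregates across years.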
+ single_country = "TUR" if single_country == "TU" else single_country + # get dict of dates and their holiday name + single_country_specific_holidays = country_holidays( + country=single_country, years=years, expand=True, observed=False + ) + # invert order - for given holiday, store list of dates + for date, name in single_country_specific_holidays.items(): + all_holidays[name].append(pd.to_datetime(date)) + return all_holidays diff --git a/neuralprophet/hdays_utils.py b/neuralprophet/hdays_utils.py deleted file mode 100644 index f827b9237..000000000 --- a/neuralprophet/hdays_utils.py +++ /dev/null @@ -1,31 +0,0 @@ -from typing import Iterable, Optional, Union - -import holidays - - -def get_country_holidays(country: str, years: Optional[Union[int, Iterable[int]]] = None): - """ - Helper function to get holidays for a country. - - Parameters - ---------- - country : str - Country name to retrieve country specific holidays - years : int, list - Year or list of years to retrieve holidays for - - Returns - ------- - set - All possible holiday dates and names of given country - - """ - substitutions = { - "TU": "TR", # For compatibility with Turkey as "TU" cases. - } - - country = substitutions.get(country, country) - if not hasattr(holidays, country): - raise AttributeError(f"Holidays in {country} are not currently supported!") - - return getattr(holidays, country)(years=years) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 72c892ab3..ddf8405f7 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -10,7 +10,7 @@ from neuralprophet import configure, utils from neuralprophet.df_utils import get_max_num_lags -from neuralprophet.hdays_utils import get_country_holidays +from neuralprophet.event_utils import get_all_holidays log = logging.getLogger("NP.time_dataset") @@ -541,35 +541,6 @@ def fourier_series_t(t, period, series_order): return features -def make_country_specific_holidays_dict(year_list, country): - """ - Make dataframe of country specific holidays for given years and countries - Parameters - ---------- - year_list : list - List of years - country : str, list - List of country names - Returns - ------- - pd.DataFrame - Containing country specific holidays df with columns 'ds' and 'holiday' - """ - # iterate over countries and get holidays for each country - # convert to list if not already - if isinstance(country, str): - country = [country] - country_specific_holidays = {} - for single_country in country: - single_country_specific_holidays = get_country_holidays(single_country, year_list) - # only add holiday if it is not already in the dict - country_specific_holidays.update(single_country_specific_holidays) - country_specific_holidays_dict = defaultdict(list) - for date, holiday in country_specific_holidays.items(): - country_specific_holidays_dict[holiday].append(pd.to_datetime(date)) - return country_specific_holidays_dict - - def get_event_offset_features(event, config, feature): """ Create event offset features for the given event, config and feature @@ -671,7 +642,7 @@ def add_event_features_to_df( multiplicative_holiday_names = [] if config_country_holidays is not None: year_list = list({x.year for x in df.ds}) - country_holidays_dict = make_country_specific_holidays_dict(year_list, config_country_holidays.country) + country_holidays_dict = get_all_holidays(year_list, config_country_holidays.country) config = config_country_holidays mode = config.mode for holiday in config_country_holidays.holiday_names: @@ -681,7 
+652,7 @@ def add_event_features_to_df( dates = country_holidays_dict[holiday] feature[df.ds.isin(dates)] = 1.0 else: - raise ValueError(f"Holiday {holiday} not found in country holidays") + raise ValueError(f"Holiday {holiday} not found in {config_country_holidays.country} holidays") for offset in range(config.lower_window, config.upper_window + 1): holiday_offset_name = utils.create_event_names_for_offsets(holiday, offset) df[holiday_offset_name] = feature.shift(periods=offset, fill_value=0.0) @@ -695,60 +666,60 @@ def add_event_features_to_df( return df, additive_event_and_holiday_names, multiplicative_event_and_holiday_names -def make_events_features(df, config_events: Optional[configure.ConfigEvents] = None, config_country_holidays=None): - """ - Construct arrays of all event features - Parameters - ---------- - df : pd.DataFrame - Dataframe with all values including the user specified events (provided by user) - config_events : configure.ConfigEvents - User specified events, each with their upper, lower windows (int), regularization - config_country_holidays : configure.ConfigCountryHolidays - Configurations (holiday_names, upper, lower windows, regularization) for country specific holidays - Returns - ------- - np.array - All additive event features (both user specified and country specific) - np.array - All multiplicative event features (both user specified and country specific) - """ - df = df.reset_index(drop=True) - additive_events = pd.DataFrame() - multiplicative_events = pd.DataFrame() - - # create all user specified events - if config_events is not None: - for event, configs in config_events.items(): - feature = df[event] - _create_event_offset_features(event, configs, feature, additive_events, multiplicative_events) - - # create all country specific holidays - if config_country_holidays is not None: - year_list = list({x.year for x in df.ds}) - country_holidays_dict = make_country_specific_holidays_dict(year_list, config_country_holidays.country) - for holiday in config_country_holidays.holiday_names: - feature = pd.Series([0.0] * df.shape[0]) - if holiday in country_holidays_dict.keys(): - dates = country_holidays_dict[holiday] - feature[df.ds.isin(dates)] = 1.0 - _create_event_offset_features( - holiday, config_country_holidays, feature, additive_events, multiplicative_events - ) - - # Make sure column order is consistent - if not additive_events.empty: - additive_events = additive_events[sorted(additive_events.columns.tolist())] - additive_events = additive_events.values - else: - additive_events = None - if not multiplicative_events.empty: - multiplicative_events = multiplicative_events[sorted(multiplicative_events.columns.tolist())] - multiplicative_events = multiplicative_events.values - else: - multiplicative_events = None +# def make_events_features(df, config_events: Optional[configure.ConfigEvents] = None, config_country_holidays=None): +# """ +# Construct arrays of all event features +# Parameters +# ---------- +# df : pd.DataFrame +# Dataframe with all values including the user specified events (provided by user) +# config_events : configure.ConfigEvents +# User specified events, each with their upper, lower windows (int), regularization +# config_country_holidays : configure.ConfigCountryHolidays +# Configurations (holiday_names, upper, lower windows, regularization) for country specific holidays +# Returns +# ------- +# np.array +# All additive event features (both user specified and country specific) +# np.array +# All multiplicative event features (both user 
specified and country specific) +# """ +# df = df.reset_index(drop=True) +# additive_events = pd.DataFrame() +# multiplicative_events = pd.DataFrame() + +# # create all user specified events +# if config_events is not None: +# for event, configs in config_events.items(): +# feature = df[event] +# _create_event_offset_features(event, configs, feature, additive_events, multiplicative_events) + +# # create all country specific holidays +# if config_country_holidays is not None: +# year_list = list({x.year for x in df.ds}) +# country_holidays_dict = make_country_specific_holidays_dict(year_list, config_country_holidays.country) +# for holiday in config_country_holidays.holiday_names: +# feature = pd.Series([0.0] * df.shape[0]) +# if holiday in country_holidays_dict.keys(): +# dates = country_holidays_dict[holiday] +# feature[df.ds.isin(dates)] = 1.0 +# _create_event_offset_features( +# holiday, config_country_holidays, feature, additive_events, multiplicative_events +# ) + +# # Make sure column order is consistent +# if not additive_events.empty: +# additive_events = additive_events[sorted(additive_events.columns.tolist())] +# additive_events = additive_events.values +# else: +# additive_events = None +# if not multiplicative_events.empty: +# multiplicative_events = multiplicative_events[sorted(multiplicative_events.columns.tolist())] +# multiplicative_events = multiplicative_events.values +# else: +# multiplicative_events = None - return additive_events, multiplicative_events +# return additive_events, multiplicative_events # def make_regressors_features(df, config_regressors): diff --git a/neuralprophet/utils.py b/neuralprophet/utils.py index ea245dc7f..c6fec4568 100644 --- a/neuralprophet/utils.py +++ b/neuralprophet/utils.py @@ -13,7 +13,6 @@ import torch from neuralprophet import utils_torch -from neuralprophet.hdays_utils import get_country_holidays from neuralprophet.logger import ProgressBar if TYPE_CHECKING: @@ -321,41 +320,6 @@ def config_seasonality_to_model_dims(config_seasonality: ConfigSeasonality): return seasonal_dims -def get_holidays_from_country(country: Union[str, Iterable[str]], df=None): - """ - Return all possible holiday names of given country - - Parameters - ---------- - country : str, list - List of country names to retrieve country specific holidays - df : pd.Dataframe - Dataframe from which datestamps will be retrieved from - - Returns - ------- - set - All possible holiday names of given country - """ - if df is None: - years = np.arange(1995, 2045) - else: - dates = df["ds"].copy(deep=True) - years = list({x.year for x in dates}) - # support multiple countries - if isinstance(country, str): - country = [country] - - unique_holidays = {} - for single_country in country: - holidays_country = get_country_holidays(single_country, years) - for date, name in holidays_country.items(): - if date not in unique_holidays: - unique_holidays[date] = name - holiday_names = unique_holidays.values() - return set(holiday_names) - - def config_events_to_model_dims(config_events: Optional[ConfigEvents], config_country_holidays): """ Convert user specified events configurations along with country specific diff --git a/pyproject.toml b/pyproject.toml index 876d9194d..5e1760d84 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -103,7 +103,7 @@ include = [ "neuralprophet/forecaster.py", "neuralprophet/configure.py", "neuralprophet/df_utils.py", - "neuralprophet/hdays_utils.py", + "neuralprophet/event_utils.py", ] [tool.ruff] diff --git a/tests/test_hdays_utils.py 
b/tests/test_event_utils.py similarity index 62% rename from tests/test_hdays_utils.py rename to tests/test_event_utils.py index 691804649..c124bafc4 100644 --- a/tests/test_hdays_utils.py +++ b/tests/test_event_utils.py @@ -3,16 +3,16 @@ import holidays import pytest -from neuralprophet import hdays_utils +from neuralprophet import event_utils def test_get_country_holidays(): - assert issubclass(hdays_utils.get_country_holidays("TU").__class__, holidays.countries.turkey.TR) is True + assert issubclass(event_utils.get_country_holidays("TU").__class__, holidays.countries.turkey.TR) is True for country in ("UnitedStates", "US", "USA"): - us_holidays = hdays_utils.get_country_holidays(country, years=2019) + us_holidays = event_utils.get_country_holidays(country, years=2019) assert issubclass(us_holidays.__class__, holidays.countries.united_states.UnitedStates) is True assert len(us_holidays) == 10 with pytest.raises(AttributeError): - hdays_utils.get_country_holidays("NotSupportedCountry") + event_utils.get_country_holidays("NotSupportedCountry") diff --git a/tests/test_unit.py b/tests/test_unit.py index be4d7d55a..fc66f48d9 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -809,16 +809,13 @@ def test_make_future(): def test_too_many_NaN(): - # n_lags, n_forecasts = 12, 1 + n_lags = 12 + n_forecasts = 1 config_missing = configure.MissingDataHandling( - impute_missing=True, impute_linear=5, impute_rolling=5, drop_missing=False - ) - config_train = configure.Train( - learning_rate=None, - epochs=EPOCHS, - batch_size=BATCH_SIZE, - loss_func="SmoothL1Loss", - optimizer="AdamW", + impute_missing=True, + impute_linear=5, + impute_rolling=5, + drop_missing=False, ) length = 100 days = pd.date_range(start="2017-01-01", periods=length) @@ -840,16 +837,19 @@ def test_too_many_NaN(): # Check if ValueError is thrown, if NaN values remain after auto-imputing with pytest.raises(ValueError): time_dataset.TimeDataset( - df, - "name", + df=df, + name="name", predict_mode=False, - config_missing=config_missing, - config_lagged_regressors=None, - config_country_holidays=None, - config_events=None, - config_train=config_train, - predict_steps=1, + n_lags=n_lags, + n_forecasts=n_forecasts, prediction_frequency=None, + predict_steps=1, + config_seasonality=None, + config_events=None, + config_country_holidays=None, + config_regressors=None, + config_lagged_regressors=None, + config_missing=config_missing, ) From 3e5dd344deb0f0363533f126189755857cbd6566 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Thu, 1 Feb 2024 18:07:47 -0800 Subject: [PATCH 062/128] convert new energytest to daily data --- tests/test_model_performance.py | 118 ++++++++++++++++++++++++++++---- 1 file changed, 105 insertions(+), 13 deletions(-) diff --git a/tests/test_model_performance.py b/tests/test_model_performance.py index f6c30cc5a..d741153c7 100644 --- a/tests/test_model_performance.py +++ b/tests/test_model_performance.py @@ -5,11 +5,11 @@ import os import pathlib import time -import torch import numpy as np import pandas as pd import plotly.graph_objects as go +import torch from plotly.subplots import make_subplots from plotly_resampler import unregister_plotly_resampler @@ -233,7 +233,7 @@ def test_EnergyPriceDaily(): create_metrics_plot(metrics).write_image(os.path.join(DIR, "tests", "metrics", "EnergyPriceDaily.svg")) -def test_EnergyPerformance(): +def test_EnergyDailyDeep(): ### Temporary Test for on-the-fly sampling - very time consuming! 
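    # Note on this conversion to daily data: the lag window keeps the same
    # calendar span (n_lags = 24 * 15 hourly steps becomes n_lags = 15 daily
    # steps, i.e. 15 days of history either way), while the horizon changes
    # from n_forecasts = 33 (hours) to 7 (days) and the fit frequency from
    # "H" to "D", as the parameter diff below shows.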
df = pd.read_csv(ENERGY_PRICE_DAILY_FILE) @@ -270,17 +270,17 @@ def test_EnergyPerformance(): # Hyperparameter tuned_params = { - "n_lags": 24 * 15, + "n_lags": 15, "newer_samples_weight": 2.0, "n_changepoints": 0, "yearly_seasonality": 10, - "weekly_seasonality": True, - "daily_seasonality": False, # due to conditional daily seasonality - "batch_size": 128, - "ar_layers": [32, 64, 32, 16], - "lagged_reg_layers": [32, 32], + "weekly_seasonality": False, # due to conditional daily seasonality + "daily_seasonality": False, # due to data freq + "batch_size": 64, + "ar_layers": [16, 32, 16, 8], + "lagged_reg_layers": [32, 16], # not tuned - "n_forecasts": 33, + "n_forecasts": 7, "learning_rate": 0.001, "epochs": 30, "trend_global_local": "global", @@ -306,11 +306,11 @@ def test_EnergyPerformance(): m = NeuralProphet(**tuned_params, **trainer_configs, quantiles=quantile_list) # Lagged Regressor - m.add_lagged_regressor(names="temp", n_lags=33, normalize="standardize") + m.add_lagged_regressor(names="temp", n_lags=7, normalize="standardize") # Conditional Seasonality - m.add_seasonality(name="winter", period=1, fourier_order=6, condition_name="winter") - m.add_seasonality(name="summer", period=1, fourier_order=6, condition_name="summer") + m.add_seasonality(name="winter", period=7, fourier_order=6, condition_name="winter") + m.add_seasonality(name="summer", period=7, fourier_order=6, condition_name="summer") # Holidays m.add_country_holidays(country_name="US", lower_window=-1, upper_window=1) @@ -320,5 +320,97 @@ def test_EnergyPerformance(): df_test = df[df["ds"] >= "2016-05-01"] # Training & Predict - _ = m.fit(df=df_train, freq="H", num_workers=4, early_stopping=True) + _ = m.fit(df=df_train, freq="D", num_workers=4) _ = m.predict(df_test) + + +# TODO: adapt to hourly dataset with multiple IDs +# def test_EnergyPerformance(): +# ### Temporary Test for on-the-fly sampling - very time consuming! 
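# Worked example of the quantile computation used in these tests: with
# confidence_lv = 0.98, (1 - 0.98) / 2 == 0.01 and 0.98 + (1 - 0.98) / 2 == 0.99,
# so quantile_list == [0.01, 0.99], i.e. a symmetric 98% prediction interval.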
+ +# df = pd.read_csv(ENERGY_PRICE_DAILY_FILE) +# df = df[df["ds"] < "2018-01-01"] +# df["temp"] = df["temperature"] +# df["ds"] = pd.to_datetime(df["ds"]) +# df["y"] = pd.to_numeric(df["y"], errors="coerce") +# df["ID"] = "test" + +# # Conditional Seasonality +# df["winter"] = np.where( +# df["ds"].dt.month.isin( +# [ +# 10, +# 11, +# 12, +# 1, +# 2, +# 3, +# ] +# ), +# 1, +# 0, +# ) +# df["summer"] = np.where(df["ds"].dt.month.isin([4, 5, 6, 7, 8, 9]), 1, 0) +# df["winter"] = pd.to_numeric(df["winter"], errors="coerce") +# df["summer"] = pd.to_numeric(df["summer"], errors="coerce") + +# # Normalize Temperature +# df["temp"] = (df["temp"] - 65.0) / 50.0 + +# # df +# df = df[["ID", "ds", "y", "temp", "winter", "summer"]] + +# # Hyperparameter +# tuned_params = { +# "n_lags": 24 * 15, +# "newer_samples_weight": 2.0, +# "n_changepoints": 0, +# "yearly_seasonality": 10, +# "weekly_seasonality": True, +# "daily_seasonality": False, # due to conditional daily seasonality +# "batch_size": 128, +# "ar_layers": [32, 64, 32, 16], +# "lagged_reg_layers": [32, 32], +# # not tuned +# "n_forecasts": 33, +# "learning_rate": 0.001, +# "epochs": 30, +# "trend_global_local": "global", +# "season_global_local": "global", +# "drop_missing": True, +# "normalize": "standardize", +# } + +# # Uncertainty Quantification +# confidence_lv = 0.98 +# quantile_list = [round(((1 - confidence_lv) / 2), 2), round((confidence_lv + (1 - confidence_lv) / 2), 2)] + +# # Check if GPU is available +# use_gpu = torch.cuda.is_available() + +# # Set trainer configuration +# trainer_configs = { +# "accelerator": "gpu" if use_gpu else "cpu", +# } +# print(f"Using {'GPU' if use_gpu else 'CPU'}") + +# # Model +# m = NeuralProphet(**tuned_params, **trainer_configs, quantiles=quantile_list) + +# # Lagged Regressor +# m.add_lagged_regressor(names="temp", n_lags=33, normalize="standardize") + +# # Conditional Seasonality +# m.add_seasonality(name="winter", period=1, fourier_order=6, condition_name="winter") +# m.add_seasonality(name="summer", period=1, fourier_order=6, condition_name="summer") + +# # Holidays +# m.add_country_holidays(country_name="US", lower_window=-1, upper_window=1) + +# # Split +# df_train = df[df["ds"] < "2016-05-01"] +# df_test = df[df["ds"] >= "2016-05-01"] + +# # Training & Predict +# _ = m.fit(df=df_train, freq="H", num_workers=4, early_stopping=True) +# _ = m.predict(df_test) From b78477b653decfb4390c7bb024884e3a9f033b47 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Thu, 1 Feb 2024 18:16:18 -0800 Subject: [PATCH 063/128] fix events util reference --- tests/utils/dataset_generators.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/utils/dataset_generators.py b/tests/utils/dataset_generators.py index 065b91162..275fd8b69 100644 --- a/tests/utils/dataset_generators.py +++ b/tests/utils/dataset_generators.py @@ -1,7 +1,7 @@ import numpy as np import pandas as pd -from neuralprophet.time_dataset import make_country_specific_holidays_dict +from neuralprophet.event_utils import get_all_holidays def generate_holiday_dataset(country="US", years=[2022], y_default=1, y_holiday=100, y_holidays_override={}): @@ -11,7 +11,7 @@ def generate_holiday_dataset(country="US", years=[2022], y_default=1, y_holiday= dates = pd.date_range("%i-01-01" % (years[0]), periods=periods, freq="D") df = pd.DataFrame({"ds": dates, "y": y_default}, index=dates) - holidays = make_country_specific_holidays_dict(years, country) + holidays = get_all_holidays(years, country) for holiday_name, timestamps in 
holidays.items(): df.loc[timestamps[0], "y"] = y_holidays_override.get(holiday_name, y_holiday) From 190e3b7c3147507b8e907c329a62e367e21015e4 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Thu, 1 Feb 2024 18:26:40 -0800 Subject: [PATCH 064/128] fix test_get_country_holidays --- tests/test_event_utils.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/test_event_utils.py b/tests/test_event_utils.py index c124bafc4..862c11c2f 100644 --- a/tests/test_event_utils.py +++ b/tests/test_event_utils.py @@ -7,12 +7,14 @@ def test_get_country_holidays(): - assert issubclass(event_utils.get_country_holidays("TU").__class__, holidays.countries.turkey.TR) is True + # deprecated + # assert issubclass(event_utils.get_country_holidays("TU").__class__, holidays.countries.turkey.TR) is True + # new format + assert issubclass(event_utils.get_all_holidays(country=["TU", "US"], years=2025).__class__, dict) is True for country in ("UnitedStates", "US", "USA"): - us_holidays = event_utils.get_country_holidays(country, years=2019) - assert issubclass(us_holidays.__class__, holidays.countries.united_states.UnitedStates) is True + us_holidays = event_utils.get_all_holidays(country=country, years=[2019, 2020]) assert len(us_holidays) == 10 - with pytest.raises(AttributeError): - event_utils.get_country_holidays("NotSupportedCountry") + with pytest.raises(NotImplementedError): + event_utils.get_holiday_names("NotSupportedCountry") From 767ca0260b1ccf59dd8b745b52accccf995b58fe Mon Sep 17 00:00:00 2001 From: ourownstory Date: Thu, 1 Feb 2024 18:57:51 -0800 Subject: [PATCH 065/128] fix test_timedataset_minima --- neuralprophet/data/process.py | 10 ++--- tests/test_unit.py | 71 +++++++++++++++++++++++------------ 2 files changed, 52 insertions(+), 29 deletions(-) diff --git a/neuralprophet/data/process.py b/neuralprophet/data/process.py index 85e59d0ab..c0fd9ae04 100644 --- a/neuralprophet/data/process.py +++ b/neuralprophet/data/process.py @@ -438,14 +438,14 @@ def _check_dataframe( def _handle_missing_data( df: pd.DataFrame, - freq: Optional[str], + freq: str, n_lags: int, n_forecasts: int, config_missing, - config_regressors: Optional[ConfigFutureRegressors], - config_lagged_regressors: Optional[ConfigLaggedRegressors], - config_events: Optional[ConfigEvents], - config_seasonality: Optional[ConfigSeasonality], + config_regressors: Optional[ConfigFutureRegressors] = None, + config_lagged_regressors: Optional[ConfigLaggedRegressors] = None, + config_events: Optional[ConfigEvents] = None, + config_seasonality: Optional[ConfigSeasonality] = None, predicting: bool = False, ) -> pd.DataFrame: """ diff --git a/tests/test_unit.py b/tests/test_unit.py index fc66f48d9..8796abd95 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -68,35 +68,58 @@ def test_impute_missing(): plt.show() -def test_time_dataset(): +def test_timedataset_minimal(): # manually load any file that stores a time series, for example: df_in = pd.read_csv(AIR_FILE, index_col=False, nrows=NROWS) log.debug(f"Infile shape: {df_in.shape}") - n_lags = 3 - n_forecasts = 1 valid_p = 0.2 - config_missing = configure.MissingDataHandling() - config_train = configure.Train( - learning_rate=LR, - epochs=EPOCHS, - batch_size=BATCH_SIZE, - loss_func="SmoothL1Loss", - optimizer="AdamW", - ) - df_train, df_val = df_utils.split_df(df_in, n_lags, n_forecasts, valid_p) - # create a tabularized dataset from time series - df, _, _ = df_utils.check_dataframe(df_train) - local_data_params, global_data_params = 
df_utils.init_data_params(df=df, normalize="minmax") - df = df.drop("ID", axis=1) - df = df_utils.normalize(df, global_data_params) - inputs, targets = time_dataset.tabularize_univariate_datetime( - df, n_lags=n_lags, n_forecasts=n_forecasts, config_missing=config_missing, config_train=config_train - ) - log.debug( - "tabularized inputs: {}".format( - "; ".join(["{}: {}".format(inp, values.shape) for inp, values in inputs.items()]) + for n_forecasts, n_lags in [(1, 0), (1, 5), (3, 5)]: + config_missing = configure.MissingDataHandling() + # config_train = configure.Train() + df, df_val = df_utils.split_df(df_in, n_lags, n_forecasts, valid_p) + # create a tabularized dataset from time series + df, _, _, _ = df_utils.prep_or_copy_df(df) + df, _, _ = df_utils.check_dataframe(df) + df = _handle_missing_data( + df, + freq="MS", + n_lags=n_lags, + n_forecasts=n_forecasts, + config_missing=config_missing, + # config_regressors: Optional[ConfigFutureRegressors], + # config_lagged_regressors: Optional[ConfigLaggedRegressors], + # config_events: Optional[ConfigEvents], + # config_seasonality: Optional[ConfigSeasonality], + predicting=False, + ) + local_data_params, global_data_params = df_utils.init_data_params(df=df, normalize="minmax") + df = df.drop("ID", axis=1) + df = df_utils.normalize(df, global_data_params) + + dataset = time_dataset.TimeDataset( + df=df, + name="name", + predict_mode=False, + n_lags=n_lags, + n_forecasts=n_forecasts, + prediction_frequency=None, + predict_steps=1, + config_seasonality=None, + config_events=None, + config_country_holidays=None, + config_regressors=None, + config_lagged_regressors=None, + config_missing=config_missing, + ) + inputs, targets, meta = dataset.__getitem__(0) + # inputs50, targets50, meta50 = dataset.__getitem__(50) + log.debug(f"(n_forecasts {n_forecasts}, n_lags {n_lags})") + log.debug(f"tabularized targets: {targets.shape}") + log.debug( + "tabularized inputs: {}".format( + "; ".join(["{}: {}".format(inp, values.shape) for inp, values in inputs.items()]) + ) ) - ) def test_normalize(): From 7e9b29d701f6345d5077a996be3595466ba4e94a Mon Sep 17 00:00:00 2001 From: ourownstory Date: Thu, 1 Feb 2024 19:08:33 -0800 Subject: [PATCH 066/128] fix selective forecasting --- neuralprophet/df_utils.py | 2 +- tests/test_integration.py | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/neuralprophet/df_utils.py b/neuralprophet/df_utils.py index 7d569af98..79c6c4ea6 100644 --- a/neuralprophet/df_utils.py +++ b/neuralprophet/df_utils.py @@ -1052,7 +1052,7 @@ def add_missing_dates_nan(df, freq): df_resampled = df.resample(freq).asfreq() if "ID" in df.columns: df_resampled["ID"].fillna(df["ID"].iloc[0], inplace=True) - df_resampled.reset_index(drop=True, inplace=True) + df_resampled.reset_index(inplace=True) num_added = len(df_resampled) - len(df) return df_resampled, num_added diff --git a/tests/test_integration.py b/tests/test_integration.py index 449c560a3..6be735def 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1601,6 +1601,23 @@ def test_selective_forecasting(): date_range = pd.date_range(start=start_date, end=end_date, freq="H") y = np.random.randint(0, 1000, size=(len(date_range),)) df = pd.DataFrame({"ds": date_range, "y": y}) + m = NeuralProphet( + n_forecasts=24, + n_lags=48, + epochs=1, + batch_size=BATCH_SIZE, + learning_rate=LR, + prediction_frequency={"daily-hour": 7}, + ) + m.fit(df, freq="H") + m.predict(df) + + log.info("testing: selective forecasting with n_forecasts < 
prediction_frequency with lags") + start_date = "2019-01-01" + end_date = "2019-03-01" + date_range = pd.date_range(start=start_date, end=end_date, freq="H") + y = np.random.randint(0, 1000, size=(len(date_range),)) + df = pd.DataFrame({"ds": date_range, "y": y}) m = NeuralProphet( n_forecasts=1, n_lags=14, From 32d2cc651dafe2bed32caa42cbfc4d041472c042 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Thu, 1 Feb 2024 19:20:24 -0800 Subject: [PATCH 067/128] cleanup timedataset --- neuralprophet/time_dataset.py | 188 +--------------------------------- 1 file changed, 1 insertion(+), 187 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index ddf8405f7..d9bfa1b9c 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -75,22 +75,9 @@ def __init__( self.config_lagged_regressors = config_lagged_regressors self.config_missing = config_missing - # self.config_args = kwargs - # self.predict_mode = kwargs["predict_mode"] - # self.n_lags = kwargs["n_lags"] - # self.n_forecasts = kwargs["n_forecasts"] - # self.config_events = kwargs["config_events"] - # self.config_country_holidays = kwargs["config_country_holidays"] - # self.config_lagged_regressors = kwargs["config_lagged_regressors"] - self.max_lags = get_max_num_lags(n_lags=self.n_lags, config_lagged_regressors=self.config_lagged_regressors) - self.two_level_inputs = [ - "seasonalities", - "covariates", - "events", - "regressors", - ] + self.two_level_inputs = ["seasonalities", "covariates", "events", "regressors"] # Preprocessing of events and holidays features (added to self.df) ( @@ -220,13 +207,6 @@ def __init__(self, df, **kwargs): Identical to :meth:`tabularize_univariate_datetime` """ self.df_names = sorted(list(np.unique(df.loc[:, "ID"].values))) - # if len(self.df_names) == 1: - # super().__init__(df, self.df_names[0], **kwargs) - # else: - # raise NotImplementedError - # timedatasets = [TimeDataset(df_i, df_name, **kwargs) for df_name, df_i in df.groupby("ID")] - # self.combined_timedataset = [item for timedataset in timedatasets for item in timedataset] - # self.length = sum(timedataset.length for timedataset in timedatasets) self.datasets = OrderedDict({}) for df_name in self.df_names: self.datasets[df_name] = TimeDataset(df[df["ID"] == df_name], df_name, **kwargs) @@ -263,11 +243,8 @@ def tabularize_univariate_datetime_single_index( n_forecasts: int = 1, config_seasonality: Optional[configure.ConfigSeasonality] = None, config_lagged_regressors: Optional[configure.ConfigLaggedRegressors] = None, - # config_events: Optional[configure.ConfigEvents] = None, - # config_country_holidays=None, additive_event_and_holiday_names: list[str] = [], multiplicative_event_and_holiday_names: list[str] = [], - # config_regressors: Optional[configure.ConfigFutureRegressors] = None, additive_regressors_names: list[str] = [], multiplicative_regressors_names: list[str] = [], ): @@ -432,13 +409,11 @@ def tabularize_univariate_datetime_single_index( features = df.loc[ origin_index + 1 - n_lags : origin_index + n_forecasts, additive_regressors_names ].values - # regressors["additive"] = torch.as_tensor(features, dtype=torch.float32) regressors["additive"] = torch.as_tensor(np.array(features, dtype=np.float32), dtype=torch.float32) if len(multiplicative_regressors_names) > 0: features = df.loc[ origin_index + 1 - n_lags : origin_index + n_forecasts, multiplicative_regressors_names ].values - # regressors["multiplicative"] = torch.as_tensor(features, dtype=torch.float32) 
regressors["multiplicative"] = torch.as_tensor( np.array(features, dtype=np.float32), dtype=torch.float32 ) @@ -646,7 +621,6 @@ def add_event_features_to_df( config = config_country_holidays mode = config.mode for holiday in config_country_holidays.holiday_names: - # feature = pd.Series([0.0] * df.shape[0]) feature = pd.Series(np.zeros(df.shape[0], dtype=np.float32)) if holiday in country_holidays_dict.keys(): dates = country_holidays_dict[holiday] @@ -666,137 +640,6 @@ def add_event_features_to_df( return df, additive_event_and_holiday_names, multiplicative_event_and_holiday_names -# def make_events_features(df, config_events: Optional[configure.ConfigEvents] = None, config_country_holidays=None): -# """ -# Construct arrays of all event features -# Parameters -# ---------- -# df : pd.DataFrame -# Dataframe with all values including the user specified events (provided by user) -# config_events : configure.ConfigEvents -# User specified events, each with their upper, lower windows (int), regularization -# config_country_holidays : configure.ConfigCountryHolidays -# Configurations (holiday_names, upper, lower windows, regularization) for country specific holidays -# Returns -# ------- -# np.array -# All additive event features (both user specified and country specific) -# np.array -# All multiplicative event features (both user specified and country specific) -# """ -# df = df.reset_index(drop=True) -# additive_events = pd.DataFrame() -# multiplicative_events = pd.DataFrame() - -# # create all user specified events -# if config_events is not None: -# for event, configs in config_events.items(): -# feature = df[event] -# _create_event_offset_features(event, configs, feature, additive_events, multiplicative_events) - -# # create all country specific holidays -# if config_country_holidays is not None: -# year_list = list({x.year for x in df.ds}) -# country_holidays_dict = make_country_specific_holidays_dict(year_list, config_country_holidays.country) -# for holiday in config_country_holidays.holiday_names: -# feature = pd.Series([0.0] * df.shape[0]) -# if holiday in country_holidays_dict.keys(): -# dates = country_holidays_dict[holiday] -# feature[df.ds.isin(dates)] = 1.0 -# _create_event_offset_features( -# holiday, config_country_holidays, feature, additive_events, multiplicative_events -# ) - -# # Make sure column order is consistent -# if not additive_events.empty: -# additive_events = additive_events[sorted(additive_events.columns.tolist())] -# additive_events = additive_events.values -# else: -# additive_events = None -# if not multiplicative_events.empty: -# multiplicative_events = multiplicative_events[sorted(multiplicative_events.columns.tolist())] -# multiplicative_events = multiplicative_events.values -# else: -# multiplicative_events = None - -# return additive_events, multiplicative_events - - -# def make_regressors_features(df, config_regressors): -# """Construct arrays of all scalar regressor features -# Parameters -# ---------- -# df : pd.DataFrame -# Dataframe with all values including the user specified regressors -# config_regressors : configure.ConfigFutureRegressors -# User specified regressors config -# Returns -# ------- -# np.array -# All additive regressor features -# np.array -# All multiplicative regressor features -# """ -# additive_regressors = pd.DataFrame() -# multiplicative_regressors = pd.DataFrame() - -# for reg in df.columns: -# if reg in config_regressors: -# mode = config_regressors[reg].mode -# if mode == "additive": -# additive_regressors[reg] = df[reg] -# 
else: -# multiplicative_regressors[reg] = df[reg] - -# if not additive_regressors.empty: -# additive_regressors = additive_regressors[sorted(additive_regressors.columns.tolist())] -# additive_regressors = additive_regressors.values -# else: -# additive_regressors = None -# if not multiplicative_regressors.empty: -# multiplicative_regressors = multiplicative_regressors[sorted(multiplicative_regressors.columns.tolist())] -# multiplicative_regressors = multiplicative_regressors.values -# else: -# multiplicative_regressors = None - -# return additive_regressors, multiplicative_regressors - - -# def seasonal_features_from_dates(df, config_seasonality: configure.ConfigSeasonality): -# """Dataframe with seasonality features. -# Includes seasonality features -# Parameters -# ---------- -# df : pd.DataFrame -# Dataframe with all values -# config_seasonality : configure.ConfigSeasonality -# Configuration for seasonalities -# Returns -# ------- -# OrderedDict -# Dictionary with keys for each period name containing an np.array -# with the respective regression features. each with dims: (len(dates), 2*fourier_order) -# """ -# dates = df["ds"] -# assert len(dates.shape) == 1 -# seasonalities = OrderedDict({}) -# # Seasonality features -# for name, period in config_seasonality.periods.items(): -# if period.resolution > 0: -# if config_seasonality.computation == "fourier": -# features = fourier_series( -# dates=dates, -# period=period.period, -# series_order=period.resolution, -# ) -# else: -# raise NotImplementedError -# if period.condition_name is not None: -# features = features * df[period.condition_name].values[:, np.newaxis] -# seasonalities[name] = features -# return seasonalities - - def create_origin_start_end_mask(df_length, max_lags, n_forecasts): """Creates a boolean mask for valid prediction origin positions. (based on limiting input lags and forecast targets at start and end of df)""" @@ -839,11 +682,6 @@ def create_prediction_frequency_filter_mask(df: pd.DataFrame, prediction_frequen if prediction_frequency is None or prediction_frequency == 1: return mask - # OLD: timestamps were created from "ds" column in tabularization and then re-converted here - # timestamps = pd.to_datetime([x["timestamps"][0] for x in df]) - # OR - # timestamps = df["timestamps"].apply(lambda x: pd.to_datetime(x[0])) - timestamps = pd.to_datetime(df.loc[:, "ds"]) filter_masks = [] for key, value in prediction_frequency.items(): @@ -941,27 +779,3 @@ def sort_regressor_names(config): else: multiplicative_regressors_names.append(reg) return additive_regressors_names, multiplicative_regressors_names - - -# ## TODO: rename - used elsewhere, not in this file. -# def make_country_specific_holidays_df(year_list, country): -# return make_country_specific_holidays_dict(year_list, country) - - -# def split_nested_dict(inputs): -# """Split nested dict into list of dicts. -# Parameters -# ---------- -# inputs : ordered dict -# Nested dict to be split. -# Returns -# ------- -# list of dicts -# List of dicts with same keys as inputs. 
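# Illustrative sketch of the prediction_frequency filtering performed by
# create_prediction_frequency_filter_mask (an assumption consistent with the
# {"daily-hour": 7} usage in the tests, not a verbatim excerpt): for hourly
# data, only timestamps matching the configured hour stay valid origins.
#   timestamps = pd.to_datetime(df.loc[:, "ds"])
#   mask = mask & (timestamps.dt.hour == 7).values  # prediction_frequency={"daily-hour": 7}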
-# """ - -# def split_dict(inputs, index): -# return {k: v[index] if not isinstance(v, dict) else split_dict(v, index) for k, v in inputs.items()} - -# length = next(iter(inputs.values())).shape[0] -# return [split_dict(inputs, i) for i in range(length)] From b709f2d1b165b83962c4d62f0a201fe542b8a6e4 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Thu, 1 Feb 2024 19:45:36 -0800 Subject: [PATCH 068/128] refactor tabularize_univariate --- neuralprophet/time_dataset.py | 324 ++++++++++++++++++++-------------- 1 file changed, 190 insertions(+), 134 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index d9bfa1b9c..9ad350ce5 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -234,6 +234,153 @@ def __getitem__(self, idx): return self.datasets[df_name].__getitem__(local_pos) +def get_sample_targets(df, origin_index, n_forecasts, max_lags, predict_mode): + if predict_mode: + return torch.zeros((n_forecasts, 1), dtype=torch.float32) + else: + if n_forecasts == 1: + if max_lags == 0: + targets = df.at[origin_index, "y_scaled"] + if max_lags > 0: + targets = df.at[origin_index + 1, "y_scaled"] + targets = np.expand_dims(targets, 0) + targets = np.expand_dims(targets, 1) # extra dimension at end for quantiles:median + else: + # Note: df.loc is inclusive of slice end, while df.iloc is not. + targets = df.loc[origin_index + 1 : origin_index + n_forecasts, "y_scaled"].values + targets = np.expand_dims(targets, 1) # extra dimension at end for quantiles:median + return torch.as_tensor(targets, dtype=torch.float32) + + +def get_sample_lagged_regressors(df, origin_index, config_lagged_regressors): + lagged_regressors = OrderedDict({}) + # Future TODO: optimize this computation for many lagged_regressors + for lagged_reg in df.columns: + if lagged_reg in config_lagged_regressors: + covar_lags = config_lagged_regressors[lagged_reg].n_lags + assert covar_lags > 0 + # Note: df.loc is inclusive of slice end, while df.iloc is not. + lagged_regressors[lagged_reg] = df.loc[origin_index - covar_lags + 1 : origin_index, lagged_reg].values + lagged_regressors[lagged_reg] = torch.as_tensor(lagged_regressors[lagged_reg], dtype=torch.float32) + return lagged_regressors + + +def get_sample_seasonalities(df, origin_index, n_forecasts, max_lags, n_lags, config_seasonality): + # TODO: precompute and save fourier features and only tabularize / slide windows when calling __getitem_ + seasonalities = OrderedDict({}) + if max_lags == 0: + dates = pd.Series(df.at[origin_index, "ds"]) + else: + # Note: df.loc is inclusive of slice end, while df.iloc is not. + dates = pd.Series(df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts, "ds"].values) + # Seasonality features + for name, period in config_seasonality.periods.items(): + if period.resolution > 0: + if config_seasonality.computation == "fourier": + # Compute Fourier series components with the specified frequency and order. 
+ # convert to days since epoch + t = np.array((dates - datetime(1900, 1, 1)).dt.total_seconds().astype(np.float32)) / (3600 * 24.0) + # features: Matrix with dims (length len(dates), 2*resolution) + features = np.column_stack( + [np.sin(2.0 * (i + 1) * np.pi * t / period.period) for i in range(period.resolution)] + + [np.cos(2.0 * (i + 1) * np.pi * t / period.period) for i in range(period.resolution)] + ) + else: + raise NotImplementedError + if period.condition_name is not None: + # multiply seasonality features with condition mask/values + if max_lags == 0: + condition_values = pd.Series(df.at[origin_index, period.condition_name]).values[:, np.newaxis] + else: + condition_values = df.loc[ + origin_index - n_lags + 1 : origin_index + n_forecasts, period.condition_name + ].values[:, np.newaxis] + features = features * condition_values + seasonalities[name] = torch.as_tensor(features, dtype=torch.float32) + return seasonalities + + +def get_sample_future_regressors( + df, origin_index, n_forecasts, max_lags, n_lags, additive_regressors_names, multiplicative_regressors_names +): + regressors = OrderedDict({}) + if max_lags == 0: + if len(additive_regressors_names) > 0: + features = df.loc[origin_index, additive_regressors_names].values + regressors["additive"] = torch.as_tensor( + np.expand_dims(np.array(features, dtype=np.float32), axis=0), dtype=torch.float32 + ) + if len(multiplicative_regressors_names) > 0: + features = df.loc[origin_index, multiplicative_regressors_names].values + regressors["multiplicative"] = torch.as_tensor( + np.expand_dims(np.array(features, dtype=np.float32), axis=0), dtype=torch.float32 + ) + else: + if len(additive_regressors_names) > 0: + features = df.loc[origin_index + 1 - n_lags : origin_index + n_forecasts, additive_regressors_names].values + regressors["additive"] = torch.as_tensor(np.array(features, dtype=np.float32), dtype=torch.float32) + if len(multiplicative_regressors_names) > 0: + features = df.loc[ + origin_index + 1 - n_lags : origin_index + n_forecasts, multiplicative_regressors_names + ].values + regressors["multiplicative"] = torch.as_tensor(np.array(features, dtype=np.float32), dtype=torch.float32) + return regressors + + +def get_sample_future_events( + df, + origin_index, + n_forecasts, + max_lags, + n_lags, + additive_event_and_holiday_names, + multiplicative_event_and_holiday_names, +): + events = OrderedDict({}) + if max_lags == 0: + # forecasts are at origin_index + if len(additive_event_and_holiday_names) > 0: + features = df.loc[origin_index, additive_event_and_holiday_names].values + events["additive"] = torch.as_tensor( + np.expand_dims(np.array(features, dtype=np.float32), axis=0), dtype=torch.float32 + ) + if len(multiplicative_event_and_holiday_names) > 0: + features = df.loc[origin_index, multiplicative_event_and_holiday_names].values + events["multiplicative"] = torch.as_tensor( + np.expand_dims(np.array(features, dtype=np.float32), axis=0), dtype=torch.float32 + ) + else: + # forecasts are at origin_index + 1 up to origin_index + n_forecasts + if len(additive_event_and_holiday_names) > 0: + features = df.loc[ + origin_index + 1 - n_lags : origin_index + n_forecasts, additive_event_and_holiday_names + ].values + events["additive"] = torch.as_tensor(np.array(features, dtype=np.float32), dtype=torch.float32) + + if len(multiplicative_event_and_holiday_names) > 0: + features = df.loc[ + origin_index + 1 - n_lags : origin_index + n_forecasts, multiplicative_event_and_holiday_names + ].values + events["multiplicative"] = 
torch.as_tensor(np.array(features, dtype=np.float32), dtype=torch.float32) + return events + + +def log_input_shapes(inputs): + tabularized_input_shapes_str = "" + for key, value in inputs.items(): + if key in [ + "seasonalities", + "covariates", + "events", + "regressors", + ]: + for name, period_features in value.items(): + tabularized_input_shapes_str += f" {name} {key} {period_features}\n" + else: + tabularized_input_shapes_str += f" {key} {value.shape} \n" + log.debug(f"Tabularized inputs shapes: \n{tabularized_input_shapes_str}") + + def tabularize_univariate_datetime_single_index( df: pd.DataFrame, origin_index: int, @@ -297,7 +444,7 @@ def tabularize_univariate_datetime_single_index( np.array, float Targets to be predicted of same length as each of the model inputs, dims: (n_forecasts, 1) """ - # TODO: pre-process al type conversions (e.g. torch.float32) in __init__ + # TODO: pre-process all type conversions (e.g. torch.float32) in __init__ # sample features are stored and returned in OrderedDict inputs = OrderedDict({}) @@ -305,167 +452,76 @@ def tabularize_univariate_datetime_single_index( if max_lags == 0: assert n_forecasts == 1 - if predict_mode: - targets = torch.zeros((n_forecasts, 1), dtype=torch.float32) - else: - if n_forecasts == 1: - if max_lags == 0: - targets = df.at[origin_index, "y_scaled"] - if max_lags > 0: - targets = df.at[origin_index + 1, "y_scaled"] - targets = np.expand_dims(targets, 0) - targets = np.expand_dims(targets, 1) # extra dimension at end for quantiles:median - else: - # Note: df.loc is inclusive of slice end, while df.iloc is not. - targets = df.loc[origin_index + 1 : origin_index + n_forecasts, "y_scaled"].values - targets = np.expand_dims(targets, 1) # extra dimension at end for quantiles:median - targets = torch.as_tensor(targets, dtype=torch.float32) + targets = get_sample_targets( + df=df, origin_index=origin_index, n_forecasts=n_forecasts, max_lags=max_lags, predict_mode=predict_mode + ) # TIME: the time at each sample's lags and forecasts if max_lags == 0: - inputs["time"] = df.at[origin_index, "t"] - inputs["time"] = np.expand_dims(inputs["time"], 0) - inputs["time"] = torch.tensor(inputs["time"], dtype=torch.float32) - + t = df.at[origin_index, "t"] + inputs["time"] = torch.tensor(np.expand_dims(t, 0), dtype=torch.float32) else: # extract time value of n_lags steps before and icluding origin_index and n_forecasts steps after origin_index # Note: df.loc is inclusive of slice end, while df.iloc is not. - inputs["time"] = df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts, "t"].values - inputs["time"] = torch.as_tensor(inputs["time"], dtype=torch.float32) + t = df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts, "t"].values + inputs["time"] = torch.as_tensor(t, dtype=torch.float32) # LAGS: From y-series, extract preceeding n_lags steps up to and including origin_index if n_lags >= 1 and "y_scaled" in df.columns: # Note: df.loc is inclusive of slice end, while df.iloc is not. 
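        # Tiny illustration of that inclusivity note, for a RangeIndex df:
        #   df.loc[2:5, "t"] -> rows 2, 3, 4, 5 (end inclusive, 4 values)
        #   df.iloc[2:5]     -> rows 2, 3, 4    (end exclusive, 3 values)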
- inputs["lags"] = df.loc[origin_index - n_lags + 1 : origin_index, "y_scaled"].values - inputs["lags"] = torch.as_tensor(inputs["lags"], dtype=torch.float32) + lags = df.loc[origin_index - n_lags + 1 : origin_index, "y_scaled"].values + inputs["lags"] = torch.as_tensor(lags, dtype=torch.float32) # COVARIATES / LAGGED REGRESSORS: Lagged regressor inputs: analogous to LAGS if config_lagged_regressors is not None and max_lags > 0: - lagged_regressors = OrderedDict({}) - # Future TODO: optimize this computation for many lagged_regressors - for lagged_reg in df.columns: - if lagged_reg in config_lagged_regressors: - covar_lags = config_lagged_regressors[lagged_reg].n_lags - assert covar_lags > 0 - # Note: df.loc is inclusive of slice end, while df.iloc is not. - lagged_regressors[lagged_reg] = df.loc[origin_index - covar_lags + 1 : origin_index, lagged_reg].values - lagged_regressors[lagged_reg] = torch.as_tensor(lagged_regressors[lagged_reg], dtype=torch.float32) - inputs["covariates"] = lagged_regressors - - # SEASONALITIES - # TODO: precompute and save fourier features and only tabularize / slide windows when calling __getitem__ + inputs["covariates"] = get_sample_lagged_regressors( + df=df, origin_index=origin_index, config_lagged_regressors=config_lagged_regressors + ) + + # SEASONALITIES_ if config_seasonality is not None: - seasonalities = OrderedDict({}) - if max_lags == 0: - dates = pd.Series(df.at[origin_index, "ds"]) - else: - # Note: df.loc is inclusive of slice end, while df.iloc is not. - dates = pd.Series(df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts, "ds"].values) - # Seasonality features - for name, period in config_seasonality.periods.items(): - if period.resolution > 0: - if config_seasonality.computation == "fourier": - # Compute Fourier series components with the specified frequency and order. 
- # convert to days since epoch - t = np.array((dates - datetime(1900, 1, 1)).dt.total_seconds().astype(np.float32)) / (3600 * 24.0) - # features: Matrix with dims (length len(dates), 2*resolution) - features = np.column_stack( - [np.sin(2.0 * (i + 1) * np.pi * t / period.period) for i in range(period.resolution)] - + [np.cos(2.0 * (i + 1) * np.pi * t / period.period) for i in range(period.resolution)] - ) - else: - raise NotImplementedError - if period.condition_name is not None: - # multiply seasonality features with condition mask/values - if max_lags == 0: - condition_values = pd.Series(df.at[origin_index, period.condition_name]).values[:, np.newaxis] - else: - condition_values = df.loc[ - origin_index - n_lags + 1 : origin_index + n_forecasts, period.condition_name - ].values[:, np.newaxis] - features = features * condition_values - seasonalities[name] = torch.as_tensor(features, dtype=torch.float32) - inputs["seasonalities"] = seasonalities + inputs["seasonalities"] = get_sample_seasonalities( + df=df, + origin_index=origin_index, + n_forecasts=n_forecasts, + max_lags=max_lags, + n_lags=n_lags, + config_seasonality=config_seasonality, + ) # FUTURE REGRESSORS: get the future regressors features # create numpy array of values of additive and multiplicative regressors, at correct indexes # features dims: (n_forecasts, n_features) any_future_regressors = 0 < len(additive_regressors_names + multiplicative_regressors_names) if any_future_regressors: # if config_regressors is not None: - regressors = OrderedDict({}) - if max_lags == 0: - if len(additive_regressors_names) > 0: - features = df.loc[origin_index, additive_regressors_names].values - regressors["additive"] = torch.as_tensor( - np.expand_dims(np.array(features, dtype=np.float32), axis=0), dtype=torch.float32 - ) - if len(multiplicative_regressors_names) > 0: - features = df.loc[origin_index, multiplicative_regressors_names].values - regressors["multiplicative"] = torch.as_tensor( - np.expand_dims(np.array(features, dtype=np.float32), axis=0), dtype=torch.float32 - ) - else: - if len(additive_regressors_names) > 0: - features = df.loc[ - origin_index + 1 - n_lags : origin_index + n_forecasts, additive_regressors_names - ].values - regressors["additive"] = torch.as_tensor(np.array(features, dtype=np.float32), dtype=torch.float32) - if len(multiplicative_regressors_names) > 0: - features = df.loc[ - origin_index + 1 - n_lags : origin_index + n_forecasts, multiplicative_regressors_names - ].values - regressors["multiplicative"] = torch.as_tensor( - np.array(features, dtype=np.float32), dtype=torch.float32 - ) - inputs["regressors"] = regressors + inputs["regressors"] = get_sample_future_regressors( + df=df, + origin_index=origin_index, + n_forecasts=n_forecasts, + max_lags=max_lags, + n_lags=n_lags, + additive_regressors_names=additive_regressors_names, + multiplicative_regressors_names=multiplicative_regressors_names, + ) # FUTURE EVENTS: get the events features # create numpy array of values of additive and multiplicative events, at correct indexes # features dims: (n_forecasts, n_features) any_events = 0 < len(additive_event_and_holiday_names + multiplicative_event_and_holiday_names) if any_events: - events = OrderedDict({}) - if max_lags == 0: - # forecasts are at origin_index - if len(additive_event_and_holiday_names) > 0: - features = df.loc[origin_index, additive_event_and_holiday_names].values - events["additive"] = torch.as_tensor( - np.expand_dims(np.array(features, dtype=np.float32), axis=0), dtype=torch.float32 - ) - if 
len(multiplicative_event_and_holiday_names) > 0: - features = df.loc[origin_index, multiplicative_event_and_holiday_names].values - events["multiplicative"] = torch.as_tensor( - np.expand_dims(np.array(features, dtype=np.float32), axis=0), dtype=torch.float32 - ) - else: - # forecasts are at origin_index + 1 up to origin_index + n_forecasts - if len(additive_event_and_holiday_names) > 0: - features = df.loc[ - origin_index + 1 - n_lags : origin_index + n_forecasts, additive_event_and_holiday_names - ].values - events["additive"] = torch.as_tensor(np.array(features, dtype=np.float32), dtype=torch.float32) - - if len(multiplicative_event_and_holiday_names) > 0: - features = df.loc[ - origin_index + 1 - n_lags : origin_index + n_forecasts, multiplicative_event_and_holiday_names - ].values - events["multiplicative"] = torch.as_tensor(np.array(features, dtype=np.float32), dtype=torch.float32) - inputs["events"] = events + inputs["events"] = get_sample_future_events( + df=df, + origin_index=origin_index, + n_forecasts=n_forecasts, + max_lags=max_lags, + n_lags=n_lags, + additive_event_and_holiday_names=additive_event_and_holiday_names, + multiplicative_event_and_holiday_names=multiplicative_event_and_holiday_names, + ) # ONLY FOR DEBUGGING - # tabularized_input_shapes_str = "" - # for key, value in inputs.items(): - # if key in [ - # "seasonalities", - # "covariates", - # "events", - # "regressors", - # ]: - # for name, period_features in value.items(): - # tabularized_input_shapes_str += f" {name} {key} {period_features}\n" - # else: - # tabularized_input_shapes_str += f" {key} {value.shape} \n" - # log.debug(f"Tabularized inputs shapes: \n{tabularized_input_shapes_str}") + if log.level == 0: + log_input_shapes(inputs) return inputs, targets From 9fe44c4ec801829d8c5ba31dea9c8b4354e9b39f Mon Sep 17 00:00:00 2001 From: Simon W Date: Fri, 2 Feb 2024 12:15:53 -0800 Subject: [PATCH 069/128] daily_data --- tests/test_model_performance.py | 174 ++++++++++++++++---------------- 1 file changed, 85 insertions(+), 89 deletions(-) diff --git a/tests/test_model_performance.py b/tests/test_model_performance.py index d741153c7..8c68bddf8 100644 --- a/tests/test_model_performance.py +++ b/tests/test_model_performance.py @@ -325,92 +325,88 @@ def test_EnergyDailyDeep(): # TODO: adapt to hourly dataset with multiple IDs -# def test_EnergyPerformance(): -# ### Temporary Test for on-the-fly sampling - very time consuming! 
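# Illustrative debug output of log_input_shapes above (values assumed, e.g.
# n_lags=3, n_forecasts=2, so the per-sample time feature spans
# n_lags + n_forecasts = 5 steps):
#   time torch.Size([5])
#   lags torch.Size([3])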
- -# df = pd.read_csv(ENERGY_PRICE_DAILY_FILE) -# df = df[df["ds"] < "2018-01-01"] -# df["temp"] = df["temperature"] -# df["ds"] = pd.to_datetime(df["ds"]) -# df["y"] = pd.to_numeric(df["y"], errors="coerce") -# df["ID"] = "test" - -# # Conditional Seasonality -# df["winter"] = np.where( -# df["ds"].dt.month.isin( -# [ -# 10, -# 11, -# 12, -# 1, -# 2, -# 3, -# ] -# ), -# 1, -# 0, -# ) -# df["summer"] = np.where(df["ds"].dt.month.isin([4, 5, 6, 7, 8, 9]), 1, 0) -# df["winter"] = pd.to_numeric(df["winter"], errors="coerce") -# df["summer"] = pd.to_numeric(df["summer"], errors="coerce") - -# # Normalize Temperature -# df["temp"] = (df["temp"] - 65.0) / 50.0 - -# # df -# df = df[["ID", "ds", "y", "temp", "winter", "summer"]] - -# # Hyperparameter -# tuned_params = { -# "n_lags": 24 * 15, -# "newer_samples_weight": 2.0, -# "n_changepoints": 0, -# "yearly_seasonality": 10, -# "weekly_seasonality": True, -# "daily_seasonality": False, # due to conditional daily seasonality -# "batch_size": 128, -# "ar_layers": [32, 64, 32, 16], -# "lagged_reg_layers": [32, 32], -# # not tuned -# "n_forecasts": 33, -# "learning_rate": 0.001, -# "epochs": 30, -# "trend_global_local": "global", -# "season_global_local": "global", -# "drop_missing": True, -# "normalize": "standardize", -# } - -# # Uncertainty Quantification -# confidence_lv = 0.98 -# quantile_list = [round(((1 - confidence_lv) / 2), 2), round((confidence_lv + (1 - confidence_lv) / 2), 2)] - -# # Check if GPU is available -# use_gpu = torch.cuda.is_available() - -# # Set trainer configuration -# trainer_configs = { -# "accelerator": "gpu" if use_gpu else "cpu", -# } -# print(f"Using {'GPU' if use_gpu else 'CPU'}") - -# # Model -# m = NeuralProphet(**tuned_params, **trainer_configs, quantiles=quantile_list) - -# # Lagged Regressor -# m.add_lagged_regressor(names="temp", n_lags=33, normalize="standardize") - -# # Conditional Seasonality -# m.add_seasonality(name="winter", period=1, fourier_order=6, condition_name="winter") -# m.add_seasonality(name="summer", period=1, fourier_order=6, condition_name="summer") - -# # Holidays -# m.add_country_holidays(country_name="US", lower_window=-1, upper_window=1) - -# # Split -# df_train = df[df["ds"] < "2016-05-01"] -# df_test = df[df["ds"] >= "2016-05-01"] - -# # Training & Predict -# _ = m.fit(df=df_train, freq="H", num_workers=4, early_stopping=True) -# _ = m.predict(df_test) +def test_EnergyHourlyDeep(): + ### Temporary Test for on-the-fly sampling - very time consuming! 
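    # The concatenation below produces NeuralProphet's long "global" format:
    # a single DataFrame with an ID column, e.g. (illustrative values, with
    # y of "test2" being 0.3 * y of "test"):
    #      ds                     y     ID
    #      2015-01-01 00:00:00  100.0   test
    #      2015-01-01 00:00:00   30.0   test2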
+ + df = pd.read_csv(ENERGY_PRICE_DAILY_FILE) + df["temp"] = df["temperature"] + df = df.drop(columns="temperature") + df["ds"] = pd.to_datetime(df["ds"]) + df["y"] = pd.to_numeric(df["y"], errors="coerce") + + df = df.drop("ds", axis=1) + df['ds'] = pd.date_range(start="2015-01-01 00:00:00", periods=len(df), freq="H") + df["ID"] = "test" + + df_id = df[['ds', 'y', 'temp']].copy() + df_id['ID'] = "test2" + df_id['y'] = df_id['y'] * 0.3 + df_id['temp'] = df_id['temp'] * 0.4 + df = pd.concat([df, df_id], ignore_index=True) + + # Conditional Seasonality + df["winter"] = np.where(df["ds"].dt.month.isin([1]), 1, 0,) + df["summer"] = np.where(df["ds"].dt.month.isin([2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), 1, 0) + df["winter"] = pd.to_numeric(df["winter"], errors="coerce") + df["summer"] = pd.to_numeric(df["summer"], errors="coerce") + + # Normalize Temperature + df["temp"] = (df["temp"] - 65.0) / 50.0 + + # df + df = df[["ID", "ds", "y", "temp", "winter", "summer"]] + + # Hyperparameter + tuned_params = { + "n_lags": 24 * 15, + "newer_samples_weight": 2.0, + "n_changepoints": 0, + "yearly_seasonality": 10, + "weekly_seasonality": True, + "daily_seasonality": False, # due to conditional daily seasonality + "batch_size": 128, + "ar_layers": [32, 64, 32, 16], + "lagged_reg_layers": [32, 32], + # not tuned + "n_forecasts": 33, + "learning_rate": 0.001, + "epochs": 30, + "trend_global_local": "global", + "season_global_local": "global", + "drop_missing": True, + "normalize": "standardize", + } + + # Uncertainty Quantification + confidence_lv = 0.98 + quantile_list = [round(((1 - confidence_lv) / 2), 2), round((confidence_lv + (1 - confidence_lv) / 2), 2)] + + # Check if GPU is available + use_gpu = torch.cuda.is_available() + + # Set trainer configuration + trainer_configs = { + "accelerator": "gpu" if use_gpu else "cpu", + } + print(f"Using {'GPU' if use_gpu else 'CPU'}") + + # Model + m = NeuralProphet(**tuned_params, **trainer_configs, quantiles=quantile_list) + + # Lagged Regressor + m.add_lagged_regressor(names="temp", n_lags=33, normalize="standardize") + + # Conditional Seasonality + m.add_seasonality(name="winter", period=1, fourier_order=6, condition_name="winter") + m.add_seasonality(name="summer", period=1, fourier_order=6, condition_name="summer") + + # Holidays + m.add_country_holidays(country_name="US", lower_window=-1, upper_window=1) + + # Split + df_train = df[df["ds"] < "2015-03-01"] + df_test = df[df["ds"] >= "2015-03-01"] + + # Training & Predict + _ = m.fit(df=df_train, freq="H", num_workers=4, early_stopping=True) + _ = m.predict(df_test) From 7d84b37d8d2bd0b6bf0af5f50baeb74499f7d7d1 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Tue, 6 Feb 2024 16:29:40 -0800 Subject: [PATCH 070/128] start nan check for smaple mask --- neuralprophet/time_dataset.py | 162 +++++++++++++++++++++++++--------- 1 file changed, 119 insertions(+), 43 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 9ad350ce5..614c9b4ed 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -6,6 +6,7 @@ import numpy as np import pandas as pd import torch +from numpy.lib.stride_tricks import sliding_window_view from torch.utils.data.dataset import Dataset from neuralprophet import configure, utils @@ -76,7 +77,8 @@ def __init__( self.config_missing = config_missing self.max_lags = get_max_num_lags(n_lags=self.n_lags, config_lagged_regressors=self.config_lagged_regressors) - + if self.max_lags == 0: + assert self.n_forecasts == 1 self.two_level_inputs 
= ["seasonalities", "covariates", "events", "regressors"] # Preprocessing of events and holidays features (added to self.df) @@ -154,7 +156,6 @@ def __len__(self): def sample_index_to_df_index(self, sample_index): """Translates a single outer sample to dataframe index""" - # Will need more sophisticated mapping for GlobalTimeDataset return self.sample2index_map[sample_index] def create_sample2index_map(self, df): @@ -174,16 +175,29 @@ def create_sample2index_map(self, df): # analogous to `self.filter_samples_after_init(self.kwargs["prediction_frequency"])` prediction_frequency_mask = create_prediction_frequency_filter_mask(df, self.prediction_frequency) + # Combine prediction origin masks + valid_prediction_mask = np.logical_and(prediction_frequency_mask, origin_start_end_mask) + # TODO Create NAN-free index mapping of sample index to df index # analogous to `self.drop_nan_after_init( # self.df, self.kwargs["predict_steps"], self.kwargs["config_missing"].drop_missing) nan_mask = create_nan_mask( - df, self.predict_steps, self.config_missing.drop_missing + df=df, + predict_steps=self.predict_steps, + drop_missing=self.config_missing.drop_missing, + n_lags=self.n_lags, ) # boolean array where NAN are False - # Combine masks - mask = np.logical_and(prediction_frequency_mask, origin_start_end_mask) - valid_sample_mask = np.logical_and(mask, nan_mask) + # Filter NAN + valid_sample_mask = np.logical_and(valid_prediction_mask, nan_mask) + n_clean_data_samples = sum(valid_prediction_mask) + n_real_data_samples = sum(valid_sample_mask) + nan_samples_to_drop = n_clean_data_samples - n_real_data_samples + if nan_samples_to_drop > 0 and not self.config_missing.drop_missing: + raise ValueError( + f"NANs found. {nan_samples_to_drop} samples affected. Set `drop_missing` to `True` to drop these samples." + ) + # Convert boolean valid_sample to list of the positinal index of all true/one entries # e.g. [0,0,1,1,0,1,0] -> [2,3,5] index_range = np.arange(0, df_length) @@ -445,13 +459,11 @@ def tabularize_univariate_datetime_single_index( Targets to be predicted of same length as each of the model inputs, dims: (n_forecasts, 1) """ # TODO: pre-process all type conversions (e.g. 
torch.float32) in __init__ + # Note: if max_lags == 0, then n_forecasts == 1 # sample features are stored and returned in OrderedDict inputs = OrderedDict({}) - if max_lags == 0: - assert n_forecasts == 1 - targets = get_sample_targets( df=df, origin_index=origin_index, n_forecasts=n_forecasts, max_lags=max_lags, predict_mode=predict_mode ) @@ -598,38 +610,6 @@ def get_event_offset_features(event, config, feature): return events -def _create_event_offset_features(event, config, feature, additive_events, multiplicative_events): - """ - Create event offset features for the given event, config and feature - Parameters - ---------- - event : str - Name of the event - config : configure.ConfigEvents - User specified events, holidays, and country specific holidays - feature : pd.Series - Feature for the event - additive_events : pd.DataFrame - Dataframe of additive events - multiplicative_events : pd.DataFrame - Dataframe of multiplicative events - Returns - ------- - tuple - Tuple of additive_events and multiplicative_events - """ - lw = config.lower_window - uw = config.upper_window - mode = config.mode - for offset in range(lw, uw + 1): - key = utils.create_event_names_for_offsets(event, offset) - offset_feature = feature.shift(periods=offset, fill_value=0.0) - if mode == "additive": - additive_events[key] = offset_feature - else: - multiplicative_events[key] = offset_feature - - def add_event_features_to_df( df, config_events: Optional[configure.ConfigEvents] = None, @@ -759,7 +739,7 @@ def create_prediction_frequency_filter_mask(df: pd.DataFrame, prediction_frequen return mask -def create_nan_mask(df, predict_steps, drop_missing): +def create_nan_mask(df, predict_steps, drop_missing, predict_mode, max_lags, n_lags, n_forecasts): """Creates mask for each prediction origin, accounting for corresponding input lags / forecast targets containing any NaN values. @@ -770,9 +750,105 @@ def create_nan_mask(df, predict_steps, drop_missing): predict_steps : int number of steps to predict """ + # check y: lags: + non_nan = np.ones(len(df), dtype=bool) + df_isna = df.isna() + if n_lags > 0: + # boolean vector, starting at origin_index = n_lags -1 + y_lags_nan = sliding_window_view(df_isna["y_scaled"], window_shape=n_lags, axis=0).any(axis=-1) + # fill first n_lags -1 positions with True + y_lags_nan = np.pad(y_lags_nan, pad_width=(n_lags - 1, 0), mode="constant", constant_values=True) + y_lags_valid = np.logical_not(y_lags_nan) + non_nan = np.logical_and(non_nan, y_lags_valid) + + # Targets + if predict_mode: + targets_valid = np.ones(len(df), dtype=bool) + else: + if n_forecasts == 1: + if max_lags == 0: # y-series and origin index match + targets_valid = np.logical_not(df_isna["y_scaled"].values) + if max_lags > 0: + targets_nan = df_isna.loc[1:, "y_scaled"].values + targets_nan = np.pad(targets_nan, pad_width=(1, 0), mode="constant", constant_values=True) + targets_valid = np.logical_not(targets_nan) + else: + targets_nan = sliding_window_view(df_isna["y_scaled"], window_shape=n_forecasts, axis=0).any(axis=-1) + # first entry corresponds to origin_index -1, drop this. + targets_nan = targets_nan[1:] + # pad last n_forecasts as missing, as forecast origins will have missing forecast-targets there. 
+            targets_nan = np.pad(targets_nan, pad_width=(0, n_forecasts), mode="constant", constant_values=True)
+            targets_valid = np.logical_not(targets_nan)
+
+    non_nan = np.logical_and(non_nan, targets_valid)
+    return non_nan
+
    # TIME: the time at each sample's lags and forecasts
    if max_lags == 0:
        t = df.at[origin_index, "t"]
        inputs["time"] = torch.tensor(np.expand_dims(t, 0), dtype=torch.float32)
    else:
        # extract time value of n_lags steps before and including origin_index and n_forecasts steps after origin_index
        # Note: df.loc is inclusive of slice end, while df.iloc is not.
        t = df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts, "t"].values
        inputs["time"] = torch.as_tensor(t, dtype=torch.float32)

    # LAGS: From y-series, extract preceding n_lags steps up to and including origin_index
    if n_lags >= 1 and "y_scaled" in df.columns:
        # Note: df.loc is inclusive of slice end, while df.iloc is not.
        lags = df.loc[origin_index - n_lags + 1 : origin_index, "y_scaled"].values
        inputs["lags"] = torch.as_tensor(lags, dtype=torch.float32)

    # COVARIATES / LAGGED REGRESSORS: Lagged regressor inputs: analogous to LAGS
    if config_lagged_regressors is not None and max_lags > 0:
        inputs["covariates"] = get_sample_lagged_regressors(
            df=df, origin_index=origin_index, config_lagged_regressors=config_lagged_regressors
        )

    # SEASONALITIES
    if config_seasonality is not None:
        inputs["seasonalities"] = get_sample_seasonalities(
            df=df,
            origin_index=origin_index,
            n_forecasts=n_forecasts,
            max_lags=max_lags,
            n_lags=n_lags,
            config_seasonality=config_seasonality,
        )

    # FUTURE REGRESSORS: get the future regressors features
    # create numpy array of values of additive and multiplicative regressors, at correct indexes
    # features dims: (n_forecasts, n_features)
    any_future_regressors = 0 < len(additive_regressors_names + multiplicative_regressors_names)
    if any_future_regressors:  # if config_regressors is not None:
        inputs["regressors"] = get_sample_future_regressors(
            df=df,
            origin_index=origin_index,
            n_forecasts=n_forecasts,
            max_lags=max_lags,
            n_lags=n_lags,
            additive_regressors_names=additive_regressors_names,
            multiplicative_regressors_names=multiplicative_regressors_names,
        )

    # FUTURE EVENTS: get the events features
    # create numpy array of values of additive and multiplicative events, at correct indexes
    # features dims: (n_forecasts, n_features)
    any_events = 0 < len(additive_event_and_holiday_names + multiplicative_event_and_holiday_names)
    if any_events:
        inputs["events"] = get_sample_future_events(
            df=df,
            origin_index=origin_index,
            n_forecasts=n_forecasts,
            max_lags=max_lags,
            n_lags=n_lags,
            additive_event_and_holiday_names=additive_event_and_holiday_names,
            multiplicative_event_and_holiday_names=multiplicative_event_and_holiday_names,
        )

    # IMPORTANT !! 
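    # Ahead of the TODO below, a hedged sketch of the intended mask-based
    # filtering (names are illustrative, mirroring create_sample2index_map):
    #   valid = np.logical_and(np.logical_and(prediction_frequency_mask, origin_start_end_mask), nan_mask)
    #   sample2index_map = np.arange(len(df))[valid]  # e.g. [0,0,1,1,0,1,0] -> [2, 3, 5]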
# TODO implement actual filtering - return np.ones(len(df), dtype=bool) + # return np.ones(len(df), dtype=bool) # Create index mapping of sample index to df index # - Filter missing samples (does not actually drop, but creates indexmapping) From 79ad0e70710cefa8b2cb0bc4c88ca07d0e8e35e9 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Tue, 6 Feb 2024 16:38:33 -0800 Subject: [PATCH 071/128] working on time nan2 --- neuralprophet/time_dataset.py | 48 ++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 614c9b4ed..1929816b6 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -763,24 +763,38 @@ def create_nan_mask(df, predict_steps, drop_missing, predict_mode, max_lags, n_l # Targets if predict_mode: + # Targets not needed targets_valid = np.ones(len(df), dtype=bool) else: - if n_forecasts == 1: - if max_lags == 0: # y-series and origin index match - targets_valid = np.logical_not(df_isna["y_scaled"].values) - if max_lags > 0: + if max_lags == 0: # y-series and origin index match + targets_valid = np.logical_not(df_isna["y_scaled"].values) + else: + if n_forecasts == 1: targets_nan = df_isna.loc[1:, "y_scaled"].values - targets_nan = np.pad(targets_nan, pad_width=(1, 0), mode="constant", constant_values=True) + targets_nan = np.pad(targets_nan, pad_width=(0, 1), mode="constant", constant_values=True) targets_valid = np.logical_not(targets_nan) - else: - targets_nan = sliding_window_view(df_isna["y_scaled"], window_shape=n_forecasts, axis=0).any(axis=-1) - # first entry corresponds to origin_index -1, drop this. - targets_nan = targets_nan[1:] - # pad last n_forecasts as missing, as forecast origins will have missing forecast-targets there. - targets_nan = np.pad(targets_nan, pad_width=(0, n_forecasts), mode="constant", constant_values=True) - targets_valid = np.logical_not(targets_nan) - - non_nan = np.logical_and(non_nan, targets_valid) + else: # This is also correct for n_forecasts == 1, but slower. + targets_nan = sliding_window_view(df_isna["y_scaled"], window_shape=n_forecasts, axis=0).any(axis=-1) + # first entry corresponds to origin_index -1, drop this. + targets_nan = targets_nan[1:] + # pad last n_forecasts as missing, as forecast origins will have missing forecast-targets there. + targets_nan = np.pad(targets_nan, pad_width=(0, n_forecasts), mode="constant", constant_values=True) + targets_valid = np.logical_not(targets_nan) + non_nan = np.logical_and(non_nan, targets_valid) + + # TIME: the time at each sample's lags and forecasts + if max_lags == 0: # y-series and origin_index match + time_valid = np.logical_not(df_isna["t"].values) + else: + # TODO: sliding_window_view and pad operations. + time_valid = np.ones(len(df), dtype=bool) + ## inspiration from tabularization: + # extract time value of n_lags steps before and icluding origin_index and n_forecasts steps after origin_index + # Note: df.loc is inclusive of slice end, while df.iloc is not. 
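+        # A possible vectorized sketch for the TODO above (an assumption, not
+        # the final implementation; needs numpy>=1.20 for sliding_window_view):
+        #   window = n_lags + n_forecasts
+        #   time_nan = sliding_window_view(df_isna["t"], window_shape=window).any(axis=-1)
+        #   head = n_lags - 1 if n_lags > 0 else 0
+        #   time_nan = np.pad(time_nan, pad_width=(head, n_forecasts), mode="constant", constant_values=True)
+        #   if n_lags == 0:  # first window starts at origin_index -1, drop it
+        #       time_nan = time_nan[1:]
+        #   time_valid = np.logical_not(time_nan)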
+    # t = df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts, "t"].values
+    # inputs["time"] = torch.as_tensor(t, dtype=torch.float32)
+    non_nan = np.logical_and(non_nan, time_valid)
+
+    return non_nan
 
     # TIME: the time at each sample's lags and forecasts
@@ -793,12 +807,6 @@ def create_nan_mask(df, predict_steps, drop_missing, predict_mode, max_lags, n_l
         t = df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts, "t"].values
         inputs["time"] = torch.as_tensor(t, dtype=torch.float32)
 
-    # LAGS: From y-series, extract preceding n_lags steps up to and including origin_index
-    if n_lags >= 1 and "y_scaled" in df.columns:
-        # Note: df.loc is inclusive of slice end, while df.iloc is not.
-        lags = df.loc[origin_index - n_lags + 1 : origin_index, "y_scaled"].values
-        inputs["lags"] = torch.as_tensor(lags, dtype=torch.float32)
-
     # COVARIATES / LAGGED REGRESSORS: Lagged regressor inputs: analogous to LAGS
     if config_lagged_regressors is not None and max_lags > 0:
         inputs["covariates"] = get_sample_lagged_regressors(

From 469b11c3d9f0170a83051441b67ef4c5de02f018 Mon Sep 17 00:00:00 2001
From: ourownstory
Date: Wed, 7 Feb 2024 15:16:27 -0800
Subject: [PATCH 072/128] fix tests

---
 neuralprophet/time_dataset.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py
index 1929816b6..1e9993ece 100644
--- a/neuralprophet/time_dataset.py
+++ b/neuralprophet/time_dataset.py
@@ -185,7 +185,10 @@ def create_sample2index_map(self, df):
             df=df,
             predict_steps=self.predict_steps,
             drop_missing=self.config_missing.drop_missing,
+            predict_mode=self.predict_mode,
+            max_lags=self.max_lags,
             n_lags=self.n_lags,
+            n_forecasts=self.n_forecasts,
         )  # boolean array where NaNs are False
 
         # Filter NaNs

From 38f70fad412b8f3947ef88fcc53bbf5da1500fc2 Mon Sep 17 00:00:00 2001
From: ourownstory
Date: Wed, 7 Feb 2024 17:08:17 -0800
Subject: [PATCH 073/128] finish nan-check

---
 neuralprophet/time_dataset.py | 278 +++++++++++++++++-----------------
 1 file changed, 139 insertions(+), 139 deletions(-)

diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py
index 1e9993ece..acde711ea 100644
--- a/neuralprophet/time_dataset.py
+++ b/neuralprophet/time_dataset.py
@@ -189,6 +189,9 @@ def create_sample2index_map(self, df):
             max_lags=self.max_lags,
             n_lags=self.n_lags,
             n_forecasts=self.n_forecasts,
+            config_lagged_regressors=self.config_lagged_regressors,
+            future_regressor_names=self.additive_regressors_names + self.multiplicative_regressors_names,
+            event_names=self.additive_event_and_holiday_names + self.multiplicative_event_and_holiday_names,
         )  # boolean array where NaNs are False
 
         # Filter NaNs
@@ -272,13 +275,13 @@ def get_sample_targets(df, origin_index, n_forecasts, max_lags, predict_mode):
 def get_sample_lagged_regressors(df, origin_index, config_lagged_regressors):
     lagged_regressors = OrderedDict({})
     # Future TODO: optimize this computation for many lagged_regressors
-    for lagged_reg in df.columns:
-        if lagged_reg in config_lagged_regressors:
-            covar_lags = config_lagged_regressors[lagged_reg].n_lags
+    for name in df.columns:
+        if name in config_lagged_regressors:
+            covar_lags = config_lagged_regressors[name].n_lags
             assert covar_lags > 0
            # Note: df.loc is inclusive of slice end, while df.iloc is not. 
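             # Illustrative example of the inclusive-end behavior relied on here
             # (a hedged sketch, not part of the library):
             #   s = pd.Series([10, 20, 30, 40])
             #   s.loc[1:3].values   # array([20, 30, 40]) - .loc includes the end label
             #   s.iloc[1:3].values  # array([20, 30])     - .iloc excludes the end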
- lagged_regressors[lagged_reg] = df.loc[origin_index - covar_lags + 1 : origin_index, lagged_reg].values - lagged_regressors[lagged_reg] = torch.as_tensor(lagged_regressors[lagged_reg], dtype=torch.float32) + lagged_regressors[name] = df.loc[origin_index - covar_lags + 1 : origin_index, name].values + lagged_regressors[name] = torch.as_tensor(lagged_regressors[name], dtype=torch.float32) return lagged_regressors @@ -488,7 +491,7 @@ def tabularize_univariate_datetime_single_index( inputs["lags"] = torch.as_tensor(lags, dtype=torch.float32) # COVARIATES / LAGGED REGRESSORS: Lagged regressor inputs: analogous to LAGS - if config_lagged_regressors is not None and max_lags > 0: + if config_lagged_regressors is not None: # and max_lags > 0: inputs["covariates"] = get_sample_lagged_regressors( df=df, origin_index=origin_index, config_lagged_regressors=config_lagged_regressors ) @@ -742,7 +745,18 @@ def create_prediction_frequency_filter_mask(df: pd.DataFrame, prediction_frequen return mask -def create_nan_mask(df, predict_steps, drop_missing, predict_mode, max_lags, n_lags, n_forecasts): +def create_nan_mask( + df, + predict_steps, + drop_missing, + predict_mode, + max_lags, + n_lags, + n_forecasts, + config_lagged_regressors, + future_regressor_names, + event_names, +): """Creates mask for each prediction origin, accounting for corresponding input lags / forecast targets containing any NaN values. @@ -753,18 +767,10 @@ def create_nan_mask(df, predict_steps, drop_missing, predict_mode, max_lags, n_l predict_steps : int number of steps to predict """ - # check y: lags: - non_nan = np.ones(len(df), dtype=bool) + valid_origins = np.ones(len(df), dtype=bool) df_isna = df.isna() - if n_lags > 0: - # boolean vector, starting at origin_index = n_lags -1 - y_lags_nan = sliding_window_view(df_isna["y_scaled"], window_shape=n_lags, axis=0).any(axis=-1) - # fill first n_lags -1 positions with True - y_lags_nan = np.pad(y_lags_nan, pad_width=(n_lags - 1, 0), mode="constant", constant_values=True) - y_lags_valid = np.logical_not(y_lags_nan) - non_nan = np.logical_and(non_nan, y_lags_valid) - # Targets + # TARGETS if predict_mode: # Targets not needed targets_valid = np.ones(len(df), dtype=bool) @@ -773,7 +779,7 @@ def create_nan_mask(df, predict_steps, drop_missing, predict_mode, max_lags, n_l targets_valid = np.logical_not(df_isna["y_scaled"].values) else: if n_forecasts == 1: - targets_nan = df_isna.loc[1:, "y_scaled"].values + targets_nan = df_isna["y_scaled"].values[1:] targets_nan = np.pad(targets_nan, pad_width=(0, 1), mode="constant", constant_values=True) targets_valid = np.logical_not(targets_nan) else: # This is also correct for n_forecasts == 1, but slower. @@ -783,129 +789,123 @@ def create_nan_mask(df, predict_steps, drop_missing, predict_mode, max_lags, n_l # pad last n_forecasts as missing, as forecast origins will have missing forecast-targets there. targets_nan = np.pad(targets_nan, pad_width=(0, n_forecasts), mode="constant", constant_values=True) targets_valid = np.logical_not(targets_nan) - non_nan = np.logical_and(non_nan, targets_valid) - - # TIME: the time at each sample's lags and forecasts - if max_lags == 0: # y-series and origin_index match - time_valid = np.logical_not(df_isna["t"].values) - else: - # TODO: sliding_window_view and pad operations. 
- time_valid = np.ones(len(df), dtype=bool) - ## inspiration from tabularization: - # extract time value of n_lags steps before and icluding origin_index and n_forecasts steps after origin_index - # Note: df.loc is inclusive of slice end, while df.iloc is not. - # t = df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts, "t"].values - # inputs["time"] = torch.as_tensor(t, dtype=torch.float32) - non_nan = np.logical_and(non_nan, time_valid) - - return non_nan - - # TIME: the time at each sample's lags and forecasts - if max_lags == 0: - t = df.at[origin_index, "t"] - inputs["time"] = torch.tensor(np.expand_dims(t, 0), dtype=torch.float32) - else: - # extract time value of n_lags steps before and icluding origin_index and n_forecasts steps after origin_index - # Note: df.loc is inclusive of slice end, while df.iloc is not. - t = df.loc[origin_index - n_lags + 1 : origin_index + n_forecasts, "t"].values - inputs["time"] = torch.as_tensor(t, dtype=torch.float32) - - # COVARIATES / LAGGED REGRESSORS: Lagged regressor inputs: analogous to LAGS - if config_lagged_regressors is not None and max_lags > 0: - inputs["covariates"] = get_sample_lagged_regressors( - df=df, origin_index=origin_index, config_lagged_regressors=config_lagged_regressors - ) - - # SEASONALITIES_ - if config_seasonality is not None: - inputs["seasonalities"] = get_sample_seasonalities( - df=df, - origin_index=origin_index, - n_forecasts=n_forecasts, - max_lags=max_lags, - n_lags=n_lags, - config_seasonality=config_seasonality, - ) + valid_origins = np.logical_and(valid_origins, targets_valid) - # FUTURE REGRESSORS: get the future regressors features - # create numpy array of values of additive and multiplicative regressors, at correct indexes - # features dims: (n_forecasts, n_features) - any_future_regressors = 0 < len(additive_regressors_names + multiplicative_regressors_names) - if any_future_regressors: # if config_regressors is not None: - inputs["regressors"] = get_sample_future_regressors( - df=df, - origin_index=origin_index, - n_forecasts=n_forecasts, - max_lags=max_lags, - n_lags=n_lags, - additive_regressors_names=additive_regressors_names, - multiplicative_regressors_names=multiplicative_regressors_names, - ) - - # FUTURE EVENTS: get the events features - # create numpy array of values of additive and multiplicative events, at correct indexes - # features dims: (n_forecasts, n_features) - any_events = 0 < len(additive_event_and_holiday_names + multiplicative_event_and_holiday_names) - if any_events: - inputs["events"] = get_sample_future_events( - df=df, - origin_index=origin_index, - n_forecasts=n_forecasts, - max_lags=max_lags, - n_lags=n_lags, - additive_event_and_holiday_names=additive_event_and_holiday_names, - multiplicative_event_and_holiday_names=multiplicative_event_and_holiday_names, - ) - - # IMPORTANT !! - # TODO implement actual filtering - # return np.ones(len(df), dtype=bool) - - # Create index mapping of sample index to df index - # - Filter missing samples (does not actually drop, but creates indexmapping) - # -- drop nan analogous to `self.drop_nan_after_init(self.df, self.kwargs["predict_steps"], self.kwargs["config_missing"].drop_missing) - # Note: needs to also account for NANs in lagged inputs or in n_forecasts, not just first target. - # Implement a convolutional filter for targets and each lagged regressor. - # Also account for future regressors and events. 
- - # Rewrite to return mask instead of filtering df: - nan_idx = [] - # NaNs in inputs - for key, data in self.inputs.items(): - if isinstance(data, torch.Tensor): - nans = torch.where(torch.isnan(data))[0].tolist() - if len(nans) > 0: - nan_idx += nans - elif isinstance(data, dict): - for subkey, subdata in data.items(): - nans = torch.where(torch.isnan(subdata))[0].tolist() - if len(nans) > 0: - nan_idx += nans - - # NaNs in targets that are not inserted for prediction at the end - nans = torch.where(torch.isnan(self.targets))[0].tolist() - if len(nans) > 0: - for idx in nans: - if idx not in nan_idx and idx < len(self) - predict_steps: - nan_idx.append(idx) - - nan_idx = list(set(nan_idx)) - nan_idx.sort() - if drop_missing and len(nan_idx) > 0: - log.warning(f"{len(nan_idx)} samples with missing values were dropped from the data. ") - for key, data in self.inputs.items(): - if key not in ["time", "lags"]: # "time_lagged" - for name, features in data.items(): - self.inputs[key][name] = np.delete(self.inputs[key][name], nan_idx, 0) + # AR LAGS + if n_lags > 0: + # boolean vector, starting at origin_index = n_lags -1 + y_lags_nan = sliding_window_view(df_isna["y_scaled"], window_shape=n_lags, axis=0).any(axis=-1) + # fill first n_lags -1 positions with True + # as there are missing lags for the corresponding origin_indexes + y_lags_nan = np.pad(y_lags_nan, pad_width=(n_lags - 1, 0), mode="constant", constant_values=True) + y_lags_valid = np.logical_not(y_lags_nan) + valid_origins = np.logical_and(valid_origins, y_lags_valid) + + # LAGGED REGRESSORS + if config_lagged_regressors is not None: # and max_lags > 0: + reg_lags_valid = np.ones(len(df), dtype=bool) + for name in df.columns: + if name in config_lagged_regressors: + n_reg_lags = config_lagged_regressors[name].n_lags + if n_reg_lags > 0: + # boolean vector, starting at origin_index = n_lags -1 + reg_lags_nan = sliding_window_view(df_isna[name], window_shape=n_reg_lags, axis=0).any(axis=-1) + # fill first n_reg_lags -1 positions with True, + # as there are missing lags for the corresponding origin_indexes + reg_lags_nan = np.pad( + reg_lags_nan, pad_width=(n_reg_lags - 1, 0), mode="constant", constant_values=True + ) + reg_lags_valid_i = np.logical_not(reg_lags_nan) + reg_lags_valid = np.logical_and(reg_lags_valid, reg_lags_valid_i) + valid_origins = np.logical_and(valid_origins, reg_lags_valid) + + # TIME: TREND & SEASONALITY: the time at each sample's lags and forecasts + # FUTURE REGRESSORS + # EVENTS + for names in [["t"], future_regressor_names, event_names]: + if len(names) > 0: + valid_columns = mask_origin_without_nan_for_columns(df_isna, names, max_lags, n_lags, n_forecasts) + valid_origins = np.logical_and(valid_origins, valid_columns) + + # # TIME: TREND & SEASONALITY: the time at each sample's lags and forecasts + # if max_lags == 0: # y-series and origin_index match + # time_valid = np.logical_not(df_isna["t"].values) + # else: + # time_nan = sliding_window_view(df_isna["t"], window_shape=n_lags+n_forecasts, axis=0).any(axis=-1) + # # first sample is at origin_index = n_lags -1, + # if n_lags == 0: # first sample origin index is at -1 + # time_nan = time_nan[1:] + # else: + # time_nan = np.pad(time_nan, pad_width=(n_lags-1, 0), mode="constant", constant_values=True) + # # there are n_forecasts origin_indexes missing at end + # time_nan = np.pad(time_nan, pad_width=(0, n_forecasts), mode="constant", constant_values=True) + # time_valid = np.logical_not(time_nan) + # non_nan = np.logical_and(non_nan, time_valid) + + # # 
FUTURE REGRESSORS + # if len(future_regressor_names) > 0: + # if max_lags == 0: + # fut_reg_nan = df_isna.loc[:, future_regressor_names] + # assert len(fut_reg_nan.shape) == 2 + # fut_reg_nan = fut_reg_nan.any(axis=-1) + # else: + # fut_reg_nan = sliding_window_view(df_isna.loc[:, future_regressor_names], window_shape=n_lags+n_forecasts, axis=0).any(axis=-1) + # assert len(fut_reg_nan.shape) == 2 + # fut_reg_nan = fut_reg_nan.any(axis=-1) + # # first sample is at origin_index = n_lags -1, + # if n_lags == 0: # first sample origin index is at -1 + # fut_reg_nan = fut_reg_nan[1:] + # else: + # fut_reg_nan = np.pad(fut_reg_nan, pad_width=(n_lags-1, 0), mode="constant", constant_values=True) + # # there are n_forecasts origin_indexes missing at end + # fut_reg_nan = np.pad(fut_reg_nan, pad_width=(0, n_forecasts), mode="constant", constant_values=True) + # fut_reg_valid = np.logical_not(fut_reg_nan) + # non_nan = np.logical_and(non_nan, fut_reg_valid) + + # # EVENTS + # if len(event_names) > 0: + # if max_lags == 0: + # event_nan = df_isna.loc[:, event_names] + # assert len(event_nan.shape) == 2 + # event_nan = event_nan.any(axis=-1) + # else: + # event_nan = sliding_window_view(df_isna.loc[:, event_names], window_shape=n_lags+n_forecasts, axis=0).any(axis=-1) + # assert len(event_nan.shape) == 2 + # event_nan = event_nan.any(axis=-1) + # # first sample is at origin_index = n_lags -1, + # if n_lags == 0: # first sample origin index is at -1 + # event_nan = event_nan[1:] + # else: + # event_nan = np.pad(event_nan, pad_width=(n_lags-1, 0), mode="constant", constant_values=True) + # # there are n_forecasts origin_indexes missing at end + # event_nan = np.pad(event_nan, pad_width=(0, n_forecasts), mode="constant", constant_values=True) + # event_valid = np.logical_not(event_nan) + # non_nan = np.logical_and(non_nan, event_valid) + + return valid_origins + + +def mask_origin_without_nan_for_columns(df_isna, names, max_lags, n_lags, n_forecasts): + # assert len(names) > 0 + contains_nan = df_isna.loc[:, names] + if len(contains_nan.shape) > 1: + assert len(contains_nan.shape) == 2 + contains_nan = contains_nan.any(axis=-1) + if max_lags > 0: + if n_lags == 0 and n_forecasts == 1: + contains_nan = contains_nan[1:] + contains_nan = np.pad(contains_nan, pad_width=(0, 1), mode="constant", constant_values=True) + else: + contains_nan = sliding_window_view(contains_nan, window_shape=n_lags + n_forecasts, axis=0).any(axis=-1) + # first sample is at origin_index = n_lags -1, + if n_lags == 0: # first sample origin index is at -1 + contains_nan = contains_nan[1:] else: - self.inputs[key] = np.delete(self.inputs[key], nan_idx, 0) - self.targets = np.delete(self.targets, nan_idx, 0) - self.length = self.inputs["time"].shape[0] - if not drop_missing and len(nan_idx) > 0: - raise ValueError( - "Inputs/targets with missing values detected. " - "Please either adjust imputation parameters, or set 'drop_missing' to True to drop those samples." 
- ) + contains_nan = np.pad(contains_nan, pad_width=(n_lags - 1, 0), mode="constant", constant_values=True) + # there are n_forecasts origin_indexes missing at end + contains_nan = np.pad(contains_nan, pad_width=(0, n_forecasts), mode="constant", constant_values=True) + valid_origins = np.logical_not(contains_nan) + return valid_origins def sort_regressor_names(config): From cfb2562c921e15a01aa63747d08831526d65e1f9 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Wed, 7 Feb 2024 17:12:14 -0800 Subject: [PATCH 074/128] fix dims --- neuralprophet/time_dataset.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index acde711ea..21072c5d3 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -826,6 +826,7 @@ def create_nan_mask( if len(names) > 0: valid_columns = mask_origin_without_nan_for_columns(df_isna, names, max_lags, n_lags, n_forecasts) valid_origins = np.logical_and(valid_origins, valid_columns) + return valid_origins # # TIME: TREND & SEASONALITY: the time at each sample's lags and forecasts # if max_lags == 0: # y-series and origin_index match @@ -840,7 +841,7 @@ def create_nan_mask( # # there are n_forecasts origin_indexes missing at end # time_nan = np.pad(time_nan, pad_width=(0, n_forecasts), mode="constant", constant_values=True) # time_valid = np.logical_not(time_nan) - # non_nan = np.logical_and(non_nan, time_valid) + # valid_origins = np.logical_and(valid_origins, time_valid) # # FUTURE REGRESSORS # if len(future_regressor_names) > 0: @@ -860,7 +861,7 @@ def create_nan_mask( # # there are n_forecasts origin_indexes missing at end # fut_reg_nan = np.pad(fut_reg_nan, pad_width=(0, n_forecasts), mode="constant", constant_values=True) # fut_reg_valid = np.logical_not(fut_reg_nan) - # non_nan = np.logical_and(non_nan, fut_reg_valid) + # valid_origins = np.logical_and(valid_origins, fut_reg_valid) # # EVENTS # if len(event_names) > 0: @@ -880,9 +881,8 @@ def create_nan_mask( # # there are n_forecasts origin_indexes missing at end # event_nan = np.pad(event_nan, pad_width=(0, n_forecasts), mode="constant", constant_values=True) # event_valid = np.logical_not(event_nan) - # non_nan = np.logical_and(non_nan, event_valid) - - return valid_origins + # valid_origins = np.logical_and(valid_origins, event_valid) + # return valid_origins def mask_origin_without_nan_for_columns(df_isna, names, max_lags, n_lags, n_forecasts): @@ -890,7 +890,7 @@ def mask_origin_without_nan_for_columns(df_isna, names, max_lags, n_lags, n_fore contains_nan = df_isna.loc[:, names] if len(contains_nan.shape) > 1: assert len(contains_nan.shape) == 2 - contains_nan = contains_nan.any(axis=-1) + contains_nan = contains_nan.any(axis=1) if max_lags > 0: if n_lags == 0 and n_forecasts == 1: contains_nan = contains_nan[1:] From e320b22a8fa553417885af3902dd0a422a4d645d Mon Sep 17 00:00:00 2001 From: ourownstory Date: Wed, 7 Feb 2024 17:26:09 -0800 Subject: [PATCH 075/128] pass self.df to indexing --- neuralprophet/time_dataset.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 21072c5d3..8e1760c76 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -97,7 +97,7 @@ def __init__( ) # Construct index map - self.sample2index_map, self.length = self.create_sample2index_map(df) + self.sample2index_map, self.length = self.create_sample2index_map(self.df) def __getitem__(self, index): 
"""Overrides parent class method to get an item at index. @@ -888,9 +888,9 @@ def create_nan_mask( def mask_origin_without_nan_for_columns(df_isna, names, max_lags, n_lags, n_forecasts): # assert len(names) > 0 contains_nan = df_isna.loc[:, names] - if len(contains_nan.shape) > 1: - assert len(contains_nan.shape) == 2 - contains_nan = contains_nan.any(axis=1) + # if len(contains_nan.shape) > 1: + # assert len(contains_nan.shape) == 2 + contains_nan = contains_nan.any(axis=1) if max_lags > 0: if n_lags == 0 and n_forecasts == 1: contains_nan = contains_nan[1:] From 7f7be5ff1900d4b08fe29b2532eb11cac31b94b2 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Wed, 7 Feb 2024 17:47:54 -0800 Subject: [PATCH 076/128] fix zero dim lagged regressors --- neuralprophet/forecaster.py | 6 ++---- tests/test_integration.py | 7 ++++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/neuralprophet/forecaster.py b/neuralprophet/forecaster.py index 4193a9ccc..bb5dbfc86 100644 --- a/neuralprophet/forecaster.py +++ b/neuralprophet/forecaster.py @@ -510,10 +510,8 @@ def add_lagged_regressor( lagged_reg_layers = self.config_model.lagged_reg_layers if n_lags == 0 or n_lags is None: - n_lags = 0 - log.warning( - "Please, set n_lags to a value greater than 0 or to the options 'scalar' or 'auto'. No lags will be " - + "added to regressors when n_lags = 0 or n_lags is None" + raise ValueError( + f"Received n_lags {n_lags} for lagged regressor {names}. Please set n_lags > 0 or use options 'scalar' or 'auto'." ) if n_lags == "auto": if self.n_lags is not None and self.n_lags > 0: diff --git a/tests/test_integration.py b/tests/test_integration.py index 6be735def..c4fb0a0dd 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1505,9 +1505,10 @@ def test_n_lags_for_regressors(): n_forecasts=2, n_lags=2, ) - m = m.add_lagged_regressor(names="A", n_lags=0) - m = m.add_lagged_regressor(names="B", n_lags=0) - with pytest.raises(AssertionError): + + with pytest.raises(ValueError): + m = m.add_lagged_regressor(names="A", n_lags=0) + m = m.add_lagged_regressor(names="B", n_lags=0) m.fit(df1, freq="D") From d00d5f92b8a0b256fe88c6e23d4d1977139fccc5 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Wed, 7 Feb 2024 17:51:18 -0800 Subject: [PATCH 077/128] close figures in tests --- tests/test_plotting.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/test_plotting.py b/tests/test_plotting.py index 6c13ad55b..1c18df09d 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -4,6 +4,7 @@ import os import pathlib +import matplotlib import pandas as pd import pytest @@ -72,6 +73,7 @@ def test_plot(plotting_backend): fig6.show() fig7.show() fig8.show() + matplotlib.pyplot.close("all") @pytest.mark.parametrize(*decorator_input) @@ -114,6 +116,7 @@ def test_plot_components(plotting_backend): fig2.show() fig3.show() fig4.show() + matplotlib.pyplot.close("all") @pytest.mark.parametrize(*decorator_input) @@ -154,6 +157,7 @@ def test_plot_parameters(plotting_backend): fig1.show() fig2.show() fig3.show() + matplotlib.pyplot.close("all") @pytest.mark.parametrize(*decorator_input) @@ -222,6 +226,7 @@ def test_plot_global_local_parameters(plotting_backend): fig1.show() fig2.show() fig3.show() + matplotlib.pyplot.close("all") @pytest.mark.parametrize(*decorator_input) @@ -290,6 +295,7 @@ def test_plot_events(plotting_backend): fig1.show() fig2.show() fig3.show() + matplotlib.pyplot.close("all") @pytest.mark.parametrize(*decorator_input) @@ -356,6 +362,7 @@ def 
test_plot_events_additive(plotting_backend): fig1.show() fig2.show() fig3.show() + matplotlib.pyplot.close("all") @pytest.mark.parametrize(*decorator_input) @@ -394,6 +401,7 @@ def test_plot_events_components(plotting_backend): fig1.show() fig2.show() fig3.show() + matplotlib.pyplot.close("all") @pytest.mark.parametrize(*decorator_input) @@ -419,6 +427,7 @@ def test_plot_trend(plotting_backend): fig1.show() fig2.show() fig3.show() + matplotlib.pyplot.close("all") @pytest.mark.parametrize(*decorator_input) @@ -490,6 +499,7 @@ def test_plot_seasonality(plotting_backend): fig4.show() fig5.show() fig6.show() + matplotlib.pyplot.close("all") @pytest.mark.parametrize(*decorator_input) @@ -519,6 +529,7 @@ def test_plot_daily_seasonality(plotting_backend): fig1.show() fig2.show() fig3.show() + matplotlib.pyplot.close("all") @pytest.mark.parametrize(*decorator_input) @@ -556,6 +567,7 @@ def test_plot_lag_reg(plotting_backend): fig2.show() fig3.show() fig4.show() + matplotlib.pyplot.close("all") @pytest.mark.parametrize(*decorator_input) @@ -585,6 +597,7 @@ def test_plot_future_reg(plotting_backend): fig1.show() fig2.show() fig3.show() + matplotlib.pyplot.close("all") @pytest.mark.parametrize(*decorator_input) @@ -643,6 +656,7 @@ def test_plot_uncertainty(plotting_backend): fig5.show() fig6.show() fig7.show() + matplotlib.pyplot.close("all") @pytest.mark.parametrize(*decorator_input) @@ -706,6 +720,7 @@ def test_plot_conformal_prediction(plotting_backend): fig3.show() fig4.show() fig5.show() + matplotlib.pyplot.close("all") def test_advanced_conformal_prediction_plots(): @@ -734,6 +749,7 @@ def test_advanced_conformal_prediction_plots(): fig0 = m.conformal_plot(forecast) if PLOT: fig0.show() + matplotlib.pyplot.close("all") @pytest.mark.parametrize(*decorator_input) @@ -763,6 +779,7 @@ def test_plot_conformal_prediction_asymmetric(plotting_backend): fig0.show() fig1.show() fig2.show() + matplotlib.pyplot.close("all") @pytest.mark.parametrize(*decorator_input) @@ -791,6 +808,7 @@ def test_plot_latest_forecast(plotting_backend): fig1.show() fig2.show() fig3.show() + matplotlib.pyplot.close("all") def test_plotting_backend_options(): @@ -842,3 +860,4 @@ def test_plotting_backend_options(): fig10.show() fig11.show() fig12.show() + matplotlib.pyplot.close("all") From df5051d9efdd684b01e64a96355a0036e9b0a223 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Wed, 7 Feb 2024 17:56:58 -0800 Subject: [PATCH 078/128] fix typings --- neuralprophet/time_dataset.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 8e1760c76..e84aeea81 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -1,7 +1,7 @@ import logging from collections import OrderedDict, defaultdict from datetime import datetime -from typing import Optional +from typing import List, Optional import numpy as np import pandas as pd @@ -410,10 +410,10 @@ def tabularize_univariate_datetime_single_index( n_forecasts: int = 1, config_seasonality: Optional[configure.ConfigSeasonality] = None, config_lagged_regressors: Optional[configure.ConfigLaggedRegressors] = None, - additive_event_and_holiday_names: list[str] = [], - multiplicative_event_and_holiday_names: list[str] = [], - additive_regressors_names: list[str] = [], - multiplicative_regressors_names: list[str] = [], + additive_event_and_holiday_names: List[str] = [], + multiplicative_event_and_holiday_names: List[str] = [], + additive_regressors_names: List[str] = [], + 
multiplicative_regressors_names: List[str] = [], ): """Create a tabular data sample from timeseries dataframe, used for mini-batch creation. Note From d3bce01cb037397a5916128f23173642f5f3987c Mon Sep 17 00:00:00 2001 From: ourownstory Date: Wed, 7 Feb 2024 18:06:27 -0800 Subject: [PATCH 079/128] black --- tests/test_model_performance.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/test_model_performance.py b/tests/test_model_performance.py index 8c68bddf8..9eae4f812 100644 --- a/tests/test_model_performance.py +++ b/tests/test_model_performance.py @@ -335,17 +335,21 @@ def test_EnergyHourlyDeep(): df["y"] = pd.to_numeric(df["y"], errors="coerce") df = df.drop("ds", axis=1) - df['ds'] = pd.date_range(start="2015-01-01 00:00:00", periods=len(df), freq="H") + df["ds"] = pd.date_range(start="2015-01-01 00:00:00", periods=len(df), freq="H") df["ID"] = "test" - df_id = df[['ds', 'y', 'temp']].copy() - df_id['ID'] = "test2" - df_id['y'] = df_id['y'] * 0.3 - df_id['temp'] = df_id['temp'] * 0.4 + df_id = df[["ds", "y", "temp"]].copy() + df_id["ID"] = "test2" + df_id["y"] = df_id["y"] * 0.3 + df_id["temp"] = df_id["temp"] * 0.4 df = pd.concat([df, df_id], ignore_index=True) # Conditional Seasonality - df["winter"] = np.where(df["ds"].dt.month.isin([1]), 1, 0,) + df["winter"] = np.where( + df["ds"].dt.month.isin([1]), + 1, + 0, + ) df["summer"] = np.where(df["ds"].dt.month.isin([2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), 1, 0) df["winter"] = pd.to_numeric(df["winter"], errors="coerce") df["summer"] = pd.to_numeric(df["summer"], errors="coerce") From dce2f73d49980b9515552b27e5a55ae3523e22b3 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Wed, 7 Feb 2024 18:16:58 -0800 Subject: [PATCH 080/128] ruff --- neuralprophet/configure.py | 2 +- neuralprophet/event_utils.py | 2 +- neuralprophet/time_dataset.py | 2 +- neuralprophet/utils.py | 2 +- tests/test_event_utils.py | 1 - 5 files changed, 4 insertions(+), 5 deletions(-) diff --git a/neuralprophet/configure.py b/neuralprophet/configure.py index 52b8b3f0a..bb9698782 100644 --- a/neuralprophet/configure.py +++ b/neuralprophet/configure.py @@ -13,7 +13,7 @@ import pandas as pd import torch -from neuralprophet import df_utils, np_types, utils, utils_torch +from neuralprophet import df_utils, np_types, utils_torch from neuralprophet.custom_loss_metrics import PinballLoss from neuralprophet.event_utils import get_holiday_names diff --git a/neuralprophet/event_utils.py b/neuralprophet/event_utils.py index 1633cc16c..9deaa8f5d 100644 --- a/neuralprophet/event_utils.py +++ b/neuralprophet/event_utils.py @@ -1,5 +1,5 @@ from collections import defaultdict -from typing import Iterable, Optional, Union +from typing import Iterable, Union import numpy as np import pandas as pd diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index e84aeea81..9f2d3fcde 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -1,5 +1,5 @@ import logging -from collections import OrderedDict, defaultdict +from collections import OrderedDict from datetime import datetime from typing import List, Optional diff --git a/neuralprophet/utils.py b/neuralprophet/utils.py index c6fec4568..0cabb4e01 100644 --- a/neuralprophet/utils.py +++ b/neuralprophet/utils.py @@ -5,7 +5,7 @@ import os import sys from collections import OrderedDict -from typing import TYPE_CHECKING, Iterable, Optional, Union +from typing import TYPE_CHECKING, Optional import numpy as np import pandas as pd diff --git 
a/tests/test_event_utils.py b/tests/test_event_utils.py index 862c11c2f..8c26a2e49 100644 --- a/tests/test_event_utils.py +++ b/tests/test_event_utils.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 -import holidays import pytest from neuralprophet import event_utils From bedce94c26e703372c6573ddc9df4081f9782247 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Wed, 7 Feb 2024 18:18:12 -0800 Subject: [PATCH 081/128] linting --- neuralprophet/time_dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 9f2d3fcde..3adfbf136 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -45,12 +45,12 @@ def __init__( **kwargs : dict Identical to :meth:`tabularize_univariate_datetime` """ - ## Outcome after a call to init (summary): + # Outcome after a call to init (summary): # - add events and holidays columns to df # - calculated the number of usable samples (accounting for nan and filters) # - creates mapping of sample index to df index - ## Context Notes + # Context Notes # Currently done to df before it arrives here: # -> fit calls prep_or_copy_df, _check_dataframe, and _handle_missing_data, passes to _train # -> _train calls prep_or_copy_df, then passes to init_train_loader, which returns the train_loader From 051e1ad624bfad7a7d280be29a31390adcb7bf34 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Wed, 7 Feb 2024 18:27:02 -0800 Subject: [PATCH 082/128] linting --- neuralprophet/__main__.py | 1 + neuralprophet/df_utils.py | 4 +--- neuralprophet/forecaster.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/neuralprophet/__main__.py b/neuralprophet/__main__.py index 00cf0eaea..62aa59d01 100644 --- a/neuralprophet/__main__.py +++ b/neuralprophet/__main__.py @@ -1,6 +1,7 @@ """ Invokes neuralprophet when module is run as a script. """ + import argparse from neuralprophet._version import __version__ diff --git a/neuralprophet/df_utils.py b/neuralprophet/df_utils.py index 79c6c4ea6..8b83ad366 100644 --- a/neuralprophet/df_utils.py +++ b/neuralprophet/df_utils.py @@ -507,14 +507,12 @@ def check_dataframe( for name in columns: if name not in df: raise ValueError(f"Column {name!r} missing from dataframe") - if df.loc[df.loc[:, name].notnull()].shape[0] < 1: + if sum(df.loc[:, name].notnull().values) < 1: raise ValueError(f"Dataframe column {name!r} only has NaN rows.") if not np.issubdtype(df[name].dtype, np.number): df[name] = pd.to_numeric(df[name]) if np.isinf(df.loc[:, name].values).any(): df.loc[:, name] = df[name].replace([np.inf, -np.inf], np.nan) - if df.loc[df.loc[:, name].notnull()].shape[0] < 1: - raise ValueError(f"Dataframe column {name!r} only has NaN rows.") if future: return df, regressors_to_remove, lag_regressors_to_remove diff --git a/neuralprophet/forecaster.py b/neuralprophet/forecaster.py index bb5dbfc86..a11853595 100644 --- a/neuralprophet/forecaster.py +++ b/neuralprophet/forecaster.py @@ -1005,7 +1005,7 @@ def fit( # Only display the plot if the session is interactive, eg. 
do not show in github actions since it
             # causes an error in the Windows and MacOS environment
             if matplotlib.is_interactive():
-                fig
+                fig.show()
 
         self.fitted = True
         return metrics_df
 

From 0c9cd87846a03d59680ff12666cceec40def6fdd Mon Sep 17 00:00:00 2001
From: ourownstory
Date: Thu, 8 Feb 2024 18:09:53 -0800
Subject: [PATCH 083/128] modify logs

---
 neuralprophet/forecaster.py   | 32 +++++++++++++++-----------------
 neuralprophet/time_dataset.py |  4 ++--
 2 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/neuralprophet/forecaster.py b/neuralprophet/forecaster.py
index a11853595..efac5cf8e 100644
--- a/neuralprophet/forecaster.py
+++ b/neuralprophet/forecaster.py
@@ -710,8 +710,9 @@ def add_country_holidays(
         if self.fitted:
             raise Exception("Country must be specified prior to model fitting.")
         if self.config_country_holidays:
-            log.warning(
-                "Country holidays can only be added for a single country. Previous country holidays were overridden."
+            log.error(
+                "Country holidays can only be added once. Previous country holidays will be overridden. "
+                "If adding multiple countries, please add them as a list."
             )
 
         if regularization is not None:
@@ -906,18 +907,18 @@ def fit(
             ]
         )
         if reg_enabled:
-            log.warning(
+            log.info(
                 "Early stopping is enabled, but regularization only starts after half the number of configured \
                     epochs. If you see no impact of the regularization, turn off the early_stopping or reduce the \
                         number of epochs to train for."
             )
 
         if progress == "plot" and metrics is False:
-            log.warning("Progress plot requires metrics to be enabled. Enabling the default metrics.")
+            log.info("Progress plot requires metrics to be enabled. Enabling the default metrics.")
             metrics = utils_metrics.get_metrics(True)
 
         if not self.config_normalization.global_normalization:
-            log.warning("When Global modeling with local normalization, metrics are displayed in normalized scale.")
+            log.info("When using global modeling with local normalization, metrics are displayed in normalized scale.")
 
         if minimal:
             checkpointing = False
@@ -1138,7 +1139,7 @@ def test(self, df: pd.DataFrame, verbose: bool = True):
         val_metrics_df = pd.DataFrame(val_metrics)
         # TODO Check whether supported by Lightning
         if not self.config_normalization.global_normalization:
-            log.warning("Note that the metrics are displayed in normalized scale because of local normalization.")
+            log.info("Note that the metrics are displayed in normalized scale because of local normalization.")
         return val_metrics_df
 
     def split_df(self, df: pd.DataFrame, freq: str = "auto", valid_p: float = 0.2, local_split: bool = False):
@@ -2112,8 +2113,8 @@ def plot_latest_forecast(
             if df_name not in fcst["ID"].unique():
                 assert len(fcst["ID"].unique()) > 1
                 raise Exception(
-                    "Many time series are present in the pd.DataFrame (more than one ID). Please, especify ID to be \
-                        plotted."
+                    "Many time series are present in the pd.DataFrame (more than one ID). "
+                    "Please specify the ID to be plotted."
                 )
             else:
                 fcst = fcst[fcst["ID"] == df_name].copy(deep=True)
@@ -2121,7 +2122,7 @@ def plot_latest_forecast(
         if len(self.config_train.quantiles) > 1:
             log.warning(
                 "Plotting latest forecasts when uncertainty estimation enabled"
                " plots only the median quantile forecasts." 
            )
         if plot_history_data is None:
             fcst = fcst[-(include_previous_forecasts + self.n_forecasts + self.max_lags) :]
@@ -2174,10 +2175,7 @@ def plot_last_forecast(
         plotting_backend: Optional[str] = None,
     ):
         args = locals()
-        log.warning(
-            "plot_last_forecast() has been renamed to plot_latest_forecast() and is therefore deprecated. "
-            "Please use plot_latst_forecast() in the future"
-        )
+        log.error("plot_last_forecast() is deprecated. Please use plot_latest_forecast().")
         return NeuralProphet.plot_latest_forecast(**args)
 
 
@@ -2251,8 +2249,8 @@ def plot_components(
         if df_name not in fcst["ID"].unique():
             assert len(fcst["ID"].unique()) > 1
             raise Exception(
-                "Many time series are present in the pd.DataFrame (more than one ID). Please, especify ID to be \
-                    plotted."
+                "Multiple time series are present in the pd.DataFrame (more than one ID). "
+                "Please specify the ID to be plotted."
             )
         else:
             fcst = fcst[fcst["ID"] == df_name].copy(deep=True)
@@ -2278,8 +2276,8 @@ def plot_components(
         if self.model.config_seasonality is not None:
             if self.model.config_seasonality.global_local == "local" and df_name is None:
                 raise Exception(
-                    "df_name parameter is required for multiple time series and local modeling of at least one \
-                        component."
+                    "df_name parameter is required for multiple time series "
+                    "and local modeling of at least one component."
                 )
 
         # Validate components to be plotted
diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py
index 3adfbf136..d942e1410 100644
--- a/neuralprophet/time_dataset.py
+++ b/neuralprophet/time_dataset.py
@@ -538,8 +538,8 @@ def tabularize_univariate_datetime_single_index(
     )
 
     # ONLY FOR DEBUGGING
-    if log.level == 0:
-        log_input_shapes(inputs)
+    # if log.level == 0:
+    #     log_input_shapes(inputs)
 
     return inputs, targets
 

From f44231a91cd337ddc1a4d700d3d259d13bd29bac Mon Sep 17 00:00:00 2001
From: ourownstory
Date: Thu, 8 Feb 2024 23:50:30 -0800
Subject: [PATCH 084/128] add benchmarking script for computational time

---
 tests/utils/benchmark_time_dataset.py | 363 ++++++++++++++++++++++++++
 1 file changed, 363 insertions(+)
 create mode 100644 tests/utils/benchmark_time_dataset.py

diff --git a/tests/utils/benchmark_time_dataset.py b/tests/utils/benchmark_time_dataset.py
new file mode 100644
index 000000000..d76f33c85
--- /dev/null
+++ b/tests/utils/benchmark_time_dataset.py
@@ -0,0 +1,363 @@
+import logging
+import os
+import pathlib
+import time
+from itertools import product
+
+import pandas as pd
+import pytest
+import torch.utils.benchmark as benchmark
+
+from neuralprophet import NeuralProphet, uncertainty_evaluate
+
+log = logging.getLogger("NP.test")
+# log.setLevel("INFO")
+# log.parent.setLevel("INFO")
+# log.setLevel("WARNING")
+# log.parent.setLevel("WARNING")
+log.setLevel("ERROR")
+log.parent.setLevel("ERROR")
+
+DIR = pathlib.Path(__file__).parent.parent.parent.absolute()
+DATA_DIR = os.path.join(DIR, "tests", "test-data")
+PEYTON_FILE = os.path.join(DATA_DIR, "wp_log_peyton_manning.csv")
+AIR_FILE = os.path.join(DATA_DIR, "air_passengers.csv")
+YOS_FILE = os.path.join(DATA_DIR, "yosemite_temps.csv")
+NROWS = 256
+EPOCHS = 10
+BATCH_SIZE = 128
+LR = 1.0
+
+
+def yosemite(nrows=NROWS, epochs=EPOCHS, batch=BATCH_SIZE, season=True):
+    # log.info("testing: Uncertainty Estimation Yosemite Temps")
+    df = pd.read_csv(YOS_FILE, nrows=nrows)
+    m = NeuralProphet(
+        n_lags=12,
+        n_forecasts=6,
+        quantiles=[0.01, 0.99],
+        epochs=epochs,
+        batch_size=batch,
+        learning_rate=LR,
+        yearly_seasonality=season,
+        weekly_seasonality=season, 
daily_seasonality=season, + ) + # tic = time.perf_counter() + m.fit(df, freq="5min") + # toc = time.perf_counter() + # print(f"######## Time: {toc - tic:0.4f} for fit") + + # tic = time.perf_counter() + # future = m.make_future_dataframe(df, periods=6, n_historic_predictions=3 * 24 * 12) + # toc = time.perf_counter() + # print(f"######## Time: {toc - tic:0.4f} for make_future_dataframe") + + # tic = time.perf_counter() + # m.predict(future) + # toc = time.perf_counter() + # print(f"######## Time: {toc - tic:0.4f} for predict") + + m.highlight_nth_step_ahead_of_each_forecast(m.n_forecasts) + + +def peyton(nrows=NROWS, epochs=EPOCHS, batch=BATCH_SIZE, season=True): + # log.info("testing: Uncertainty Estimation Peyton Manning") + df = pd.read_csv(PEYTON_FILE, nrows=nrows) + playoffs = pd.DataFrame( + { + "event": "playoff", + "ds": pd.to_datetime( + [ + "2008-01-13", + "2009-01-03", + "2010-01-16", + "2010-01-24", + "2010-02-07", + "2011-01-08", + "2013-01-12", + "2014-01-12", + "2014-01-19", + "2014-02-02", + "2015-01-11", + "2016-01-17", + "2016-01-24", + "2016-02-07", + ] + ), + } + ) + superbowls = pd.DataFrame( + { + "event": "superbowl", + "ds": pd.to_datetime(["2010-02-07", "2014-02-02", "2016-02-07"]), + } + ) + events_df = pd.concat((playoffs, superbowls)) + + m = NeuralProphet( + n_forecasts=1, + loss_func="SmoothL1Loss", + quantiles=[0.01, 0.99], + epochs=epochs, + batch_size=batch, + learning_rate=LR, + yearly_seasonality=season, + weekly_seasonality=season, + # daily_seasonality=False, + ) + + # add lagged regressors + # # if m.n_lags > 0: + # df["A"] = df["y"].rolling(7, min_periods=1).mean() + # df["B"] = df["y"].rolling(30, min_periods=1).mean() + # m = m.add_lagged_regressor(name="A", n_lags=10) + # m = m.add_lagged_regressor(name="B", only_last_value=True) + + # add events + m = m.add_events(["superbowl", "playoff"], lower_window=-1, upper_window=1, regularization=0.1) + + m = m.add_country_holidays("US", mode="additive", regularization=0.1) + + df["C"] = df["y"].rolling(7, min_periods=1).mean() + df["D"] = df["y"].rolling(30, min_periods=1).mean() + + m = m.add_future_regressor(name="C", regularization=0.1) + m = m.add_future_regressor(name="D", regularization=0.1) + + history_df = m.create_df_with_events(df, events_df) + + m.fit(history_df, freq="D") + + # periods = 90 + # regressors_future_df = pd.DataFrame(data={"C": df["C"][:periods], "D": df["D"][:periods]}) + # future_df = m.make_future_dataframe( + # df=history_df, + # regressors_df=regressors_future_df, + # events_df=events_df, + # periods=periods, + # n_historic_predictions=nrows, + # ) + # m.predict(df=future_df) + + +def peyton_minus_events(nrows=NROWS, epochs=EPOCHS, batch=BATCH_SIZE, season=True): + # log.info("testing: Uncertainty Estimation Peyton Manning") + df = pd.read_csv(PEYTON_FILE, nrows=nrows) + + m = NeuralProphet( + n_forecasts=1, + loss_func="SmoothL1Loss", + quantiles=[0.01, 0.99], + epochs=epochs, + batch_size=batch, + learning_rate=LR, + yearly_seasonality=season, + weekly_seasonality=season, + # daily_seasonality=False, + ) + + # add lagged regressors + if m.n_lags > 0: + df["A"] = df["y"].rolling(7, min_periods=1).mean() + df["B"] = df["y"].rolling(30, min_periods=1).mean() + m = m.add_lagged_regressor(name="A") + m = m.add_lagged_regressor(name="B", only_last_value=True) + + df["C"] = df["y"].rolling(7, min_periods=1).mean() + df["D"] = df["y"].rolling(30, min_periods=1).mean() + + m = m.add_future_regressor(name="C", regularization=0.1) + m = m.add_future_regressor(name="D", 
regularization=0.1) + + history_df = df + + m.fit(history_df, freq="D") + + # periods = 90 + # regressors_future_df = pd.DataFrame(data={"C": df["C"][:periods], "D": df["D"][:periods]}) + # future_df = m.make_future_dataframe( + # df=history_df, + # regressors_df=regressors_future_df, + # periods=periods, + # n_historic_predictions=nrows, + # ) + # m.predict(df=future_df) + + +def peyton_minus_regressors(nrows=NROWS, epochs=EPOCHS, batch=BATCH_SIZE, season=True): + # log.info("testing: Uncertainty Estimation Peyton Manning") + df = pd.read_csv(PEYTON_FILE, nrows=nrows) + playoffs = pd.DataFrame( + { + "event": "playoff", + "ds": pd.to_datetime( + [ + "2008-01-13", + "2009-01-03", + "2010-01-16", + "2010-01-24", + "2010-02-07", + "2011-01-08", + "2013-01-12", + "2014-01-12", + "2014-01-19", + "2014-02-02", + "2015-01-11", + "2016-01-17", + "2016-01-24", + "2016-02-07", + ] + ), + } + ) + superbowls = pd.DataFrame( + { + "event": "superbowl", + "ds": pd.to_datetime(["2010-02-07", "2014-02-02", "2016-02-07"]), + } + ) + events_df = pd.concat((playoffs, superbowls)) + + m = NeuralProphet( + n_forecasts=1, + loss_func="SmoothL1Loss", + quantiles=[0.01, 0.99], + epochs=epochs, + batch_size=batch, + learning_rate=LR, + yearly_seasonality=season, + weekly_seasonality=season, + # daily_seasonality=False, + ) + # add events + m = m.add_events(["superbowl", "playoff"], lower_window=-1, upper_window=1, regularization=0.1) + + m = m.add_country_holidays("US", mode="additive", regularization=0.1) + + history_df = m.create_df_with_events(df, events_df) + + m.fit(history_df, freq="D") + + # periods = 90 + # future_df = m.make_future_dataframe( + # df=history_df, + # events_df=events_df, + # periods=periods, + # n_historic_predictions=nrows, + # ) + # m.predict(df=future_df) + + +####################################### +# tic = time.perf_counter() +# test_uncertainty_estimation_yosemite_temps() +# toc = time.perf_counter() +# print(f"#### Time: {toc - tic:0.4f} for test_uncertainty_estimation_yosemite_temps") + +# tic = time.perf_counter() +# test_uncertainty_estimation_peyton_manning() +# toc = time.perf_counter() +# print(f"#### Time: {toc - tic:0.4f} for test_uncertainty_estimation_peyton_manning") + +# tic = time.perf_counter() +# test_uncertainty_estimation_air_travel() +# toc = time.perf_counter() +# print(f"#### Time: {toc - tic:0.4f} for test_uncertainty_estimation_air_travel") + +# tic = time.perf_counter() +# test_uncertainty_estimation_multiple_quantiles() +# toc = time.perf_counter() +# print(f"#### Time: {toc - tic:0.4f} for test_uncertainty_estimation_multiple_quantiles") + +# tic = time.perf_counter() +# test_split_conformal_prediction() +# toc = time.perf_counter() +# print(f"#### Time: {toc - tic:0.4f} for test_split_conformal_prediction") + +# tic = time.perf_counter() +# test_asymmetrical_quantiles() +# toc = time.perf_counter() +# print(f"#### Time: {toc - tic:0.4f} for test_asymmetrical_quantiles") + + +############################33333 +# t0 = benchmark.Timer( +# stmt='test_uncertainty_estimation_yosemite_temps(x)', +# setup='from __main__ import test_uncertainty_estimation_yosemite_temps', +# globals={'x': x} +# ) + +# t1 = benchmark.Timer( +# stmt='test_uncertainty_estimation_peyton_manning(x)', +# setup='from __main__ import test_uncertainty_estimation_peyton_manning', +# # globals={'x': x} +# ) + +# print(t0.timeit(1)) +# print(t1.timeit(1)) + + +############################### + +# Compare takes a list of measurements which we'll save in results. 
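+# Hedged note on the torch.utils.benchmark API used below: each Timer runs
+# `stmt` (with `setup` executed once), and blocked_autorange(min_run_time=1)
+# repeats it until at least one second of measurements is collected. Compare
+# groups the resulting Measurements by `label`, rendering `sub_label` (and
+# num_threads) as rows and `description` as columns of one table.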
+results = [] + +epochs = [5] +sizes = [100, 1000] +# sizes = [100, 1000, 10000] +batches = [128] +seasons = [False, True] +for ep, nrows, b, season in product(epochs, sizes, batches, seasons): + # label and sub_label are the rows + # description is the column + label = "tests" + sub_label = f"[rows: {nrows}, epochs:{ep}, batch:{b}, season:{season}]" + for num_threads in [1]: # [1, 4, 16, 64] + results.append( + benchmark.Timer( + stmt="yosemite(nrows, epochs, batch, season)", + setup="from __main__ import yosemite", + globals={"epochs": ep, "nrows": nrows, "batch": b, "season": season}, + num_threads=num_threads, + label=label, + sub_label=sub_label, + description="yosemite", + ).blocked_autorange(min_run_time=1) + ) + results.append( + benchmark.Timer( + stmt="peyton(nrows, epochs, batch, season)", + setup="from __main__ import peyton", + globals={"nrows": nrows, "epochs": ep, "batch": b, "season": season}, + num_threads=num_threads, + label=label, + sub_label=sub_label, + description="peyton", + ).blocked_autorange(min_run_time=1) + ) + results.append( + benchmark.Timer( + stmt="peyton_minus_events(nrows, epochs, batch, season)", + setup="from __main__ import peyton_minus_events", + globals={"nrows": nrows, "epochs": ep, "batch": b, "season": season}, + num_threads=num_threads, + label=label, + sub_label=sub_label, + description="peyton_minus_events", + ).blocked_autorange(min_run_time=1) + ) + results.append( + benchmark.Timer( + stmt="peyton_minus_regressors(nrows, epochs, batch, season)", + setup="from __main__ import peyton_minus_regressors", + globals={"nrows": nrows, "epochs": ep, "batch": b, "season": season}, + num_threads=num_threads, + label=label, + sub_label=sub_label, + description="peyton_minus_regressors", + ).blocked_autorange(min_run_time=1) + ) + +compare = benchmark.Compare(results) +compare.print() From 2039212abda60d7b91ab5e0da41107b356e75f0c Mon Sep 17 00:00:00 2001 From: ourownstory Date: Thu, 8 Feb 2024 23:56:59 -0800 Subject: [PATCH 085/128] speed up uncertainty tests --- tests/test_uncertainty.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_uncertainty.py b/tests/test_uncertainty.py index 039128cb1..75544fe6d 100644 --- a/tests/test_uncertainty.py +++ b/tests/test_uncertainty.py @@ -10,7 +10,7 @@ from neuralprophet import NeuralProphet, uncertainty_evaluate log = logging.getLogger("NP.test") -log.setLevel("DEBUG") +log.setLevel("WARNING") log.parent.setLevel("WARNING") DIR = pathlib.Path(__file__).parent.parent.absolute() @@ -26,7 +26,7 @@ def test_uncertainty_estimation_peyton_manning(): log.info("testing: Uncertainty Estimation Peyton Manning") - df = pd.read_csv(PEYTON_FILE) + df = pd.read_csv(PEYTON_FILE, nrows=NROWS) playoffs = pd.DataFrame( { "event": "playoff", @@ -103,7 +103,7 @@ def test_uncertainty_estimation_peyton_manning(): def test_uncertainty_estimation_yosemite_temps(): log.info("testing: Uncertainty Estimation Yosemite Temps") - df = pd.read_csv(YOS_FILE) + df = pd.read_csv(YOS_FILE, nrows=NROWS) m = NeuralProphet( n_lags=12, n_forecasts=6, From d34700fa3cc7940bcbe19e56d1ba6e32e023bdcf Mon Sep 17 00:00:00 2001 From: ourownstory Date: Fri, 9 Feb 2024 00:23:24 -0800 Subject: [PATCH 086/128] fix unit test multiple country --- neuralprophet/forecaster.py | 4 ++-- tests/test_unit.py | 11 +++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/neuralprophet/forecaster.py b/neuralprophet/forecaster.py index efac5cf8e..b0de6483c 100644 --- a/neuralprophet/forecaster.py +++ 
b/neuralprophet/forecaster.py
@@ -708,9 +708,9 @@ def add_country_holidays(
             ``additive`` (default) or ``multiplicative``.
         """
         if self.fitted:
-            raise Exception("Country must be specified prior to model fitting.")
+            raise AssertionError("Country must be specified prior to model fitting.")
         if self.config_country_holidays:
-            log.error(
+            raise AssertionError(
                 "Country holidays can only be added once. "
                 "If adding multiple countries, please add as a list."
             )
diff --git a/tests/test_unit.py b/tests/test_unit.py
index 8796abd95..fe1f70c18 100644
--- a/tests/test_unit.py
+++ b/tests/test_unit.py
@@ -977,19 +977,18 @@ def test_handle_negative_values_replace():
 
 
 def test_add_country_holiday_multiple_calls_warning(caplog):
-    error_message = (
-        "Country holidays can only be added for a single country. Previous country holidays were overridden."
-    )
     m = NeuralProphet(
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        learning_rate=LR,
     )
-    m.add_country_holidays("US")
+    m.add_country_holidays(["US", "Germany"])
+    error_message = "Country holidays can only be added once."
     assert error_message not in caplog.text
-    m.add_country_holidays("Germany")
-    assert error_message in caplog.text
+    with pytest.raises(AssertionError):
+        m.add_country_holidays("Germany")
+    # assert error_message in caplog.text
 
 
 def test_multiple_countries():
From 485f5a8c9fdb843a5d501e47d561c965b91b6ff2 Mon Sep 17 00:00:00 2001
From: ourownstory
Date: Fri, 9 Feb 2024 00:26:51 -0800
Subject: [PATCH 087/128] reduce tests log level to ERROR

---
 tests/test_wrapper.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_wrapper.py b/tests/test_wrapper.py
index 4749ae038..48b13af9e 100644
--- a/tests/test_wrapper.py
+++ b/tests/test_wrapper.py
@@ -9,8 +9,8 @@ from neuralprophet import TorchProphet as Prophet
 
 log = logging.getLogger("NP.test")
-log.setLevel("DEBUG")
-log.parent.setLevel("WARNING")
+log.setLevel("ERROR")
+log.parent.setLevel("ERROR")
 
 DIR = pathlib.Path(__file__).parent.parent.absolute()
 DATA_DIR = os.path.join(DIR, "tests", "test-data")
From 8b863daba90ab24f032e872147b5ffc62d1cba4a Mon Sep 17 00:00:00 2001
From: ourownstory
Date: Fri, 9 Feb 2024 00:33:17 -0800
Subject: [PATCH 088/128] reduce log level to ERROR and fix adding multiple
 countries

---
 tests/pytest.ini                |  2 +-
 tests/test_glocal.py            |  4 ++--
 tests/test_integration.py       | 12 +++++-------
 tests/test_model_performance.py |  4 ++--
 tests/test_plotting.py          | 16 ++++------------
 tests/test_regularization.py    |  4 ++--
 tests/test_uncertainty.py       |  4 ++--
 tests/test_unit.py              |  4 ++--
 tests/test_utils.py             |  4 ++--
 9 files changed, 22 insertions(+), 32 deletions(-)

diff --git a/tests/pytest.ini b/tests/pytest.ini
index cbb9fe0c0..546920b92 100644
--- a/tests/pytest.ini
+++ b/tests/pytest.ini
@@ -1,6 +1,6 @@
 [pytest]
 log_cli = true
-log_cli_level = DEBUG
+log_cli_level = ERROR
 log_cli_format = %(asctime)s [%(levelname)s]: %(message)s (%(filename)s:%(lineno)s)
 log_cli_date_format = %Y-%m-%d %H:%M:%S
 filterwarnings =
diff --git a/tests/test_glocal.py b/tests/test_glocal.py
index e4cf8309e..771cc1829 100644
--- a/tests/test_glocal.py
+++ b/tests/test_glocal.py
@@ -9,8 +9,8 @@ from neuralprophet import NeuralProphet
 
 log = logging.getLogger("NP.test")
-log.setLevel("DEBUG")
-log.parent.setLevel("WARNING")
+log.setLevel("ERROR")
+log.parent.setLevel("ERROR")
 
 DIR = pathlib.Path(__file__).parent.parent.absolute()
 DATA_DIR = os.path.join(DIR, "tests", "test-data")
diff --git a/tests/test_integration.py b/tests/test_integration.py
index
c4fb0a0dd..3718e6a35 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -15,8 +15,8 @@ from neuralprophet.data.process import _handle_missing_data, _validate_column_name log = logging.getLogger("NP.test") -log.setLevel("DEBUG") -log.parent.setLevel("WARNING") +log.setLevel("ERROR") +log.parent.setLevel("ERROR") DIR = pathlib.Path(__file__).parent.parent.absolute() DATA_DIR = os.path.join(DIR, "tests", "test-data") @@ -473,12 +473,10 @@ def test_events(): ["superbowl", "playoff"], lower_window=-1, upper_window=1, mode="multiplicative", regularization=0.5 ) # add the country specific holidays - m = m.add_country_holidays("US", mode="additive", regularization=0.5) - m.add_country_holidays("Indonesia") + m = m.add_country_holidays( + ["US", "Indonesia", "Philippines", "Pakistan", "Belarus"], mode="additive", regularization=0.5 + ) # m.add_country_holidays("Thailand") # holidays package has issue with int input for timedelta. accepts np.float64() - m.add_country_holidays("Philippines") - m.add_country_holidays("Pakistan") - m.add_country_holidays("Belarus") history_df = m.create_df_with_events(df, events_df) m.fit(history_df, freq="D") future = m.make_future_dataframe(df=history_df, events_df=events_df, periods=30, n_historic_predictions=90) diff --git a/tests/test_model_performance.py b/tests/test_model_performance.py index 9eae4f812..93c908d77 100644 --- a/tests/test_model_performance.py +++ b/tests/test_model_performance.py @@ -16,8 +16,8 @@ from neuralprophet import NeuralProphet, set_random_seed log = logging.getLogger("NP.test") -log.setLevel("DEBUG") -log.parent.setLevel("WARNING") +log.setLevel("ERROR") +log.parent.setLevel("ERROR") DIR = pathlib.Path(__file__).parent.parent.absolute() DATA_DIR = os.path.join(DIR, "tests", "test-data") diff --git a/tests/test_plotting.py b/tests/test_plotting.py index 1c18df09d..0d24f3530 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -11,8 +11,8 @@ from neuralprophet import NeuralProphet log = logging.getLogger("NP.test") -log.setLevel("DEBUG") -log.parent.setLevel("WARNING") +log.setLevel("ERROR") +log.parent.setLevel("ERROR") DIR = pathlib.Path(__file__).parent.parent.absolute() DATA_DIR = os.path.join(DIR, "tests", "test-data") @@ -276,11 +276,7 @@ def test_plot_events(plotting_backend): ["superbowl", "playoff"], lower_window=-1, upper_window=1, mode="multiplicative", regularization=0.5 ) # add the country specific holidays - m = m.add_country_holidays("US", mode="multiplicative", regularization=0.5) - m.add_country_holidays("Indonesia") - m.add_country_holidays("Philippines") - m.add_country_holidays("Pakistan") - m.add_country_holidays("Belarus") + m = m.add_country_holidays(["US", "Indonesia", "Philippines", "Pakistan", "Belarus"], mode="multiplicative") history_df = m.create_df_with_events(df, events_df) m.fit(history_df, freq="D") future = m.make_future_dataframe(df=history_df, events_df=events_df, periods=30, n_historic_predictions=90) @@ -343,11 +339,7 @@ def test_plot_events_additive(plotting_backend): # set event windows m = m.add_events(["superbowl", "playoff"], lower_window=-1, upper_window=1, mode="additive", regularization=0.5) # add the country specific holidays - m = m.add_country_holidays("US", mode="additive", regularization=0.5) - m.add_country_holidays("Indonesia") - m.add_country_holidays("Philippines") - m.add_country_holidays("Pakistan") - m.add_country_holidays("Belarus") + m = m.add_country_holidays(["US", "Canada", "MEX"], mode="additive", regularization=0.5) history_df = 
m.create_df_with_events(df, events_df) m.fit(history_df, freq="D") future = m.make_future_dataframe(df=history_df, events_df=events_df, periods=30, n_historic_predictions=90) diff --git a/tests/test_regularization.py b/tests/test_regularization.py index 5a56d09f6..931a8fbb5 100644 --- a/tests/test_regularization.py +++ b/tests/test_regularization.py @@ -17,8 +17,8 @@ ) log = logging.getLogger("NP.test") -log.setLevel("DEBUG") -log.parent.setLevel("WARNING") +log.setLevel("ERROR") +log.parent.setLevel("ERROR") # Fix random seeds torch.manual_seed(0) diff --git a/tests/test_uncertainty.py b/tests/test_uncertainty.py index 75544fe6d..1208faa62 100644 --- a/tests/test_uncertainty.py +++ b/tests/test_uncertainty.py @@ -10,8 +10,8 @@ from neuralprophet import NeuralProphet, uncertainty_evaluate log = logging.getLogger("NP.test") -log.setLevel("WARNING") -log.parent.setLevel("WARNING") +log.setLevel("ERROR") +log.parent.setLevel("ERROR") DIR = pathlib.Path(__file__).parent.parent.absolute() DATA_DIR = os.path.join(DIR, "tests", "test-data") diff --git a/tests/test_unit.py b/tests/test_unit.py index fe1f70c18..41e3dd358 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -15,8 +15,8 @@ from neuralprophet.data.transform import _normalize log = logging.getLogger("NP.test") -log.setLevel("DEBUG") -log.parent.setLevel("WARNING") +log.setLevel("ERROR") +log.parent.setLevel("ERROR") DIR = pathlib.Path(__file__).parent.parent.absolute() DATA_DIR = os.path.join(DIR, "tests", "test-data") diff --git a/tests/test_utils.py b/tests/test_utils.py index 3e965e03a..88eced6bb 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -10,8 +10,8 @@ from neuralprophet import NeuralProphet, df_utils, load, save log = logging.getLogger("NP.test") -log.setLevel("DEBUG") -log.parent.setLevel("WARNING") +log.setLevel("ERROR") +log.parent.setLevel("ERROR") DIR = pathlib.Path(__file__).parent.parent.absolute() DATA_DIR = os.path.join(DIR, "tests", "test-data") From 3226884b87f7d800efe67bbef025898fbab266a0 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Fri, 9 Feb 2024 00:38:25 -0800 Subject: [PATCH 089/128] bypass intentional glocal test error log --- tests/test_glocal.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_glocal.py b/tests/test_glocal.py index 771cc1829..75e456940 100644 --- a/tests/test_glocal.py +++ b/tests/test_glocal.py @@ -187,6 +187,8 @@ def test_wrong_option_global_local_modeling(): df2_0["ID"] = "df2" df3_0 = df.iloc[256:384, :].copy(deep=True) df3_0["ID"] = "df3" + prev_level = log.getEffectiveLevel() + log.setLevel("CRITICAL") m = NeuralProphet( n_forecasts=2, n_lags=10, @@ -197,6 +199,7 @@ def test_wrong_option_global_local_modeling(): season_global_local="glocsl", trend_global_local="glocsl", ) + log.setLevel(prev_level) train_df, test_df = m.split_df(pd.concat((df1_0, df2_0, df3_0)), valid_p=0.33, local_split=True) m.fit(train_df) future = m.make_future_dataframe(test_df) From a6eceb2e173d60beffa43e12f6852d99d58d97a5 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Fri, 9 Feb 2024 00:40:26 -0800 Subject: [PATCH 090/128] fix prev --- tests/test_glocal.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_glocal.py b/tests/test_glocal.py index 75e456940..848bb68a5 100644 --- a/tests/test_glocal.py +++ b/tests/test_glocal.py @@ -187,8 +187,8 @@ def test_wrong_option_global_local_modeling(): df2_0["ID"] = "df2" df3_0 = df.iloc[256:384, :].copy(deep=True) df3_0["ID"] = "df3" - prev_level = log.getEffectiveLevel() - log.setLevel("CRITICAL") 
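+    # The intentional misconfiguration below is logged through the library's
+    # own "NP.*" loggers, not through this test's child logger, so the level
+    # bypass must be applied to the shared parent logger to take effect.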
+ prev_level = log.parent.getEffectiveLevel() + log.parent.setLevel("CRITICAL") m = NeuralProphet( n_forecasts=2, n_lags=10, @@ -199,7 +199,7 @@ def test_wrong_option_global_local_modeling(): season_global_local="glocsl", trend_global_local="glocsl", ) - log.setLevel(prev_level) + log.parent.setLevel(prev_level) train_df, test_df = m.split_df(pd.concat((df1_0, df2_0, df3_0)), valid_p=0.33, local_split=True) m.fit(train_df) future = m.make_future_dataframe(test_df) From 6cbf17b6abe896ba71b18e4a9aff973ed4a0d967 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Fri, 9 Feb 2024 14:45:43 -0800 Subject: [PATCH 091/128] benchmark dataloader time --- neuralprophet/forecaster.py | 4 +- neuralprophet/time_dataset.py | 2 +- tests/utils/benchmark_time_dataset.py | 242 +++++++++++++++++++------- 3 files changed, 180 insertions(+), 68 deletions(-) diff --git a/neuralprophet/forecaster.py b/neuralprophet/forecaster.py index b0de6483c..40ceeac35 100644 --- a/neuralprophet/forecaster.py +++ b/neuralprophet/forecaster.py @@ -2544,7 +2544,7 @@ def _init_train_loader(self, df, num_workers=0): ------- torch DataLoader """ - df, _, _, _ = df_utils.prep_or_copy_df(df) + df, _, _, _ = df_utils.prep_or_copy_df(df) # TODO: Can this call be avoided? # if not self.fitted: self.config_normalization.init_data_params( df=df, @@ -2641,7 +2641,7 @@ def _train( metrics """ # Set up data the training dataloader - df, _, _, _ = df_utils.prep_or_copy_df(df) + df, _, _, _ = df_utils.prep_or_copy_df(df) # TODO: Can this call be removed? train_loader = self._init_train_loader(df, num_workers) dataset_size = len(df) # train_loader.dataset diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index d942e1410..2affa93dc 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -395,7 +395,7 @@ def log_input_shapes(inputs): "regressors", ]: for name, period_features in value.items(): - tabularized_input_shapes_str += f" {name} {key} {period_features}\n" + tabularized_input_shapes_str += f" {name} {key} {period_features.shape}\n" else: tabularized_input_shapes_str += f" {key} {value.shape} \n" log.debug(f"Tabularized inputs shapes: \n{tabularized_input_shapes_str}") diff --git a/tests/utils/benchmark_time_dataset.py b/tests/utils/benchmark_time_dataset.py index d76f33c85..c1e9e75fd 100644 --- a/tests/utils/benchmark_time_dataset.py +++ b/tests/utils/benchmark_time_dataset.py @@ -7,8 +7,13 @@ import pandas as pd import pytest import torch.utils.benchmark as benchmark +from torch.utils.data import DataLoader -from neuralprophet import NeuralProphet, uncertainty_evaluate +from neuralprophet import NeuralProphet, df_utils, utils +from neuralprophet.data.process import _check_dataframe, _create_dataset, _handle_missing_data +from neuralprophet.data.transform import _normalize + +# from neuralprophet.forecaster import log = logging.getLogger("NP.test") # log.setLevel("INFO") @@ -23,12 +28,114 @@ PEYTON_FILE = os.path.join(DATA_DIR, "wp_log_peyton_manning.csv") AIR_FILE = os.path.join(DATA_DIR, "air_passengers.csv") YOS_FILE = os.path.join(DATA_DIR, "yosemite_temps.csv") -NROWS = 256 -EPOCHS = 10 -BATCH_SIZE = 128 +NROWS = 1000 +EPOCHS = 1 +BATCH_SIZE = 10 LR = 1.0 +def print_input_shapes(inputs): + tabularized_input_shapes_str = "" + for key, value in inputs.items(): + if key in [ + "seasonalities", + "covariates", + "events", + "regressors", + ]: + for name, period_features in value.items(): + tabularized_input_shapes_str += f" {name} {key} {period_features.shape}\n" + else: + 
tabularized_input_shapes_str += f" {key} {value.shape} \n"
+    print(f"Tabularized inputs shapes: \n{tabularized_input_shapes_str}")
+
+
+def load(nrows=NROWS, epochs=EPOCHS, batch=BATCH_SIZE, season=True, iterations=1):
+    tic = time.perf_counter()
+    df = pd.read_csv(YOS_FILE, nrows=nrows)
+    freq = "5min"
+    num_workers = 0
+
+    m = NeuralProphet(
+        n_lags=12,
+        n_forecasts=6,
+        epochs=epochs,
+        batch_size=batch,
+        learning_rate=LR,
+        yearly_seasonality=season,
+        weekly_seasonality=season,
+        daily_seasonality=season,
+    )
+
+    # Mimic m.fit(df) behavior
+
+    df, _, _, m.id_list = df_utils.prep_or_copy_df(df)
+    df = _check_dataframe(m, df, check_y=True, exogenous=True)
+    m.data_freq = df_utils.infer_frequency(df, n_lags=m.max_lags, freq=freq)
+    df = _handle_missing_data(
+        df=df,
+        freq=m.data_freq,
+        n_lags=m.n_lags,
+        n_forecasts=m.n_forecasts,
+        config_missing=m.config_missing,
+        config_regressors=m.config_regressors,
+        config_lagged_regressors=m.config_lagged_regressors,
+        config_events=m.config_events,
+        config_seasonality=m.config_seasonality,
+        predicting=False,
+    )
+    # mimic _init_train_loader
+    m.config_normalization.init_data_params(
+        df=df,
+        config_lagged_regressors=m.config_lagged_regressors,
+        config_regressors=m.config_regressors,
+        config_events=m.config_events,
+        config_seasonality=m.config_seasonality,
+    )
+    df = _normalize(df=df, config_normalization=m.config_normalization)
+
+    df_merged = df_utils.merge_dataframes(df)
+    m.config_seasonality = utils.set_auto_seasonalities(df_merged, config_seasonality=m.config_seasonality)
+    if m.config_country_holidays is not None:
+        m.config_country_holidays.init_holidays(df_merged)
+
+    dataset = _create_dataset(
+        m, df, predict_mode=False, prediction_frequency=m.prediction_frequency
+    )  # needs to be called after set_auto_seasonalities
+
+    # Determine the max number of epochs
+    m.config_train.set_auto_batch_epoch(n_data=len(dataset))
+
+    loader = DataLoader(
+        dataset,
+        batch_size=m.config_train.batch_size,
+        shuffle=True,
+        num_workers=num_workers,
+    )
+    # dataset_size = len(df)
+    # print(dataset_size)
+
+    dataloader_iterator = iter(loader)
+    toc = time.perf_counter()
+    print(f"######## Time: {toc - tic:0.4f} for setup")
+    tic = time.perf_counter()
+    for i in range(iterations):
+        data, target, meta = next(dataloader_iterator)
+        # try:
+        #     data, target, meta = next(dataloader_iterator)
+        # except StopIteration:
+        #     dataloader_iterator = iter(loader)
+        #     data, target, meta = next(dataloader_iterator)
+        # do_something()
+    toc = time.perf_counter()
+    # print_input_shapes(data)
+    # print(len(meta["df_name"]))
+    print(f"######## Time: {toc - tic:0.4f} for iterating {iterations} batches of size {batch}")
+
+
+load(nrows=1010, batch=100, iterations=10)
+
+
 def yosemite(nrows=NROWS, epochs=EPOCHS, batch=BATCH_SIZE, season=True):
     # log.info("testing: Uncertainty Estimation Yosemite Temps")
     df = pd.read_csv(YOS_FILE, nrows=nrows)
@@ -300,64 +407,69 @@ def peyton_minus_regressors(nrows=NROWS, epochs=EPOCHS, batch=BATCH_SIZE, season
 ###############################
 
-# Compare takes a list of measurements which we'll save in results.
-results = [] - -epochs = [5] -sizes = [100, 1000] -# sizes = [100, 1000, 10000] -batches = [128] -seasons = [False, True] -for ep, nrows, b, season in product(epochs, sizes, batches, seasons): - # label and sub_label are the rows - # description is the column - label = "tests" - sub_label = f"[rows: {nrows}, epochs:{ep}, batch:{b}, season:{season}]" - for num_threads in [1]: # [1, 4, 16, 64] - results.append( - benchmark.Timer( - stmt="yosemite(nrows, epochs, batch, season)", - setup="from __main__ import yosemite", - globals={"epochs": ep, "nrows": nrows, "batch": b, "season": season}, - num_threads=num_threads, - label=label, - sub_label=sub_label, - description="yosemite", - ).blocked_autorange(min_run_time=1) - ) - results.append( - benchmark.Timer( - stmt="peyton(nrows, epochs, batch, season)", - setup="from __main__ import peyton", - globals={"nrows": nrows, "epochs": ep, "batch": b, "season": season}, - num_threads=num_threads, - label=label, - sub_label=sub_label, - description="peyton", - ).blocked_autorange(min_run_time=1) - ) - results.append( - benchmark.Timer( - stmt="peyton_minus_events(nrows, epochs, batch, season)", - setup="from __main__ import peyton_minus_events", - globals={"nrows": nrows, "epochs": ep, "batch": b, "season": season}, - num_threads=num_threads, - label=label, - sub_label=sub_label, - description="peyton_minus_events", - ).blocked_autorange(min_run_time=1) - ) - results.append( - benchmark.Timer( - stmt="peyton_minus_regressors(nrows, epochs, batch, season)", - setup="from __main__ import peyton_minus_regressors", - globals={"nrows": nrows, "epochs": ep, "batch": b, "season": season}, - num_threads=num_threads, - label=label, - sub_label=sub_label, - description="peyton_minus_regressors", - ).blocked_autorange(min_run_time=1) - ) - -compare = benchmark.Compare(results) -compare.print() + +def measure_times(): + # Compare takes a list of measurements which we'll save in results. 
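+    # Wrapping the sweep in a function (it previously ran at import time)
+    # lets the quick load() timing above run without triggering this full,
+    # long-running grid; call measure_times() explicitly to execute it.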
+ results = [] + + epochs = [5] + sizes = [100, 1000] + # sizes = [100, 1000, 10000] + batches = [128] + seasons = [False, True] + for ep, nrows, b, season in product(epochs, sizes, batches, seasons): + # label and sub_label are the rows + # description is the column + label = "tests" + sub_label = f"[rows: {nrows}, epochs:{ep}, batch:{b}, season:{season}]" + for num_threads in [1]: # [1, 4, 16, 64] + results.append( + benchmark.Timer( + stmt="yosemite(nrows, epochs, batch, season)", + setup="from __main__ import yosemite", + globals={"epochs": ep, "nrows": nrows, "batch": b, "season": season}, + num_threads=num_threads, + label=label, + sub_label=sub_label, + description="yosemite", + ).blocked_autorange(min_run_time=1) + ) + results.append( + benchmark.Timer( + stmt="peyton(nrows, epochs, batch, season)", + setup="from __main__ import peyton", + globals={"nrows": nrows, "epochs": ep, "batch": b, "season": season}, + num_threads=num_threads, + label=label, + sub_label=sub_label, + description="peyton", + ).blocked_autorange(min_run_time=1) + ) + results.append( + benchmark.Timer( + stmt="peyton_minus_events(nrows, epochs, batch, season)", + setup="from __main__ import peyton_minus_events", + globals={"nrows": nrows, "epochs": ep, "batch": b, "season": season}, + num_threads=num_threads, + label=label, + sub_label=sub_label, + description="peyton_minus_events", + ).blocked_autorange(min_run_time=1) + ) + results.append( + benchmark.Timer( + stmt="peyton_minus_regressors(nrows, epochs, batch, season)", + setup="from __main__ import peyton_minus_regressors", + globals={"nrows": nrows, "epochs": ep, "batch": b, "season": season}, + num_threads=num_threads, + label=label, + sub_label=sub_label, + description="peyton_minus_regressors", + ).blocked_autorange(min_run_time=1) + ) + + compare = benchmark.Compare(results) + compare.print() + + +# measure_times() From 0c16eb1a233ac41b259e86ae28a62b9a73d37920 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Wed, 14 Feb 2024 16:57:22 -0800 Subject: [PATCH 092/128] remove hourly energy test --- tests/test_model_performance.py | 92 --------------------------------- 1 file changed, 92 deletions(-) diff --git a/tests/test_model_performance.py b/tests/test_model_performance.py index 93c908d77..3d72c186c 100644 --- a/tests/test_model_performance.py +++ b/tests/test_model_performance.py @@ -322,95 +322,3 @@ def test_EnergyDailyDeep(): # Training & Predict _ = m.fit(df=df_train, freq="D", num_workers=4) _ = m.predict(df_test) - - -# TODO: adapt to hourly dataset with multiple IDs -def test_EnergyHourlyDeep(): - ### Temporary Test for on-the-fly sampling - very time consuming! 
- - df = pd.read_csv(ENERGY_PRICE_DAILY_FILE) - df["temp"] = df["temperature"] - df = df.drop(columns="temperature") - df["ds"] = pd.to_datetime(df["ds"]) - df["y"] = pd.to_numeric(df["y"], errors="coerce") - - df = df.drop("ds", axis=1) - df["ds"] = pd.date_range(start="2015-01-01 00:00:00", periods=len(df), freq="H") - df["ID"] = "test" - - df_id = df[["ds", "y", "temp"]].copy() - df_id["ID"] = "test2" - df_id["y"] = df_id["y"] * 0.3 - df_id["temp"] = df_id["temp"] * 0.4 - df = pd.concat([df, df_id], ignore_index=True) - - # Conditional Seasonality - df["winter"] = np.where( - df["ds"].dt.month.isin([1]), - 1, - 0, - ) - df["summer"] = np.where(df["ds"].dt.month.isin([2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), 1, 0) - df["winter"] = pd.to_numeric(df["winter"], errors="coerce") - df["summer"] = pd.to_numeric(df["summer"], errors="coerce") - - # Normalize Temperature - df["temp"] = (df["temp"] - 65.0) / 50.0 - - # df - df = df[["ID", "ds", "y", "temp", "winter", "summer"]] - - # Hyperparameter - tuned_params = { - "n_lags": 24 * 15, - "newer_samples_weight": 2.0, - "n_changepoints": 0, - "yearly_seasonality": 10, - "weekly_seasonality": True, - "daily_seasonality": False, # due to conditional daily seasonality - "batch_size": 128, - "ar_layers": [32, 64, 32, 16], - "lagged_reg_layers": [32, 32], - # not tuned - "n_forecasts": 33, - "learning_rate": 0.001, - "epochs": 30, - "trend_global_local": "global", - "season_global_local": "global", - "drop_missing": True, - "normalize": "standardize", - } - - # Uncertainty Quantification - confidence_lv = 0.98 - quantile_list = [round(((1 - confidence_lv) / 2), 2), round((confidence_lv + (1 - confidence_lv) / 2), 2)] - - # Check if GPU is available - use_gpu = torch.cuda.is_available() - - # Set trainer configuration - trainer_configs = { - "accelerator": "gpu" if use_gpu else "cpu", - } - print(f"Using {'GPU' if use_gpu else 'CPU'}") - - # Model - m = NeuralProphet(**tuned_params, **trainer_configs, quantiles=quantile_list) - - # Lagged Regressor - m.add_lagged_regressor(names="temp", n_lags=33, normalize="standardize") - - # Conditional Seasonality - m.add_seasonality(name="winter", period=1, fourier_order=6, condition_name="winter") - m.add_seasonality(name="summer", period=1, fourier_order=6, condition_name="summer") - - # Holidays - m.add_country_holidays(country_name="US", lower_window=-1, upper_window=1) - - # Split - df_train = df[df["ds"] < "2015-03-01"] - df_test = df[df["ds"] >= "2015-03-01"] - - # Training & Predict - _ = m.fit(df=df_train, freq="H", num_workers=4, early_stopping=True) - _ = m.predict(df_test) From b5845fd972e95450cd308f0110966ea259a50270 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Wed, 14 Feb 2024 16:59:11 -0800 Subject: [PATCH 093/128] add debug notebook for energy hourly --- tests/metrics/debug-energy-price-hourly.ipynb | 2529 +++++++++++++++++ 1 file changed, 2529 insertions(+) create mode 100644 tests/metrics/debug-energy-price-hourly.ipynb diff --git a/tests/metrics/debug-energy-price-hourly.ipynb b/tests/metrics/debug-energy-price-hourly.ipynb new file mode 100644 index 000000000..14a09c93e --- /dev/null +++ b/tests/metrics/debug-energy-price-hourly.ipynb @@ -0,0 +1,2529 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import pathlib\n", + "import torch\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "import plotly.graph_objects as go\n", + "from plotly.subplots import make_subplots\n", + "from 
plotly_resampler import unregister_plotly_resampler\n", + "\n", + "from neuralprophet import NeuralProphet, set_random_seed" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def create_metrics_plot(metrics):\n", + " # Deactivate the resampler since it is not compatible with kaleido (image export)\n", + " unregister_plotly_resampler()\n", + "\n", + " # Plotly params\n", + " prediction_color = \"#2d92ff\"\n", + " actual_color = \"black\"\n", + " line_width = 2\n", + " xaxis_args = {\"showline\": True, \"mirror\": True, \"linewidth\": 1.5, \"showgrid\": False}\n", + " yaxis_args = {\n", + " \"showline\": True,\n", + " \"mirror\": True,\n", + " \"linewidth\": 1.5,\n", + " \"showgrid\": False,\n", + " \"rangemode\": \"tozero\",\n", + " \"type\": \"log\",\n", + " }\n", + " layout_args = {\n", + " \"autosize\": True,\n", + " \"template\": \"plotly_white\",\n", + " \"margin\": go.layout.Margin(l=0, r=10, b=0, t=30, pad=0),\n", + " \"font\": dict(size=10),\n", + " \"title\": dict(font=dict(size=10)),\n", + " \"width\": 1000,\n", + " \"height\": 200,\n", + " }\n", + "\n", + " metric_cols = [col for col in metrics.columns if not (\"_val\" in col or col == \"RegLoss\" or col == \"epoch\")]\n", + " fig = make_subplots(rows=1, cols=len(metric_cols), subplot_titles=metric_cols)\n", + " for i, metric in enumerate(metric_cols):\n", + " fig.add_trace(\n", + " go.Scatter(\n", + " y=metrics[metric],\n", + " name=metric,\n", + " mode=\"lines\",\n", + " line=dict(color=prediction_color, width=line_width),\n", + " legendgroup=metric,\n", + " ),\n", + " row=1,\n", + " col=i + 1,\n", + " )\n", + " if f\"{metric}_val\" in metrics.columns:\n", + " fig.add_trace(\n", + " go.Scatter(\n", + " y=metrics[f\"{metric}_val\"],\n", + " name=f\"{metric}_val\",\n", + " mode=\"lines\",\n", + " line=dict(color=actual_color, width=line_width),\n", + " legendgroup=metric,\n", + " ),\n", + " row=1,\n", + " col=i + 1,\n", + " )\n", + " if metric == \"Loss\":\n", + " fig.add_trace(\n", + " go.Scatter(\n", + " y=metrics[\"RegLoss\"],\n", + " name=\"RegLoss\",\n", + " mode=\"lines\",\n", + " line=dict(color=actual_color, width=line_width),\n", + " legendgroup=metric,\n", + " ),\n", + " row=1,\n", + " col=i + 1,\n", + " )\n", + " fig.update_xaxes(xaxis_args)\n", + " fig.update_yaxes(yaxis_args)\n", + " fig.update_layout(layout_args)\n", + " return fig" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "DIR = \"~/github/neural_prophet\"\n", + "DATA_DIR = os.path.join(DIR, \"tests\", \"test-data\")\n", + "PEYTON_FILE = os.path.join(DATA_DIR, \"wp_log_peyton_manning.csv\")\n", + "AIR_FILE = os.path.join(DATA_DIR, \"air_passengers.csv\")\n", + "YOS_FILE = os.path.join(DATA_DIR, \"yosemite_temps.csv\")\n", + "ENERGY_PRICE_DAILY_FILE = os.path.join(DATA_DIR, \"tutorial04_kaggle_energy_daily_temperature.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(ENERGY_PRICE_DAILY_FILE)\n", + "df[\"temp\"] = df[\"temperature\"]\n", + "df = df.drop(columns=\"temperature\")\n", + "df[\"ds\"] = pd.to_datetime(df[\"ds\"])\n", + "df[\"y\"] = pd.to_numeric(df[\"y\"], errors=\"coerce\")\n", + "\n", + "df = df.drop(\"ds\", axis=1)\n", + "df[\"ds\"] = pd.date_range(start=\"2015-01-01 00:00:00\", periods=len(df), freq=\"H\")\n", + "df[\"ID\"] = \"test\"\n", + "\n", + "df_id = df[[\"ds\", \"y\", \"temp\"]].copy()\n", + "df_id[\"ID\"] = \"test2\"\n", + 
"df_id[\"y\"] = df_id[\"y\"] * 0.3\n", + "df_id[\"temp\"] = df_id[\"temp\"] * 0.4\n", + "df = pd.concat([df, df_id], ignore_index=True)\n", + "\n", + "# Conditional Seasonality\n", + "df[\"winter\"] = np.where(\n", + " df[\"ds\"].dt.month.isin([1]),\n", + " 1,\n", + " 0,\n", + ")\n", + "df[\"summer\"] = np.where(df[\"ds\"].dt.month.isin([2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]), 1, 0)\n", + "df[\"winter\"] = pd.to_numeric(df[\"winter\"], errors=\"coerce\")\n", + "df[\"summer\"] = pd.to_numeric(df[\"summer\"], errors=\"coerce\")\n", + "\n", + "# Normalize Temperature\n", + "df[\"temp\"] = (df[\"temp\"] - 65.0) / 50.0\n", + "\n", + "# df\n", + "df = df[[\"ID\", \"ds\", \"y\", \"temp\", \"winter\", \"summer\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using CPU\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "### Temporary Test for on-the-fly sampling - very time consuming!\n", + "\n", + "\n", + "# Hyperparameter\n", + "tuned_params = {\n", + " \"n_lags\": 10,\n", + " \"newer_samples_weight\": 2.0,\n", + " \"n_changepoints\": 0,\n", + " \"yearly_seasonality\": 10,\n", + " \"weekly_seasonality\": True,\n", + " \"daily_seasonality\": False, # due to conditional daily seasonality\n", + " \"batch_size\": 128,\n", + " \"ar_layers\": [8, 4],\n", + " \"lagged_reg_layers\": [8],\n", + " # not tuned\n", + " \"n_forecasts\": 5,\n", + " \"learning_rate\": 0.001,\n", + " \"epochs\": 10,\n", + " \"trend_global_local\": \"global\",\n", + " \"season_global_local\": \"global\",\n", + " \"drop_missing\": True,\n", + " \"normalize\": \"standardize\",\n", + "}\n", + "\n", + "# Uncertainty Quantification\n", + "confidence_lv = 0.98\n", + "quantile_list = [round(((1 - confidence_lv) / 2), 2), round((confidence_lv + (1 - confidence_lv) / 2), 2)]\n", + "\n", + "# Check if GPU is available\n", + "use_gpu = torch.cuda.is_available()\n", + "\n", + "# Set trainer configuration\n", + "trainer_configs = {\n", + " \"accelerator\": \"gpu\" if use_gpu else \"cpu\",\n", + "}\n", + "print(f\"Using {'GPU' if use_gpu else 'CPU'}\")\n", + "\n", + "# Model\n", + "m = NeuralProphet(**tuned_params, **trainer_configs, quantiles=quantile_list)\n", + "\n", + "# Lagged Regressor\n", + "m.add_lagged_regressor(names=\"temp\", n_lags=33, normalize=\"standardize\")\n", + "\n", + "# Conditional Seasonality\n", + "m.add_seasonality(name=\"winter\", period=1, fourier_order=6, condition_name=\"winter\")\n", + "m.add_seasonality(name=\"summer\", period=1, fourier_order=6, condition_name=\"summer\")\n", + "\n", + "# Holidays\n", + "m.add_country_holidays(country_name=\"US\", lower_window=-1, upper_window=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO - (NP.forecaster.fit) - When Global modeling with local normalization, metrics are displayed in normalized scale.\n", + "INFO - (NP.df_utils._infer_frequency) - Major frequency H corresponds to 99.929% of the data.\n", + "INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - H\n", + "INFO - (NP.df_utils._infer_frequency) - Major frequency H corresponds to 99.929% of the data.\n", + "INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - H\n", + "INFO - (NP.utils.configure_trainer) - Using 
accelerator cpu with 1 device(s).\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "aa26aaf9191f401b9c69ebafca381bab", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Training: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8f924e854e154a2a9e9e86640d0298db", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "babd196a4ca640adaa6302bdba9682b1", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "4c4b3ce470a7482f83b3118343efa35e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "43e00642d3674fac82ab23cd4d56ab3c", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "9091c01f25ff475bb3e16f402bdcb08b", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a86321ee392d4c42b7e078509adb4efd", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a659c5f8d73f49d2ba37897c1d604989", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "76eb4a72f44b47048d9e23746a6baf41", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "f6400786ccbb4d899b117881cae52eb6", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "914fe3ff48524bf38e9f5892bd897646", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Validation: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Split\n", + "df_train = df[df[\"ds\"] < \"2015-03-01\"]\n", + "df_test = df[df[\"ds\"] >= \"2015-03-01\"]\n", + "\n", + "# Training & Predict\n", + "metrics = m.fit(df=df_train, validation_df=df_test, freq=\"H\", num_workers=4, early_stopping=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + 
"plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "legendgroup": "MAE", + "line": { + "color": "#2d92ff", + "width": 2 + }, + "mode": "lines", + "name": "MAE", + "type": "scatter", + "xaxis": "x", + "y": [ + 1.6991313695907593, + 1.5541504621505737, + 1.2866111993789673, + 1.0485198497772217, + 0.9603586792945862, + 0.933108389377594, + 0.9244528412818909, + 0.9177840948104858, + 0.9132021069526672, + 0.9105463027954102 + ], + "yaxis": "y" + }, + { + "legendgroup": "MAE", + "line": { + "color": "black", + "width": 2 + }, + "mode": "lines", + "name": "MAE_val", + "type": "scatter", + "xaxis": "x", + "y": [ + 1.9174306392669678, + 2.133635997772217, + 2.1361277103424072, + 1.954904317855835, + 1.8205108642578125, + 1.7834810018539429, + 1.7635681629180908, + 1.7493915557861328, + 1.7418491840362549, + 1.7389646768569946 + ], + "yaxis": "y" + }, + { + "legendgroup": "RMSE", + "line": { + "color": "#2d92ff", + "width": 2 + }, + "mode": "lines", + "name": "RMSE", + "type": "scatter", + "xaxis": "x2", + "y": [ + 2.249849557876587, + 2.062807083129883, + 1.6801131963729858, + 1.344346523284912, + 1.2270969152450562, + 1.1934525966644287, + 1.1826142072677612, + 1.1741188764572144, + 1.169130563735962, + 1.1649360656738281 + ], + "yaxis": "y2" + }, + { + "legendgroup": "RMSE", + "line": { + "color": "black", + "width": 2 + }, + "mode": "lines", + "name": "RMSE_val", + "type": "scatter", + "xaxis": "x2", + "y": [ + 2.1282451152801514, + 2.287360668182373, + 2.3184731006622314, + 2.140346050262451, + 2.0008866786956787, + 1.962218999862671, + 1.9410110712051392, + 1.9257516860961914, + 1.9175572395324707, + 1.914405107498169 + ], + "yaxis": "y2" + }, + { + "legendgroup": "Loss", + "line": { + "color": "#2d92ff", + "width": 2 + }, + "mode": "lines", + "name": "Loss", + "type": "scatter", + "xaxis": "x3", + "y": [ + 3.4565775394439697, + 3.047083854675293, + 2.3058581352233887, + 1.710412621498108, + 1.4448997974395752, + 1.353717565536499, + 1.3267676830291748, + 1.3102833032608032, + 1.2921112775802612, + 1.2888280153274536 + ], + "yaxis": "y3" + }, + { + "legendgroup": "Loss", + "line": { + "color": "black", + "width": 2 + }, + "mode": "lines", + "name": "Loss_val", + "type": "scatter", + "xaxis": "x3", + "y": [ + 4.821254730224609, + 4.705277919769287, + 4.240411758422852, + 3.7221953868865967, + 3.4264442920684814, + 3.345188617706299, + 3.2992584705352783, + 3.2648608684539795, + 3.246990919113159, + 3.2401645183563232 + ], + "yaxis": "y3" + }, + { + "legendgroup": "Loss", + "line": { + "color": "black", + "width": 2 + }, + "mode": "lines", + "name": "RegLoss", + "type": "scatter", + "xaxis": "x3", + "y": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "yaxis": "y3" + } + ], + "layout": { + "annotations": [ + { + "font": { + "size": 16 + }, + "showarrow": false, + "text": "MAE", + "x": 0.14444444444444446, + "xanchor": "center", + "xref": "paper", + "y": 1, + "yanchor": "bottom", + "yref": "paper" + }, + { + "font": { + "size": 16 + }, + "showarrow": false, + "text": "RMSE", + "x": 0.5, + "xanchor": "center", + "xref": "paper", + "y": 1, + "yanchor": "bottom", + "yref": "paper" + }, + { + "font": { + "size": 16 + }, + "showarrow": false, + "text": "Loss", + "x": 0.8555555555555556, + "xanchor": "center", + "xref": "paper", + "y": 1, + "yanchor": "bottom", + "yref": "paper" + } + ], + "autosize": true, + "font": { + "size": 10 + }, + "height": 200, + "margin": { + "b": 0, + "l": 0, + "pad": 0, + "r": 10, + "t": 30 + }, + "template": { + "data": { + "bar": [ + { + "error_x": { 
+ "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "white", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "white", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "#C8D4E3", + "linecolor": "#C8D4E3", + "minorgridcolor": "#C8D4E3", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "#C8D4E3", + "linecolor": "#C8D4E3", + "minorgridcolor": "#C8D4E3", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 
0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "fillpattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + 
"sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "white", + "showlakes": true, + "showland": true, + "subunitcolor": "#C8D4E3" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "white", + "polar": { + "angularaxis": { + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "" + }, + "bgcolor": "white", + "radialaxis": { + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "white", + "gridcolor": "#DFE8F3", + "gridwidth": 2, + "linecolor": "#EBF0F8", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#EBF0F8" + }, + "yaxis": { + "backgroundcolor": "white", + "gridcolor": "#DFE8F3", + "gridwidth": 2, + "linecolor": "#EBF0F8", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#EBF0F8" + }, + "zaxis": { + "backgroundcolor": "white", + "gridcolor": "#DFE8F3", + "gridwidth": 2, + "linecolor": "#EBF0F8", + "showbackground": true, + "ticks": "", + "zerolinecolor": "#EBF0F8" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "#DFE8F3", + "linecolor": "#A2B1C6", + "ticks": "" + }, + "baxis": { + "gridcolor": "#DFE8F3", + "linecolor": "#A2B1C6", + "ticks": "" + }, + "bgcolor": "white", + "caxis": { + "gridcolor": "#DFE8F3", + "linecolor": "#A2B1C6", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "#EBF0F8", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "#EBF0F8", + "linecolor": "#EBF0F8", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "#EBF0F8", + "zerolinewidth": 2 + } + } + }, + "title": { + "font": { + "size": 10 + } + }, + "width": 1000, + "xaxis": { + "anchor": "y", + "domain": [ + 0, + 0.2888888888888889 + ], + "linewidth": 1.5, + "mirror": true, + "showgrid": false, + "showline": true + }, + "xaxis2": { + "anchor": "y2", + "domain": [ + 0.35555555555555557, + 0.6444444444444445 + ], + "linewidth": 1.5, + "mirror": true, + "showgrid": false, + "showline": true + }, + "xaxis3": { + "anchor": "y3", + "domain": [ + 0.7111111111111111, + 1 + ], + "linewidth": 1.5, + "mirror": true, + "showgrid": false, + "showline": true + }, + "yaxis": { + "anchor": "x", + "domain": [ + 0, + 1 + ], + "linewidth": 1.5, + "mirror": true, + "rangemode": "tozero", + "showgrid": false, + "showline": true, + "type": "log" + }, + "yaxis2": { + "anchor": "x2", + "domain": [ + 0, + 1 + ], + "linewidth": 1.5, + "mirror": true, + "rangemode": "tozero", + "showgrid": false, + "showline": true, + "type": "log" + }, + "yaxis3": { + "anchor": "x3", + "domain": [ + 0, + 1 + ], + "linewidth": 1.5, + "mirror": true, + "rangemode": 
"tozero", + "showgrid": false, + "showline": true, + "type": "log" + } + } + } + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "create_metrics_plot(metrics)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'MAE_val': 1.7389646768569946,\n", + " 'RMSE_val': 1.914405107498169,\n", + " 'Loss_val': 3.2401645183563232,\n", + " 'RegLoss_val': 0.0,\n", + " 'epoch': 9,\n", + " 'MAE': 0.9105463027954102,\n", + " 'RMSE': 1.1649360656738281,\n", + " 'Loss': 1.2888280153274536,\n", + " 'RegLoss': 0.0}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "metrics.to_dict(\"records\")[-1]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MAE_valRMSE_valLoss_valRegLoss_valepochMAERMSELossRegLoss
91.7389651.9144053.2401650.090.9105461.1649361.2888280.0
\n", + "
" + ], + "text/plain": [ + " MAE_val RMSE_val Loss_val RegLoss_val epoch MAE RMSE \\\n", + "9 1.738965 1.914405 3.240165 0.0 9 0.910546 1.164936 \n", + "\n", + " Loss RegLoss \n", + "9 1.288828 0.0 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "metrics.tail(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO - (NP.df_utils._infer_frequency) - Major frequency H corresponds to 99.932% of the data.\n", + "INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - H\n", + "INFO - (NP.df_utils._infer_frequency) - Major frequency H corresponds to 99.932% of the data.\n", + "INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - H\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO - (NP.df_utils._infer_frequency) - Major frequency H corresponds to 99.932% of the data.\n", + "INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - H\n", + "INFO - (NP.data.processing._handle_missing_data) - Dropped 5 rows at the end with NaNs in 'y' column.\n", + "INFO - (NP.df_utils._infer_frequency) - Major frequency H corresponds to 99.932% of the data.\n", + "INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - H\n", + "INFO - (NP.data.processing._handle_missing_data) - Dropped 5 rows at the end with NaNs in 'y' column.\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "78600faef98442c3bcae260cf6a78232", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Predicting: 22it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "a621592c80404313838c8ae9250a41c3", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Predicting: 22it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "forecast = m.predict(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO - (NP.forecaster.plot) - Plotting data from ID test\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "4d1fc6fc6bca4a459a6484e0f9bec945", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FigureWidgetResampler({\n", + " 'data': [{'fillcolor': 'rgba(45, 146, 255, 0.2)',\n", + " 'line': {'color': 'rgba(45, 146, 255, 0.2)', 'width': 1},\n", + " 'mode': 'lines',\n", + " 'name': '[R] yhat5 1.0% ~1h',\n", + " 'type': 'scatter',\n", + " 'uid': 'aebc484d-c130-47bd-8870-268071f0b3d5',\n", + " 'x': array([datetime.datetime(2015, 1, 2, 13, 0),\n", + " datetime.datetime(2015, 1, 2, 14, 0),\n", + " datetime.datetime(2015, 1, 2, 15, 0), ...,\n", + " datetime.datetime(2015, 3, 2, 17, 0),\n", + " datetime.datetime(2015, 3, 2, 18, 0),\n", + " datetime.datetime(2015, 3, 2, 20, 0)], dtype=object),\n", + " 'y': array([62.35801 , 58.90128 , 49.21923 , ..., 50.683945, 56.553596, 58.41175 ],\n", + " dtype=float32)},\n", + " {'fill': 'tonexty',\n", + " 'fillcolor': 'rgba(45, 146, 255, 0.2)',\n", + " 'line': {'color': 'rgba(45, 146, 255, 0.2)', 'width': 1},\n", + " 'mode': 'lines',\n", + " 'name': '[R] yhat5 99.0% ~1h',\n", + " 'type': 'scatter',\n", + " 
'uid': 'c62aca2a-cbb9-4e43-915f-156387e57092',\n",
+       " ...}],\n",
+       " 'layout': {...}\n",
+       "})\n",
+       "[Figure data elided: forecast plot of '[R] Actual ~1h' and '[R] Predicted ~1h' as lines and markers over axes 'ds' and 'y', with range selector and range slider.]"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "m.highlight_nth_step_ahead_of_each_forecast(m.n_forecasts)\n",
+    "m.plot(forecast, df_name=\"test\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO - (NP.forecaster.plot_components) - Plotting data from ID test\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "FigureWidgetResampler({...})\n",
+       "[Figure data elided: components plot with panels 'Trend', 'yearly seasonality', 'weekly seasonality', 'winter seasonality', 'summer seasonality', 'AR (5)-ahead', 'Lagged Regressor \"temp\" (5)-ahead', 'Additive Events', 'Uncertainty'.]"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "m.plot_components(forecast, df_name=\"test\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "FigureWidgetResampler({...})\n",
+       "[Figure data elided: parameter plot with panels 'Trend', 'Seasonality: yearly', 'Seasonality: weekly', 'Seasonality: winter', 'Seasonality: summer', 'AR weight (5)-ahead', 'Lagged Regressor \"temp\" weight (5)-ahead', 'Additive event weight'.]"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "m.plot_parameters()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+ "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0rc1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 712dcf0b49ca66ce95c26d1e65430dc21efe066e Mon Sep 17 00:00:00 2001 From: ourownstory Date: Wed, 14 Feb 2024 17:03:18 -0800 Subject: [PATCH 094/128] set to log model performance INFO --- tests/test_model_performance.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_model_performance.py b/tests/test_model_performance.py index 3d72c186c..55e2bdf5f 100644 --- a/tests/test_model_performance.py +++ b/tests/test_model_performance.py @@ -16,8 +16,8 @@ from neuralprophet import NeuralProphet, set_random_seed log = logging.getLogger("NP.test") -log.setLevel("ERROR") -log.parent.setLevel("ERROR") +log.setLevel("INFO") +log.parent.setLevel("INFO") DIR = pathlib.Path(__file__).parent.parent.absolute() DATA_DIR = os.path.join(DIR, "tests", "test-data") From c0b3cdddf502b0d53b7a711d945b3e9ee2a8f8c2 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Wed, 14 Feb 2024 17:07:05 -0800 Subject: [PATCH 095/128] address config_regressors.regressors --- neuralprophet/time_dataset.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 2affa93dc..ba58ac0a6 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -511,7 +511,7 @@ def tabularize_univariate_datetime_single_index( # create numpy array of values of additive and multiplicative regressors, at correct indexes # features dims: (n_forecasts, n_features) any_future_regressors = 0 < len(additive_regressors_names + multiplicative_regressors_names) - if any_future_regressors: # if config_regressors is not None: + if any_future_regressors: # if config_regressors.regressors is not None: inputs["regressors"] = get_sample_future_regressors( df=df, origin_index=origin_index, @@ -911,12 +911,10 @@ def mask_origin_without_nan_for_columns(df_isna, names, max_lags, n_lags, n_fore def sort_regressor_names(config): additive_regressors_names = [] multiplicative_regressors_names = [] - if config is not None: + if config is not None and config.regressors is not None: # sort and divide regressors into multiplicative and additive - additive_regressors_names = [] - multiplicative_regressors_names = [] - for reg in sorted(list(config.keys())): - mode = config[reg].mode + for reg in sorted(list(config.regressors.keys())): + mode = config.regressors[reg].mode if mode == "additive": additive_regressors_names.append(reg) else: From 88264fcb0b72a8c3ee6fd36a456550ac58aff6b5 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Thu, 15 Feb 2024 11:05:59 -0800 Subject: [PATCH 096/128] clean up create_nan_mask --- neuralprophet/time_dataset.py | 69 ++++------------------------------- 1 file changed, 8 insertions(+), 61 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index ba58ac0a6..2ed656790 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -821,68 +821,15 @@ def create_nan_mask( # TIME: TREND & SEASONALITY: the time at each sample's lags and forecasts # FUTURE REGRESSORS - # EVENTS - for names in [["t"], future_regressor_names, event_names]: - if len(names) > 0: - valid_columns = mask_origin_without_nan_for_columns(df_isna, names, max_lags, n_lags, n_forecasts) - valid_origins = np.logical_and(valid_origins, 
valid_columns) - return valid_origins - - # # TIME: TREND & SEASONALITY: the time at each sample's lags and forecasts - # if max_lags == 0: # y-series and origin_index match - # time_valid = np.logical_not(df_isna["t"].values) - # else: - # time_nan = sliding_window_view(df_isna["t"], window_shape=n_lags+n_forecasts, axis=0).any(axis=-1) - # # first sample is at origin_index = n_lags -1, - # if n_lags == 0: # first sample origin index is at -1 - # time_nan = time_nan[1:] - # else: - # time_nan = np.pad(time_nan, pad_width=(n_lags-1, 0), mode="constant", constant_values=True) - # # there are n_forecasts origin_indexes missing at end - # time_nan = np.pad(time_nan, pad_width=(0, n_forecasts), mode="constant", constant_values=True) - # time_valid = np.logical_not(time_nan) - # valid_origins = np.logical_and(valid_origins, time_valid) - - # # FUTURE REGRESSORS - # if len(future_regressor_names) > 0: - # if max_lags == 0: - # fut_reg_nan = df_isna.loc[:, future_regressor_names] - # assert len(fut_reg_nan.shape) == 2 - # fut_reg_nan = fut_reg_nan.any(axis=-1) - # else: - # fut_reg_nan = sliding_window_view(df_isna.loc[:, future_regressor_names], window_shape=n_lags+n_forecasts, axis=0).any(axis=-1) - # assert len(fut_reg_nan.shape) == 2 - # fut_reg_nan = fut_reg_nan.any(axis=-1) - # # first sample is at origin_index = n_lags -1, - # if n_lags == 0: # first sample origin index is at -1 - # fut_reg_nan = fut_reg_nan[1:] - # else: - # fut_reg_nan = np.pad(fut_reg_nan, pad_width=(n_lags-1, 0), mode="constant", constant_values=True) - # # there are n_forecasts origin_indexes missing at end - # fut_reg_nan = np.pad(fut_reg_nan, pad_width=(0, n_forecasts), mode="constant", constant_values=True) - # fut_reg_valid = np.logical_not(fut_reg_nan) - # valid_origins = np.logical_and(valid_origins, fut_reg_valid) - # # EVENTS - # if len(event_names) > 0: - # if max_lags == 0: - # event_nan = df_isna.loc[:, event_names] - # assert len(event_nan.shape) == 2 - # event_nan = event_nan.any(axis=-1) - # else: - # event_nan = sliding_window_view(df_isna.loc[:, event_names], window_shape=n_lags+n_forecasts, axis=0).any(axis=-1) - # assert len(event_nan.shape) == 2 - # event_nan = event_nan.any(axis=-1) - # # first sample is at origin_index = n_lags -1, - # if n_lags == 0: # first sample origin index is at -1 - # event_nan = event_nan[1:] - # else: - # event_nan = np.pad(event_nan, pad_width=(n_lags-1, 0), mode="constant", constant_values=True) - # # there are n_forecasts origin_indexes missing at end - # event_nan = np.pad(event_nan, pad_width=(0, n_forecasts), mode="constant", constant_values=True) - # event_valid = np.logical_not(event_nan) - # valid_origins = np.logical_and(valid_origins, event_valid) - # return valid_origins + names = ["t"] + future_regressor_names + event_names + valid_columns = mask_origin_without_nan_for_columns(df_isna, names, max_lags, n_lags, n_forecasts) + valid_origins = np.logical_and(valid_origins, valid_columns) + # for names in [["t"], future_regressor_names, event_names]: + # if len(names) > 0: + # valid_columns = mask_origin_without_nan_for_columns(df_isna, names, max_lags, n_lags, n_forecasts) + # valid_origins = np.logical_and(valid_origins, valid_columns) + return valid_origins def mask_origin_without_nan_for_columns(df_isna, names, max_lags, n_lags, n_forecasts): From a0b0247ab4dec7567180553eccde84dcfe804d8c Mon Sep 17 00:00:00 2001 From: ourownstory Date: Thu, 15 Feb 2024 11:10:08 -0800 Subject: [PATCH 097/128] clean up create_nan_mask params --- neuralprophet/time_dataset.py | 
14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 2ed656790..c644a9d7a 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -178,13 +178,9 @@ def create_sample2index_map(self, df): # Combine prediction origin masks valid_prediction_mask = np.logical_and(prediction_frequency_mask, origin_start_end_mask) - # TODO Create NAN-free index mapping of sample index to df index - # analogous to `self.drop_nan_after_init( - # self.df, self.kwargs["predict_steps"], self.kwargs["config_missing"].drop_missing) + # Create NAN-free index mapping of sample index to df index nan_mask = create_nan_mask( df=df, - predict_steps=self.predict_steps, - drop_missing=self.config_missing.drop_missing, predict_mode=self.predict_mode, max_lags=self.max_lags, n_lags=self.n_lags, @@ -747,8 +743,6 @@ def create_prediction_frequency_filter_mask(df: pd.DataFrame, prediction_frequen def create_nan_mask( df, - predict_steps, - drop_missing, predict_mode, max_lags, n_lags, @@ -760,12 +754,6 @@ def create_nan_mask( """Creates mask for each prediction origin, accounting for corresponding input lags / forecast targets containing any NaN values. - Parameters - ---------- - drop_missing : bool - whether to automatically drop missing samples from the data - predict_steps : int - number of steps to predict """ valid_origins = np.ones(len(df), dtype=bool) df_isna = df.isna() From 93f00676594ec1bd4347be1c5d8a84dcf4ce850e Mon Sep 17 00:00:00 2001 From: ourownstory Date: Thu, 15 Feb 2024 11:22:06 -0800 Subject: [PATCH 098/128] clean TimeDataframe --- neuralprophet/time_dataset.py | 49 ++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index c644a9d7a..80632b1bf 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -22,7 +22,6 @@ class TimeDataset(Dataset): def __init__( self, df, - name, predict_mode, n_lags, n_forecasts, @@ -40,10 +39,6 @@ def __init__( ---------- df : pd.DataFrame Time series data - name : str - Name of time-series - **kwargs : dict - Identical to :meth:`tabularize_univariate_datetime` """ # Outcome after a call to init (summary): # - add events and holidays columns to df @@ -61,14 +56,19 @@ def __init__( self.df = df.reset_index(drop=True) # Needed for index based operations in __getitem__ if "index" in list(self.df.columns): # should not be the case self.df = self.df.drop("index", axis=1) + df_names = list(np.unique(df.loc[:, "ID"].values)) + assert len(df_names) == 1 + assert df_names[0] is str + self.df_name = df_names[0] + self.meta = OrderedDict({}) - self.meta["df_name"] = name + self.meta["df_name"] = self.df_name self.predict_mode = predict_mode self.n_lags = n_lags self.n_forecasts = n_forecasts self.prediction_frequency = prediction_frequency - self.predict_steps = predict_steps + self.predict_steps = predict_steps # currently unused self.config_seasonality = config_seasonality self.config_events = config_events self.config_country_holidays = config_country_holidays @@ -172,7 +172,6 @@ def create_sample2index_map(self, df): # Prediction Frequency # Filter missing samples and prediction frequency (does not actually drop, but creates indexmapping) - # analogous to `self.filter_samples_after_init(self.kwargs["prediction_frequency"])` prediction_frequency_mask = create_prediction_frequency_filter_mask(df, self.prediction_frequency) # Combine prediction 
origin masks @@ -212,20 +211,46 @@ def create_sample2index_map(self, df): class GlobalTimeDataset(TimeDataset): - def __init__(self, df, **kwargs): + def __init__( + self, + df, + predict_mode, + n_lags, + n_forecasts, + prediction_frequency, + predict_steps, + config_seasonality, + config_events, + config_country_holidays, + config_regressors, + config_lagged_regressors, + config_missing, + ): """Initialize Timedataset from time-series df. Parameters ---------- df : pd.DataFrame dataframe containing column ``ds``, ``y``, and optionally``ID`` and normalized columns normalized columns ``ds``, ``y``, ``t``, ``y_scaled`` - **kwargs : dict - Identical to :meth:`tabularize_univariate_datetime` + """ self.df_names = sorted(list(np.unique(df.loc[:, "ID"].values))) self.datasets = OrderedDict({}) for df_name in self.df_names: - self.datasets[df_name] = TimeDataset(df[df["ID"] == df_name], df_name, **kwargs) + self.datasets[df_name] = TimeDataset( + df=df[df["ID"] == df_name], + predict_mode=predict_mode, + n_lags=n_lags, + n_forecasts=n_forecasts, + prediction_frequency=prediction_frequency, + predict_steps=predict_steps, + config_seasonality=config_seasonality, + config_events=config_events, + config_country_holidays=config_country_holidays, + config_regressors=config_regressors, + config_lagged_regressors=config_lagged_regressors, + config_missing=config_missing, + ) self.length = sum(dataset.length for (name, dataset) in self.datasets.items()) global_sample_to_local_ID = [] global_sample_to_local_sample = [] From d769a8deb3a84cb73293ec1a84bf2f42f026a0d4 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Thu, 15 Feb 2024 11:29:56 -0800 Subject: [PATCH 099/128] update prediction frequency documentation --- neuralprophet/forecaster.py | 7 ++++++- neuralprophet/time_dataset.py | 13 ++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/neuralprophet/forecaster.py b/neuralprophet/forecaster.py index bdcde8f6e..27a262182 100644 --- a/neuralprophet/forecaster.py +++ b/neuralprophet/forecaster.py @@ -364,7 +364,7 @@ class NeuralProphet: trainer_config: dict Dictionary of additional trainer configuration parameters. prediction_frequency: dict - periodic interval in which forecasts should be made. + Periodic interval in which forecasts should be made. More than one item only allowed for {"daily-hour": x, "weekly-day": y"} to forecast on a specific hour of a specific day of week. @@ -379,6 +379,11 @@ class NeuralProphet: * ``'weekly-day'``: forecast once per week at a specified day * ``'monthly-day'``: forecast once per month at a specified day * ``'yearly-month'``: forecast once per year at a specified month + + Note + ---- + The forecast origin set refers to the last observation's timestamp, not the first forecast target. + In the special case where no auto-regression or lagged regressors are used, the forecast origin and forecast target are identical. """ model: time_net.TimeNet diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 80632b1bf..290283b84 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -735,15 +735,13 @@ def create_prediction_frequency_filter_mask(df: pd.DataFrame, prediction_frequen Returns boolean mask where prediction origin indexes to be included are True, and the rest False. """ - # !! IMPORTANT - # TODO: Adjust top level documentation to specify that the filter is applied to prediction ORIGIN, not targets start. - # !! 
IMPORTANT
-
     mask = np.ones((len(df),), dtype=bool)

     # Basic case: no filter
-    if prediction_frequency is None or prediction_frequency == 1:
+    if prediction_frequency is None:
         return mask
+    else:
+        assert prediction_frequency is dict

     timestamps = pd.to_datetime(df.loc[:, "ds"])
     filter_masks = []
     for key, value in prediction_frequency.items():

From 576ed1429242ffe088fa222825f1128c6a976b8f Mon Sep 17 00:00:00 2001
From: ourownstory
Date: Thu, 15 Feb 2024 11:38:50 -0800
Subject: [PATCH 100/128] improve prediction frequency documentation

---
 neuralprophet/forecaster.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/neuralprophet/forecaster.py b/neuralprophet/forecaster.py
index 27a262182..1d928a946 100644
--- a/neuralprophet/forecaster.py
+++ b/neuralprophet/forecaster.py
@@ -365,8 +365,9 @@ class NeuralProphet:
         Dictionary of additional trainer configuration parameters.
     prediction_frequency: dict
         Periodic interval in which forecasts should be made.
-        More than one item only allowed for {"daily-hour": x, "weekly-day": y"} to forecast on a specific hour of a
-        specific day of week.
+
+        Currently, only one item in dict is supported, except for the specific combination of
+        {"daily-hour": x, "weekly-day": y} to predict at a specific hour of a specific day of week.

         Key: str
             periodicity of the predictions to be made.
         Value: int
             forecast origin of the predictions to be made, e.g. 7 for 7am in case of 'daily-hour'.

         Options
-            * ``'hourly-minute'``: forecast once per hour at a specified minute
-            * ``'daily-hour'``: forecast once per day at a specified hour
-            * ``'weekly-day'``: forecast once per week at a specified day
-            * ``'monthly-day'``: forecast once per month at a specified day
-            * ``'yearly-month'``: forecast once per year at a specified month
+            * ``'hourly-minute'``: forecast once per hour at a specified minute in range [0, 59]
+            * ``'daily-hour'``: forecast once per day at a specified hour in range [0, 23]
+            * ``'weekly-day'``: forecast once per week at a specified day in range [0, 6]
+            * ``'monthly-day'``: forecast once per month at a specified day in range [1, 31]
+            * ``'yearly-month'``: forecast once per year at a specified month in range [1, 12]

         Note
         ----

From 865645c5f81a19a4a5f7deed55e817448a4bd027 Mon Sep 17 00:00:00 2001
From: ourownstory
Date: Thu, 15 Feb 2024 11:46:51 -0800
Subject: [PATCH 101/128] further improve prediction frequency documentation

---
 neuralprophet/forecaster.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/neuralprophet/forecaster.py b/neuralprophet/forecaster.py
index 1d928a946..131e7f3d0 100644
--- a/neuralprophet/forecaster.py
+++ b/neuralprophet/forecaster.py
@@ -364,7 +364,13 @@ class NeuralProphet:
     trainer_config: dict
         Dictionary of additional trainer configuration parameters.
     prediction_frequency: dict
-        Periodic interval in which forecasts should be made.
+        Set a periodic interval in which forecasts should be made.
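+
+        A minimal illustrative sketch (parameter values here are hypothetical, not defaults)::
+
+            m = NeuralProphet(n_lags=48, n_forecasts=24, prediction_frequency={"daily-hour": 7})
+
+        Here, every prediction origin is restricted to 7am.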
+
+        By default, a model creates predictions for all possible prediction origins in the dataset.
+        (e.g. for an hourly dataset, at each hour, each day, for all days in the dataset)
+        Setting `prediction_frequency` allows making forecasts only at a specific, periodically repeating point in time (prediction origin).
+        (e.g. {"daily-hour": 12} sets the model to predict only at noon, and no other hour)
+
         Currently, only one item in dict is supported, except for the specific combination of
         {"daily-hour": x, "weekly-day": y} to predict at a specific hour of a specific day of week.
@@ -383,7 +389,11 @@ class NeuralProphet:

         Note
         ----
-        The forecast origin set refers to the last observation's timestamp, not the first forecast target.
+        This filter is applied to both model training and prediction.
+
+        Note
+        ----
+        The forecast/prediction origin set refers to the last observation's timestamp, not the first forecast target.
         In the special case where no auto-regression or lagged regressors are used, the forecast origin and forecast target are identical.
     """

From 4c4d640e300971704d3dd6828a5d8fc886af118e Mon Sep 17 00:00:00 2001
From: ourownstory
Date: Thu, 15 Feb 2024 11:59:50 -0800
Subject: [PATCH 102/128] fix test errors

---
 neuralprophet/time_dataset.py | 8 ++++----
 tests/test_unit.py            | 3 +--
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py
index 290283b84..dcf462caf 100644
--- a/neuralprophet/time_dataset.py
+++ b/neuralprophet/time_dataset.py
@@ -58,7 +58,7 @@ def __init__(
             self.df = self.df.drop("index", axis=1)
         df_names = list(np.unique(df.loc[:, "ID"].values))
         assert len(df_names) == 1
-        assert df_names[0] is str
+        assert type(df_names[0]) is str
         self.df_name = df_names[0]

         self.meta = OrderedDict({})
@@ -746,7 +746,9 @@ def create_prediction_frequency_filter_mask(df: pd.DataFrame, prediction_frequen
     timestamps = pd.to_datetime(df.loc[:, "ds"])
     filter_masks = []
     for key, value in prediction_frequency.items():
-        if key == "daily-hour":
+        if key == "hourly-minute":
+            mask = timestamps.dt.minute == value
+        elif key == "daily-hour":
            mask = timestamps.dt.hour == value
         elif key == "weekly-day":
             mask = timestamps.dt.dayofweek == value
@@ -754,8 +756,6 @@ def create_prediction_frequency_filter_mask(df: pd.DataFrame, prediction_frequen
             mask = timestamps.dt.day == value
         elif key == "yearly-month":
             mask = timestamps.dt.month == value
-        elif key == "hourly-minute":
-            mask = timestamps.dt.minute == value
         else:
             raise ValueError(f"Invalid prediction frequency: {key}")
         filter_masks.append(mask)

diff --git a/tests/test_unit.py b/tests/test_unit.py
index 2757800f1..05996f8b5 100644
--- a/tests/test_unit.py
+++ b/tests/test_unit.py
@@ -95,10 +95,10 @@ def test_timedataset_minimal():
     local_data_params, global_data_params = df_utils.init_data_params(df=df, normalize="minmax")
     df = df.drop("ID", axis=1)
     df = df_utils.normalize(df, global_data_params)
+    df["ID"] = "__df__"

     dataset = time_dataset.TimeDataset(
         df=df,
-        name="name",
         predict_mode=False,
         n_lags=n_lags,
         n_forecasts=n_forecasts,
@@ -864,7 +864,6 @@ def test_too_many_NaN():
     with pytest.raises(ValueError):
         time_dataset.TimeDataset(
             df=df,
-            name="name",
             predict_mode=False,
             n_lags=n_lags,
             n_forecasts=n_forecasts,
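A minimal sketch of the filter semantics exercised by the fixes above; the dataframe,
frequency setting, and expected count are illustrative assumptions, not part of this series:

    import pandas as pd
    from neuralprophet.time_dataset import create_prediction_frequency_filter_mask

    # 48 hourly timestamps spanning two days
    df = pd.DataFrame({"ds": pd.date_range("2024-01-01", periods=48, freq="H")})
    mask = create_prediction_frequency_filter_mask(df, {"daily-hour": 7})
    assert mask.sum() == 2  # True only at the two 7am prediction origins, one per day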
From d63ea98c774c3461c95e4d674913e7f4584ffdd6 Mon Sep 17 00:00:00 2001
From: ourownstory
Date: Thu, 15 Feb 2024 12:03:46 -0800
Subject: [PATCH 103/128] fix df_names call

---
 neuralprophet/forecaster.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/neuralprophet/forecaster.py b/neuralprophet/forecaster.py
index 131e7f3d0..418fa6659 100644
--- a/neuralprophet/forecaster.py
+++ b/neuralprophet/forecaster.py
@@ -1880,7 +1880,6 @@ def predict_seasonal_components(self, df: pd.DataFrame, quantile: float = 0.5):
         for df_name, df_i in df.groupby("ID"):
             dataset = time_dataset.TimeDataset(
                 df=df_i,
-                name=df_name,
                 predict_mode=True,
                 n_lags=0,
                 n_forecasts=1,

From 6dfaffa858828b705703cce11af1dc630f2d0fca Mon Sep 17 00:00:00 2001
From: ourownstory
Date: Thu, 15 Feb 2024 12:14:08 -0800
Subject: [PATCH 104/128] fix selective prediction assertion

---
 neuralprophet/time_dataset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py
index dcf462caf..1b3a14ced 100644
--- a/neuralprophet/time_dataset.py
+++ b/neuralprophet/time_dataset.py
@@ -741,7 +741,7 @@ def create_prediction_frequency_filter_mask(df: pd.DataFrame, prediction_frequen
     if prediction_frequency is None:
         return mask
     else:
-        assert prediction_frequency is dict
+        assert type(prediction_frequency) is dict

     timestamps = pd.to_datetime(df.loc[:, "ds"])
     filter_masks = []

From 0845d624925071f5bb7e6c95fd772623bb875f8d Mon Sep 17 00:00:00 2001
From: MaiBe-ctrl
Date: Fri, 21 Jun 2024 11:28:48 -0700
Subject: [PATCH 105/128] normalize holiday names

---
 neuralprophet/time_dataset.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py
index 1b3a14ced..4876c579a 100644
--- a/neuralprophet/time_dataset.py
+++ b/neuralprophet/time_dataset.py
@@ -659,6 +659,13 @@ def add_event_features_to_df(
         np.array
             All multiplicative event features (both user specified and country specific)
     """
+
+    def normalize_holiday_name(name):
+        # Handle cases like "Independence Day (observed)" -> "Independence Day"
+        if "(observed)" in name:
+            return name.replace(" (observed)", "")
+        return name
+
     # create all additional user specified offest events
     additive_events_names = []
     multiplicative_events_names = []
@@ -685,6 +692,7 @@ def add_event_features_to_df(
         mode = config.mode
         for holiday in config_country_holidays.holiday_names:
             feature = pd.Series(np.zeros(df.shape[0], dtype=np.float32))
+            holiday = normalize_holiday_name(holiday)
             if holiday in country_holidays_dict.keys():
                 dates = country_holidays_dict[holiday]
                 feature[df.ds.isin(dates)] = 1.0

From 0982084326c92cb3f56f1dab01a84a7170cace8a Mon Sep 17 00:00:00 2001
From: MaiBe-ctrl
Date: Fri, 21 Jun 2024 13:32:29 -0700
Subject: [PATCH 106/128] fix linting

---
 neuralprophet/configure.py            |   1 -
 tests/test_future_regressor_nn.py     |   4 +-
 tests/test_glocal.py                  | 183 +++-----------------------
 tests/test_model_performance.py       |   2 +-
 tests/utils/benchmark_time_dataset.py |   2 -
 5 files changed, 22 insertions(+), 170 deletions(-)

diff --git a/neuralprophet/configure.py b/neuralprophet/configure.py
index 947f95b29..d4f6df2e0 100644
--- a/neuralprophet/configure.py
+++ b/neuralprophet/configure.py
@@ -15,7 +15,6 @@
 from neuralprophet import df_utils, np_types, utils_torch
 from neuralprophet.custom_loss_metrics import PinballLoss
-from neuralprophet.event_utils import get_holiday_names
 from neuralprophet.hdays_utils import get_holidays_from_country

 log = logging.getLogger("NP.config")

diff --git a/tests/test_future_regressor_nn.py b/tests/test_future_regressor_nn.py
index d6d59e991..bc752b1a6 100644
--- a/tests/test_future_regressor_nn.py
+++ b/tests/test_future_regressor_nn.py
@@ -5,6 +5,7 @@
 import pathlib

 import pandas as pd
+from matplotlib import pyplot as plt

 from neuralprophet
import NeuralProphet @@ -141,6 +142,7 @@ def test_future_regressor_nn_2(): metrics = m.fit( df_train, validation_df=df_val, freq="H", epochs=EPOCHS, learning_rate=LR, early_stopping=True, progress=False ) + log.debug(f"Metrics: {metrics}") def test_future_regressor_nn_shared_2(): @@ -167,11 +169,11 @@ def test_future_regressor_nn_shared_2(): metrics = m.fit( df_train, validation_df=df_val, freq="H", epochs=EPOCHS, learning_rate=LR, early_stopping=True, progress=False ) + log.debug(f"Metrics: {metrics}") # def test_future_regressor_nn_shared_coef_2(): # log.info("future regressor with NN shared coef 2") - # df = pd.read_csv(TUTORIAL_FILE, nrows=NROWS) # m = NeuralProphet( diff --git a/tests/test_glocal.py b/tests/test_glocal.py index 354d69d14..e502ab213 100644 --- a/tests/test_glocal.py +++ b/tests/test_glocal.py @@ -207,6 +207,9 @@ def test_wrong_option_global_local_modeling(): metrics = m.test(test_df) forecast_trend = m.predict_trend(test_df) forecast_seasonal_componets = m.predict_seasonal_components(test_df) + log.debug( + f"forecast = {forecast}, metrics= {metrics}, forecast_trend = {forecast_trend}, forecast_seasonal_componets= {forecast_seasonal_componets}" + ) def test_different_seasonality_modeling(): @@ -235,6 +238,9 @@ def test_different_seasonality_modeling(): metrics = m.test(test_df) forecast_trend = m.predict_trend(test_df) forecast_seasonal_componets = m.predict_seasonal_components(test_df) + log.debug( + f"forecast = {forecast}, metrics= {metrics}, forecast_trend = {forecast_trend}, forecast_seasonal_componets= {forecast_seasonal_componets}" + ) def test_adding_new_global_seasonality(): @@ -264,6 +270,9 @@ def test_adding_new_global_seasonality(): metrics = m.test(test_df) forecast_trend = m.predict_trend(test_df) forecast_seasonal_componets = m.predict_seasonal_components(test_df) + log.debug( + f"forecast = {forecast}, metrics= {metrics}, forecast_trend = {forecast_trend}, forecast_seasonal_componets= {forecast_seasonal_componets}" + ) def test_adding_new_local_seasonality(): @@ -285,6 +294,9 @@ def test_adding_new_local_seasonality(): metrics = m.test(test_df) forecast_trend = m.predict_trend(test_df) forecast_seasonal_componets = m.predict_seasonal_components(test_df) + log.debug( + f"forecast = {forecast}, metrics= {metrics}, forecast_trend = {forecast_trend}, forecast_seasonal_componets= {forecast_seasonal_componets}" + ) def test_trend_local_reg(): @@ -315,6 +327,9 @@ def test_trend_local_reg(): metrics = m.test(test_df) forecast_trend = m.predict_trend(test_df) forecast_seasonal_componets = m.predict_seasonal_components(test_df) + log.debug( + f"forecast = {forecast}, metrics= {metrics}, forecast_trend = {forecast_trend}, forecast_seasonal_componets= {forecast_seasonal_componets}" + ) def test_glocal_seasonality_reg(): @@ -344,6 +359,7 @@ def test_glocal_seasonality_reg(): future = m.make_future_dataframe(test_df, n_historic_predictions=True) forecast = m.predict(future) metrics = m.test(test_df) + log.debug(f"forecast = {forecast}, metrics= {metrics}") def test_trend_local_reg_if_global(): @@ -373,169 +389,6 @@ def test_trend_local_reg_if_global(): metrics = m.test(test_df) forecast_trend = m.predict_trend(test_df) forecast_seasonal_componets = m.predict_seasonal_components(test_df) - - -def test_different_seasonality_modeling(): - # SEASONALITY GLOBAL LOCAL MODELLING - NO EXOGENOUS VARIABLES - log.info("Global Modeling + Global Normalization") - df = pd.read_csv(PEYTON_FILE, nrows=512) - df1_0 = df.iloc[:128, :].copy(deep=True) - df1_0["ID"] = "df1" - df2_0 = 
df.iloc[128:256, :].copy(deep=True) - df2_0["ID"] = "df2" - df3_0 = df.iloc[256:384, :].copy(deep=True) - df3_0["ID"] = "df3" - m = NeuralProphet( - n_forecasts=2, - n_lags=10, - epochs=EPOCHS, - batch_size=BATCH_SIZE, - learning_rate=LR, - season_global_local="local", - yearly_seasonality_glocal_mode="global", - ) - train_df, test_df = m.split_df(pd.concat((df1_0, df2_0, df3_0)), valid_p=0.33, local_split=True) - m.fit(train_df) - future = m.make_future_dataframe(test_df) - forecast = m.predict(future) - metrics = m.test(test_df) - forecast_trend = m.predict_trend(test_df) - forecast_seasonal_componets = m.predict_seasonal_components(test_df) - - -def test_adding_new_global_seasonality(): - # SEASONALITY GLOBAL LOCAL MODELLING - NO EXOGENOUS VARIABLES - log.info("Global Modeling + Global Normalization") - df = pd.read_csv(PEYTON_FILE, nrows=512) - df1_0 = df.iloc[:128, :].copy(deep=True) - df1_0["ID"] = "df1" - df2_0 = df.iloc[128:256, :].copy(deep=True) - df2_0["ID"] = "df2" - df3_0 = df.iloc[256:384, :].copy(deep=True) - df3_0["ID"] = "df3" - m = NeuralProphet( - n_forecasts=2, - n_lags=10, - epochs=EPOCHS, - batch_size=BATCH_SIZE, - learning_rate=LR, - season_global_local="local", - yearly_seasonality_glocal_mode="global", - ) - m.add_seasonality(period=30, fourier_order=8, name="monthly", global_local="global") - train_df, test_df = m.split_df(pd.concat((df1_0, df2_0, df3_0)), valid_p=0.33, local_split=True) - m.fit(train_df) - future = m.make_future_dataframe(test_df) - forecast = m.predict(future) - metrics = m.test(test_df) - forecast_trend = m.predict_trend(test_df) - forecast_seasonal_componets = m.predict_seasonal_components(test_df) - - -def test_adding_new_local_seasonality(): - # SEASONALITY GLOBAL LOCAL MODELLING - NO EXOGENOUS VARIABLES - log.info("Global Modeling + Global Normalization") - df = pd.read_csv(PEYTON_FILE, nrows=512) - df1_0 = df.iloc[:128, :].copy(deep=True) - df1_0["ID"] = "df1" - df2_0 = df.iloc[128:256, :].copy(deep=True) - df2_0["ID"] = "df2" - df3_0 = df.iloc[256:384, :].copy(deep=True) - df3_0["ID"] = "df3" - m = NeuralProphet(epochs=EPOCHS, batch_size=BATCH_SIZE, season_global_local="global", trend_global_local="local") - m.add_seasonality(period=30, fourier_order=8, name="monthly", global_local="local") - train_df, test_df = m.split_df(pd.concat((df1_0, df2_0, df3_0)), valid_p=0.33, local_split=True) - m.fit(train_df) - future = m.make_future_dataframe(test_df, n_historic_predictions=True) - forecast = m.predict(future) - metrics = m.test(test_df) - forecast_trend = m.predict_trend(test_df) - forecast_seasonal_componets = m.predict_seasonal_components(test_df) - - -def test_trend_local_reg(): - # SEASONALITY GLOBAL LOCAL MODELLING - NO EXOGENOUS VARIABLES - log.info("Global Modeling + Global Normalization") - df = pd.read_csv(PEYTON_FILE, nrows=512) - df1_0 = df.iloc[:128, :].copy(deep=True) - df1_0["ID"] = "df1" - df2_0 = df.iloc[128:256, :].copy(deep=True) - df2_0["ID"] = "df2" - df3_0 = df.iloc[256:384, :].copy(deep=True) - df3_0["ID"] = "df3" - for coef_i in [-30, 0, False, True]: - m = NeuralProphet( - n_forecasts=1, - epochs=EPOCHS, - batch_size=BATCH_SIZE, - learning_rate=LR, - trend_global_local="local", - trend_local_reg=coef_i, - ) - - m.add_seasonality(period=30, fourier_order=8, name="monthly", global_local="global") - train_df, test_df = m.split_df(pd.concat((df1_0, df2_0, df3_0)), valid_p=0.33, local_split=True) - m.fit(train_df) - future = m.make_future_dataframe(test_df, n_historic_predictions=True) - forecast = m.predict(future) - 
metrics = m.test(test_df) - forecast_trend = m.predict_trend(test_df) - forecast_seasonal_componets = m.predict_seasonal_components(test_df) - - -def test_glocal_seasonality_reg(): - # SEASONALITY GLOBAL LOCAL MODELLING - NO EXOGENOUS VARIABLES - log.info("Global Modeling + Global Normalization") - df = pd.read_csv(PEYTON_FILE, nrows=512) - df1_0 = df.iloc[:128, :].copy(deep=True) - df1_0["ID"] = "df1" - df2_0 = df.iloc[128:256, :].copy(deep=True) - df2_0["ID"] = "df2" - df3_0 = df.iloc[256:384, :].copy(deep=True) - df3_0["ID"] = "df3" - for coef_i in [-30, 0, False, True]: - m = NeuralProphet( - n_forecasts=1, - epochs=EPOCHS, - batch_size=BATCH_SIZE, - learning_rate=LR, - season_global_local="local", - yearly_seasonality_glocal_mode="global", - seasonality_local_reg=coef_i, - ) - - m.add_seasonality(period=30, fourier_order=8, name="monthly", global_local="global") - train_df, test_df = m.split_df(pd.concat((df1_0, df2_0, df3_0)), valid_p=0.33, local_split=True) - m.fit(train_df) - future = m.make_future_dataframe(test_df, n_historic_predictions=True) - forecast = m.predict(future) - metrics = m.test(test_df) - - -def test_trend_local_reg_if_global(): - # SEASONALITY GLOBAL LOCAL MODELLING - NO EXOGENOUS VARIABLES - log.info("Global Modeling + Global Normalization") - df = pd.read_csv(PEYTON_FILE, nrows=512) - df1_0 = df.iloc[:128, :].copy(deep=True) - df1_0["ID"] = "df1" - df2_0 = df.iloc[128:256, :].copy(deep=True) - df2_0["ID"] = "df2" - df3_0 = df.iloc[256:384, :].copy(deep=True) - df3_0["ID"] = "df3" - for coef_i in [-30, 0, False, True]: - m = NeuralProphet( - n_forecasts=1, - epochs=EPOCHS, - batch_size=BATCH_SIZE, - learning_rate=LR, - trend_global_local="global", - trend_local_reg=3, + log.debug( + f"forecast = {forecast}, metrics= {metrics}, forecast_trend = {forecast_trend}, forecast_seasonal_componets= {forecast_seasonal_componets}" ) - - train_df, test_df = m.split_df(pd.concat((df1_0, df2_0, df3_0)), valid_p=0.33, local_split=True) - m.fit(train_df) - future = m.make_future_dataframe(test_df, n_historic_predictions=True) - forecast = m.predict(future) - metrics = m.test(test_df) - forecast_trend = m.predict_trend(test_df) - forecast_seasonal_componets = m.predict_seasonal_components(test_df) diff --git a/tests/test_model_performance.py b/tests/test_model_performance.py index 3a7558013..481938726 100644 --- a/tests/test_model_performance.py +++ b/tests/test_model_performance.py @@ -243,7 +243,7 @@ def test_EnergyPriceDaily(): def test_EnergyDailyDeep(): - ### Temporary Test for on-the-fly sampling - very time consuming! + # Temporary Test for on-the-fly sampling - very time consuming! 
df = pd.read_csv(ENERGY_PRICE_DAILY_FILE) df = df[df["ds"] < "2018-01-01"] diff --git a/tests/utils/benchmark_time_dataset.py b/tests/utils/benchmark_time_dataset.py index c1e9e75fd..d80bd4f88 100644 --- a/tests/utils/benchmark_time_dataset.py +++ b/tests/utils/benchmark_time_dataset.py @@ -5,7 +5,6 @@ from itertools import product import pandas as pd -import pytest import torch.utils.benchmark as benchmark from torch.utils.data import DataLoader @@ -388,7 +387,6 @@ def peyton_minus_regressors(nrows=NROWS, epochs=EPOCHS, batch=BATCH_SIZE, season # print(f"#### Time: {toc - tic:0.4f} for test_asymmetrical_quantiles") -############################33333 # t0 = benchmark.Timer( # stmt='test_uncertainty_estimation_yosemite_temps(x)', # setup='from __main__ import test_uncertainty_estimation_yosemite_temps', From e89057b9591352d06d294ba61014f4ee0d03e0ee Mon Sep 17 00:00:00 2001 From: MaiBe-ctrl Date: Fri, 21 Jun 2024 14:09:58 -0700 Subject: [PATCH 107/128] fix tests --- poetry.lock | 3 ++- tests/test_glocal.py | 3 +-- tests/test_regularization.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index dcfb37096..e7bc66106 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "absl-py" @@ -3111,6 +3111,7 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, diff --git a/tests/test_glocal.py b/tests/test_glocal.py index e502ab213..b2b65b05c 100644 --- a/tests/test_glocal.py +++ b/tests/test_glocal.py @@ -350,7 +350,6 @@ def test_glocal_seasonality_reg(): learning_rate=LR, season_global_local="local", yearly_seasonality_glocal_mode="global", - glocal_seasonality_reg=coef_i, ) m.add_seasonality(period=30, fourier_order=8, name="monthly", global_local="global") @@ -372,7 +371,7 @@ def test_trend_local_reg_if_global(): df2_0["ID"] = "df2" df3_0 = df.iloc[256:384, :].copy(deep=True) df3_0["ID"] = "df3" - for coef_i in [-30, 0, False, True]: + for _ in [-30, 0, False, True]: m = NeuralProphet( n_forecasts=1, epochs=EPOCHS, diff --git a/tests/test_regularization.py b/tests/test_regularization.py index 34aef4a86..6631a4d43 100644 --- a/tests/test_regularization.py +++ b/tests/test_regularization.py @@ -82,7 +82,7 @@ def test_regularization_holidays(): to_preserve.append(weight_list[0][0][0]) # print(to_reduce) # print(to_preserve) - assert np.mean(to_reduce) < 0.1 + assert np.mean(to_reduce) 
< 0.2 assert np.mean(to_preserve) > 0.5 From 7d938bdde8ae006498ca06f0f4eadf668f0a9578 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Fri, 21 Jun 2024 14:36:37 -0700 Subject: [PATCH 108/128] update to use new holiday functions in event_utils.py --- neuralprophet/configure.py | 4 +-- neuralprophet/hdays_utils.py | 60 ++++++++++++++++++------------------ 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/neuralprophet/configure.py b/neuralprophet/configure.py index d4f6df2e0..c70bf93b3 100644 --- a/neuralprophet/configure.py +++ b/neuralprophet/configure.py @@ -15,7 +15,7 @@ from neuralprophet import df_utils, np_types, utils_torch from neuralprophet.custom_loss_metrics import PinballLoss -from neuralprophet.hdays_utils import get_holidays_from_country +from neuralprophet.event_utils import get_holiday_names log = logging.getLogger("NP.config") @@ -509,7 +509,7 @@ class Holidays: holiday_names: set = field(init=False) def init_holidays(self, df=None): - self.holiday_names = get_holidays_from_country(self.country, df) + self.holiday_names = get_holiday_names(self.country, df) ConfigCountryHolidays = Holidays diff --git a/neuralprophet/hdays_utils.py b/neuralprophet/hdays_utils.py index 46dc61570..3e79a5a8d 100644 --- a/neuralprophet/hdays_utils.py +++ b/neuralprophet/hdays_utils.py @@ -83,33 +83,33 @@ def get_holidays_from_country(country: Union[str, Iterable[str], dict], df=None) return set(holiday_names) -def make_country_specific_holidays(year_list, country): - """ - Create dict of holiday names and dates for given years and countries - Parameters - ---------- - year_list : list - List of years - country : str, list, dict - List of country names and optional subdivisions - Returns - ------- - dict - holiday names as keys and dates as values - """ - # iterate over countries and get holidays for each country - - if isinstance(country, str): - country = {country: None} - elif isinstance(country, list): - country = dict(zip(country, [None] * len(country))) - - country_specific_holidays = {} - for single_country, subdivision in country.items(): - single_country_specific_holidays = get_country_holidays(single_country, year_list, subdivision) - # only add holiday if it is not already in the dict - country_specific_holidays.update(single_country_specific_holidays) - holidays_dates = defaultdict(list) - for date, holiday in country_specific_holidays.items(): - holidays_dates[holiday].append(pd.to_datetime(date)) - return holidays_dates +# def make_country_specific_holidays(year_list, country): +# """ +# Create dict of holiday names and dates for given years and countries +# Parameters +# ---------- +# year_list : list +# List of years +# country : str, list, dict +# List of country names and optional subdivisions +# Returns +# ------- +# dict +# holiday names as keys and dates as values +# """ +# # iterate over countries and get holidays for each country + +# if isinstance(country, str): +# country = {country: None} +# elif isinstance(country, list): +# country = dict(zip(country, [None] * len(country))) + +# country_specific_holidays = {} +# for single_country, subdivision in country.items(): +# single_country_specific_holidays = get_country_holidays(single_country, year_list, subdivision) +# # only add holiday if it is not already in the dict +# country_specific_holidays.update(single_country_specific_holidays) +# holidays_dates = defaultdict(list) +# for date, holiday in country_specific_holidays.items(): +# holidays_dates[holiday].append(pd.to_datetime(date)) +# return 
holidays_dates From f3ca8f3fd394a99c94dfaa4d6e7a6ef40679ded0 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Fri, 21 Jun 2024 14:55:21 -0700 Subject: [PATCH 109/128] fix seasonality_local_reg test --- tests/test_glocal.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_glocal.py b/tests/test_glocal.py index b2b65b05c..cc949b1a5 100644 --- a/tests/test_glocal.py +++ b/tests/test_glocal.py @@ -332,7 +332,7 @@ def test_trend_local_reg(): ) -def test_glocal_seasonality_reg(): +def test_seasonality_local_reg(): # SEASONALITY GLOBAL LOCAL MODELLING - NO EXOGENOUS VARIABLES log.info("Global Modeling + Global Normalization") df = pd.read_csv(PEYTON_FILE, nrows=512) @@ -350,6 +350,7 @@ def test_glocal_seasonality_reg(): learning_rate=LR, season_global_local="local", yearly_seasonality_glocal_mode="global", + seasonality_local_reg=coef_i, ) m.add_seasonality(period=30, fourier_order=8, name="monthly", global_local="global") From 08038bd044c283a78b2defc47af3c21fecb084ab Mon Sep 17 00:00:00 2001 From: ourownstory Date: Fri, 21 Jun 2024 14:56:02 -0700 Subject: [PATCH 110/128] limit holidays to less than 1.0 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 44197aa43..8a893ae4d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ pytorch-lightning = ">=2.0.0" tensorboard = ">=2.11.2" torchmetrics = ">=1.0.0" typing-extensions = ">=4.5.0" -holidays = ">=0.41" +holidays = ">=0.41,<1.0" captum = ">=0.6.0" matplotlib = ">=3.5.3" plotly = ">=5.13.1" From 1da552abd17cc92c9e869ecc1d3b751a0a2accf0 Mon Sep 17 00:00:00 2001 From: MaiBe-ctrl Date: Fri, 21 Jun 2024 15:18:35 -0700 Subject: [PATCH 111/128] changed holidays --- neuralprophet/configure.py | 4 +- neuralprophet/event_utils.py | 36 +++-------- neuralprophet/hdays_utils.py | 115 ---------------------------------- neuralprophet/time_dataset.py | 4 +- tests/test_hdays_utils.py | 13 ++-- 5 files changed, 19 insertions(+), 153 deletions(-) delete mode 100644 neuralprophet/hdays_utils.py diff --git a/neuralprophet/configure.py b/neuralprophet/configure.py index d4f6df2e0..c70bf93b3 100644 --- a/neuralprophet/configure.py +++ b/neuralprophet/configure.py @@ -15,7 +15,7 @@ from neuralprophet import df_utils, np_types, utils_torch from neuralprophet.custom_loss_metrics import PinballLoss -from neuralprophet.hdays_utils import get_holidays_from_country +from neuralprophet.event_utils import get_holiday_names log = logging.getLogger("NP.config") @@ -509,7 +509,7 @@ class Holidays: holiday_names: set = field(init=False) def init_holidays(self, df=None): - self.holiday_names = get_holidays_from_country(self.country, df) + self.holiday_names = get_holiday_names(self.country, df) ConfigCountryHolidays = Holidays diff --git a/neuralprophet/event_utils.py b/neuralprophet/event_utils.py index 9deaa8f5d..12528e19d 100644 --- a/neuralprophet/event_utils.py +++ b/neuralprophet/event_utils.py @@ -1,32 +1,11 @@ from collections import defaultdict from typing import Iterable, Union +import holidays import numpy as np import pandas as pd from holidays import country_holidays -# def get_country_holidays(country: str, years: Optional[Union[int, Iterable[int]]] = None): -# """ -# Helper function to get holidays for a country. 
- -# Parameters -# ---------- -# country : str -# Country name to retrieve country specific holidays -# years : int, list -# Year or list of years to retrieve holidays for - -# Returns -# ------- -# set -# All possible holiday dates and names of given country - -# """ -# # For compatibility with Turkey as "TU" cases. -# country = "TUR" if country == "TU" else country -# holiday_dict = country_holidays(country=country, years=years, expand=True, observed=False) -# return holiday_dict - def get_holiday_names(country: Union[str, Iterable[str]], df=None): """ @@ -65,8 +44,8 @@ def get_all_holidays(years, country): ---------- year_list : list List of years - country : str, list - List of country names + country : str, list, dict + List of country names and optional subdivisions Returns ------- pd.DataFrame @@ -74,15 +53,18 @@ def get_all_holidays(years, country): """ # convert to list if not already if isinstance(country, str): - country = [country] + country = {country: None} + elif isinstance(country, list): + country = dict(zip(country, [None] * len(country))) + all_holidays = defaultdict(list) # iterate over countries and get holidays for each country - for single_country in country: + for single_country, subdivision in country.items(): # For compatibility with Turkey as "TU" cases. single_country = "TUR" if single_country == "TU" else single_country # get dict of dates and their holiday name single_country_specific_holidays = country_holidays( - country=single_country, years=years, expand=True, observed=False + country=single_country, subdiv=subdivision, years=years, expand=True, observed=False ) # invert order - for given holiday, store list of dates for date, name in single_country_specific_holidays.items(): diff --git a/neuralprophet/hdays_utils.py b/neuralprophet/hdays_utils.py deleted file mode 100644 index 46dc61570..000000000 --- a/neuralprophet/hdays_utils.py +++ /dev/null @@ -1,115 +0,0 @@ -from collections import defaultdict -from typing import Iterable, Optional, Union - -import holidays -import numpy as np -import pandas as pd - - -def get_country_holidays( - country: str, years: Optional[Union[int, Iterable[int]]] = None, subdivision: Optional[str] = None -): - """ - Helper function to get holidays for a country. - - Parameters - ---------- - country : str - Country name to retrieve country specific holidays - years : int, list - Year or list of years to retrieve holidays for - subdivision : str - Subdivision name to retrieve subdivision specific holidays - - Returns - ------- - set - All possible holiday dates and names of given country - - """ - substitutions = { - "TU": "TR", # For compatibility with Turkey as "TU" cases. 
- } - - country = substitutions.get(country, country) - if not hasattr(holidays, country): - raise AttributeError(f"Holidays in {country} are not currently supported!") - if subdivision: - holiday_obj = getattr(holidays, country)(years=years, subdiv=subdivision) - else: - holiday_obj = getattr(holidays, country)(years=years) - - return holiday_obj - - -def get_holidays_from_country(country: Union[str, Iterable[str], dict], df=None): - """ - Return all possible holiday names of given countries - - Parameters - ---------- - country : str, list - List of country names to retrieve country specific holidays - subdivision : str, dict - a single subdivision (e.g., province or state) as a string or - a dictionary where the key is the country name and the value is a subdivision - df : pd.Dataframe - Dataframe from which datestamps will be retrieved from - - Returns - ------- - set - All possible holiday names of given country - """ - if df is None: - years = np.arange(1995, 2045) - else: - dates = df["ds"].copy(deep=True) - years = list({x.year for x in dates}) - # support multiple countries - if isinstance(country, str): - country = {country: None} - elif isinstance(country, list): - country = dict(zip(country, [None] * len(country))) - - unique_holidays = {} - for single_country, subdivision in country.items(): - holidays_country = get_country_holidays(single_country, years, subdivision) - for date, name in holidays_country.items(): - if date not in unique_holidays: - unique_holidays[date] = name - holiday_names = unique_holidays.values() - - return set(holiday_names) - - -def make_country_specific_holidays(year_list, country): - """ - Create dict of holiday names and dates for given years and countries - Parameters - ---------- - year_list : list - List of years - country : str, list, dict - List of country names and optional subdivisions - Returns - ------- - dict - holiday names as keys and dates as values - """ - # iterate over countries and get holidays for each country - - if isinstance(country, str): - country = {country: None} - elif isinstance(country, list): - country = dict(zip(country, [None] * len(country))) - - country_specific_holidays = {} - for single_country, subdivision in country.items(): - single_country_specific_holidays = get_country_holidays(single_country, year_list, subdivision) - # only add holiday if it is not already in the dict - country_specific_holidays.update(single_country_specific_holidays) - holidays_dates = defaultdict(list) - for date, holiday in country_specific_holidays.items(): - holidays_dates[holiday].append(pd.to_datetime(date)) - return holidays_dates diff --git a/neuralprophet/time_dataset.py b/neuralprophet/time_dataset.py index 4876c579a..5f725e370 100644 --- a/neuralprophet/time_dataset.py +++ b/neuralprophet/time_dataset.py @@ -58,7 +58,7 @@ def __init__( self.df = self.df.drop("index", axis=1) df_names = list(np.unique(df.loc[:, "ID"].values)) assert len(df_names) == 1 - assert type(df_names[0]) is str + assert isinstance(df_names[0], str) self.df_name = df_names[0] self.meta = OrderedDict({}) @@ -749,7 +749,7 @@ def create_prediction_frequency_filter_mask(df: pd.DataFrame, prediction_frequen if prediction_frequency is None: return mask else: - assert type(prediction_frequency) is dict + assert isinstance(prediction_frequency, dict) timestamps = pd.to_datetime(df.loc[:, "ds"]) filter_masks = [] diff --git a/tests/test_hdays_utils.py b/tests/test_hdays_utils.py index 114e84b33..eee35e8fd 100644 --- a/tests/test_hdays_utils.py +++ 
b/tests/test_hdays_utils.py @@ -2,29 +2,28 @@ import holidays import pytest - -from neuralprophet import hdays_utils +from holidays import country_holidays def test_get_country_holidays(): - assert issubclass(hdays_utils.get_country_holidays("TU").__class__, holidays.countries.turkey.TR) is True + assert issubclass(country_holidays("TU").__class__, holidays.countries.turkey.TR) is True for country in ("UnitedStates", "US", "USA"): - us_holidays = hdays_utils.get_country_holidays(country, years=2019) + us_holidays = country_holidays(country=country, years=2019) assert issubclass(us_holidays.__class__, holidays.countries.united_states.UnitedStates) is True assert len(us_holidays) == 10 with pytest.raises(AttributeError): - hdays_utils.get_country_holidays("NotSupportedCountry") + country_holidays("NotSupportedCountry") def test_get_country_holidays_with_subdivisions(): # Test US holidays with a subdivision - us_ca_holidays = hdays_utils.get_country_holidays("US", years=2019, subdivision="CA") + us_ca_holidays = country_holidays("US", years=2019, subdiv="CA") assert issubclass(us_ca_holidays.__class__, holidays.countries.united_states.UnitedStates) is True assert len(us_ca_holidays) > 0 # Assuming there are holidays specific to CA # Test Canada holidays with a subdivision - ca_on_holidays = hdays_utils.get_country_holidays("CA", years=2019, subdivision="ON") + ca_on_holidays = country_holidays("CA", years=2019, subdiv="ON") assert issubclass(ca_on_holidays.__class__, holidays.countries.canada.CA) is True assert len(ca_on_holidays) > 0 # Assuming there are holidays specific to ON From adcd8de49f2e899addf464f5777493da4f972e9d Mon Sep 17 00:00:00 2001 From: ourownstory Date: Fri, 21 Jun 2024 15:34:20 -0700 Subject: [PATCH 112/128] update lock --- poetry.lock | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index b4b5c8da5..246c996b6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. 
[[package]]
 name = "absl-py"
@@ -3110,7 +3110,6 @@ files = [
     {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
     {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
     {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
-    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
     {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
     {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
     {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
@@ -4230,4 +4229,4 @@ plotly-resampler = ["plotly-resampler"]

 [metadata]
 lock-version = "2.0"
-python-versions = ">=3.9,<=3.13"
-content-hash = "2918a6a6306adfdc98192da9235ddc0863ed75d38aee3c7fdf045dccd505e9ef"
+python-versions = ">=3.9,<=3.13"
+content-hash = "abda5205d48259c73f4cec09080aabdc804206de65a3a607f4fbc6e9763994d3"

From 241a407ad4bc1456214f2ad467cd5f5e4151c24f Mon Sep 17 00:00:00 2001
From: MaiBe-ctrl
Date: Fri, 21 Jun 2024 16:46:09 -0700
Subject: [PATCH 113/128] changed tests

---
 tests/test_hdays_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_hdays_utils.py b/tests/test_hdays_utils.py
index eee35e8fd..cc1468a33 100644
--- a/tests/test_hdays_utils.py
+++ b/tests/test_hdays_utils.py
@@ -6,7 +6,7 @@


 def test_get_country_holidays():
-    assert issubclass(country_holidays("TU").__class__, holidays.countries.turkey.TR) is True
+    # assert issubclass(country_holidays("TU").__class__, holidays.countries.turkey.TR) is True

     for country in ("UnitedStates", "US", "USA"):
         us_holidays = country_holidays(country=country, years=2019)

From c1abbea3009e40b73b6639f5bb9bc085ff0cca1d Mon Sep 17 00:00:00 2001
From: MaiBe-ctrl
Date: Fri, 21 Jun 2024 16:50:56 -0700
Subject: [PATCH 114/128] adjusted tests

---
 tests/test_hdays_utils.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tests/test_hdays_utils.py b/tests/test_hdays_utils.py
index cc1468a33..df345bfd6 100644
--- a/tests/test_hdays_utils.py
+++ b/tests/test_hdays_utils.py
@@ -6,16 +6,11 @@


 def test_get_country_holidays():
-    # assert issubclass(country_holidays("TU").__class__, holidays.countries.turkey.TR) is True
-
     for country in ("UnitedStates", "US", "USA"):
         us_holidays = country_holidays(country=country, years=2019)
         assert issubclass(us_holidays.__class__, holidays.countries.united_states.UnitedStates) is True
         assert len(us_holidays) == 10

-    with pytest.raises(AttributeError):
-        country_holidays("NotSupportedCountry")
-

 def test_get_country_holidays_with_subdivisions():
     # Test US holidays with a subdivision

From 40ad2987334ce9e00fe0e5722d839d1401bf0b12 Mon Sep 17 00:00:00 2001
From: MaiBe-ctrl
Date: Fri, 21 Jun 2024 17:17:07 -0700
Subject: [PATCH 115/128] fix reserved names

---
 tests/test_hdays_utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_hdays_utils.py b/tests/test_hdays_utils.py
index df345bfd6..9b2dd3a13 100644
--- a/tests/test_hdays_utils.py
+++ b/tests/test_hdays_utils.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python3

 import holidays
-import pytest
 from holidays import country_holidays


From f7b5eb7b81807f1bea01536c9848e1a13340112e Mon Sep 17 00:00:00 2001
From: MaiBe-ctrl
Date: Fri, 21 Jun 2024 17:20:25 -0700
Subject: [PATCH 116/128] fixed ruff linting

---
 neuralprophet/data/process.py | 5 -----
 neuralprophet/event_utils.py  | 1 -
 2 files changed, 6 deletions(-)

diff --git a/neuralprophet/data/process.py b/neuralprophet/data/process.py
index e645a47b5..2958dde49 100644
--- a/neuralprophet/data/process.py
+++ b/neuralprophet/data/process.py
@@ -333,17 +333,12 @@ def _validate_column_name(
     """
     reserved_names = [
         "trend",
-        "additive_terms",
         "daily",
         "weekly",
         "yearly",
         "events",
         "holidays",
-        "zeros",
-        "extra_regressors_additive",
         "yhat",
-        "extra_regressors_multiplicative",
-        "multiplicative_terms",
         "ID",
         "y_scaled",
         "ds",

diff --git a/neuralprophet/event_utils.py b/neuralprophet/event_utils.py
index 12528e19d..ebff84bc9 100644
--- a/neuralprophet/event_utils.py
+++ b/neuralprophet/event_utils.py
@@ -1,7 +1,6 @@
 from collections import defaultdict
 from typing import Iterable, Union

-import holidays
 import numpy as np
 import pandas as pd
 from holidays import country_holidays

From a4362312feca763a199b5299eef52e465f8b9808 Mon Sep 17 00:00:00 2001
From: MaiBe-ctrl
Date: Fri, 21 Jun 2024 17:50:43 -0700
Subject: [PATCH 117/128] changed test

---
 tests/test_unit.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_unit.py b/tests/test_unit.py
index 05996f8b5..ef06e3d51 100644
--- a/tests/test_unit.py
+++ b/tests/test_unit.py
@@ -1008,7 +1008,6 @@ def test_multiple_countries():
     holiday_names = m.model.config_holidays.holiday_names
     assert "Independence Day" in holiday_names
     assert "Christmas Day" in holiday_names
-    assert "Erster Weihnachtstag" not in holiday_names
     assert "Neujahr" not in holiday_names

From 60260bd99eb22c7fddd3b4f68223aabc551eae30 Mon Sep 17 00:00:00 2001
From: MaiBe-ctrl
Date: Fri, 21 Jun 2024 18:44:45 -0700
Subject: [PATCH 118/128] translate holidays to english if possible

---
 neuralprophet/event_utils.py | 2 +-
 poetry.lock                  | 4 +++-
 tests/test_unit.py           | 1 +
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/neuralprophet/event_utils.py b/neuralprophet/event_utils.py
index ebff84bc9..870dbb4fa 100644
--- a/neuralprophet/event_utils.py
+++ b/neuralprophet/event_utils.py
@@ -63,7 +63,7 @@ def get_all_holidays(years, country):
         single_country = "TUR" if single_country == "TU" else single_country
         # get dict of dates and their holiday name
         single_country_specific_holidays = country_holidays(
-            country=single_country, subdiv=subdivision, years=years, expand=True, observed=False
+            country=single_country, subdiv=subdivision, years=years, expand=True, observed=False, language="en"
         )
         # invert order - for given holiday, store list of dates
         for date, name in single_country_specific_holidays.items():

diff --git a/poetry.lock b/poetry.lock
index 246c996b6..ca3232f22 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
[[package]] name = "absl-py" @@ -2417,6 +2417,7 @@ description = "Nvidia JIT LTO Library" optional = false python-versions = ">=3" files = [ + {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_aarch64.whl", hash = "sha256:004186d5ea6a57758fd6d57052a123c73a4815adf365eb8dd6a85c9eaa7535ff"}, {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_x86_64.whl", hash = "sha256:d9714f27c1d0f0895cd8915c07a87a1d0029a0aa36acaf9156952ec2a8a12189"}, {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-win_amd64.whl", hash = "sha256:c3401dc8543b52d3a8158007a0c1ab4e9c768fcbd24153a48c86972102197ddd"}, ] @@ -3110,6 +3111,7 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, diff --git a/tests/test_unit.py b/tests/test_unit.py index ef06e3d51..05996f8b5 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -1008,6 +1008,7 @@ def test_multiple_countries(): holiday_names = m.model.config_holidays.holiday_names assert "Independence Day" in holiday_names assert "Christmas Day" in holiday_names + assert "Erster Weihnachtstag" not in holiday_names assert "Neujahr" not in holiday_names From c54d4b763dfa95816ea85affca9230e0366e3ce8 Mon Sep 17 00:00:00 2001 From: Oskar Triebe Date: Fri, 21 Jun 2024 23:26:53 -0700 Subject: [PATCH 119/128] exclude py3.13 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e7afe97b6..4c77b97e9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ classifiers = [ Homepage = "https://github.com/ourownstory/neural_prophet" [tool.poetry.dependencies] -python = ">=3.9,<=3.13" +python = ">=3.9,<3.13" numpy = ">=1.25.0,<2.0.0" pandas = ">=2.0.0" torch = ">=2.0.0" From 0508454d14263dcd268ee3961756c0648eab6b18 Mon Sep 17 00:00:00 2001 From: ourownstory Date: Fri, 21 Jun 2024 23:37:21 -0700 Subject: [PATCH 120/128] update lock --- poetry.lock | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index ca3232f22..ce9d0bdcb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. 
[[package]] name = "absl-py" @@ -3111,7 +3111,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -4230,5 +4229,5 @@ plotly-resampler = ["plotly-resampler"] [metadata] lock-version = "2.0" -python-versions = ">=3.9,<=3.13" -content-hash = "abda5205d48259c73f4cec09080aabdc804206de65a3a607f4fbc6e9763994d3" +python-versions = ">=3.9,<3.13" +content-hash = "d08c423b7a0c27143741287c01f7b597d7af8f45c4c4108194af7be93f442e54" From cde3f457113f9fa753893465d8a9826d087d03e2 Mon Sep 17 00:00:00 2001 From: MaiBe-ctrl Date: Tue, 25 Jun 2024 15:36:26 -0700 Subject: [PATCH 121/128] Merge all holidays related tests in one file --- tests/test_event_utils.py | 136 +++++++++++++++++++++++++++++++++++++- tests/test_hdays_utils.py | 23 ------- tests/test_integration.py | 62 ----------------- tests/test_unit.py | 34 ---------- 4 files changed, 135 insertions(+), 120 deletions(-) delete mode 100644 tests/test_hdays_utils.py diff --git a/tests/test_event_utils.py b/tests/test_event_utils.py index 8c26a2e49..0d0c75b96 100644 --- a/tests/test_event_utils.py +++ b/tests/test_event_utils.py @@ -1,8 +1,34 @@ #!/usr/bin/env python3 +import logging +import os +import pathlib + +import holidays +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd import pytest +from holidays import country_holidays + +from neuralprophet import NeuralProphet, event_utils + +log = logging.getLogger("NP.test") +log.setLevel("ERROR") +log.parent.setLevel("ERROR") + -from neuralprophet import event_utils +DIR = pathlib.Path(__file__).parent.parent.absolute() +DATA_DIR = os.path.join(DIR, "tests", "test-data") +PEYTON_FILE = os.path.join(DATA_DIR, "wp_log_peyton_manning.csv") +AIR_FILE = os.path.join(DATA_DIR, "air_passengers.csv") +YOS_FILE = os.path.join(DATA_DIR, "yosemite_temps.csv") +NROWS = 256 +EPOCHS = 1 +BATCH_SIZE = 128 +LR = 1.0 + +PLOT = False def test_get_country_holidays(): @@ -17,3 +43,111 @@ def test_get_country_holidays(): with pytest.raises(NotImplementedError): event_utils.get_holiday_names("NotSupportedCountry") + + +def test_get_country_holidays_with_subdivisions(): + # Test US holidays with a subdivision + us_ca_holidays = country_holidays("US", years=2019, subdiv="CA") + assert issubclass(us_ca_holidays.__class__, holidays.countries.united_states.UnitedStates) is True + assert len(us_ca_holidays) > 0 # Assuming there are holidays specific to CA + + # Test Canada holidays with a subdivision + ca_on_holidays = country_holidays("CA", years=2019, subdiv="ON") + assert issubclass(ca_on_holidays.__class__, holidays.countries.canada.CA) 
is True + assert len(ca_on_holidays) > 0 # Assuming there are holidays specific to ON + + +def test_add_country_holiday_multiple_calls_warning(caplog): + m = NeuralProphet( + epochs=EPOCHS, + batch_size=BATCH_SIZE, + learning_rate=LR, + ) + m.add_country_holidays(["US", "Germany"]) + error_message = "Country holidays can only be added once." + assert error_message not in caplog.text + + with pytest.raises(AssertionError): + m.add_country_holidays("Germany") + # assert error_message in caplog.text + + +def test_multiple_countries(): + # test if multiple countries are added + df = pd.read_csv(PEYTON_FILE, nrows=NROWS) + m = NeuralProphet( + epochs=EPOCHS, + batch_size=BATCH_SIZE, + learning_rate=LR, + ) + m.add_country_holidays(country_name=["US", "Germany"]) + m.fit(df, freq="D") + m.predict(df) + # get the name of holidays and compare that no holiday is repeated + holiday_names = m.model.config_holidays.holiday_names + assert "Independence Day" in holiday_names + assert "Christmas Day" in holiday_names + assert "Erster Weihnachtstag" not in holiday_names + assert "Neujahr" not in holiday_names + + +def test_events(): + log.info("testing: Events") + df = pd.read_csv(PEYTON_FILE)[-NROWS:] + playoffs = pd.DataFrame( + { + "event": "playoff", + "ds": pd.to_datetime( + [ + "2008-01-13", + "2009-01-03", + "2010-01-16", + "2010-01-24", + "2010-02-07", + "2011-01-08", + "2013-01-12", + "2014-01-12", + "2014-01-19", + "2014-02-02", + "2015-01-11", + "2016-01-17", + "2016-01-24", + "2016-02-07", + ] + ), + } + ) + superbowls = pd.DataFrame( + { + "event": "superbowl", + "ds": pd.to_datetime(["2010-02-07", "2014-02-02", "2016-02-07"]), + } + ) + events_df = pd.concat((playoffs, superbowls)) + m = NeuralProphet( + n_lags=2, + n_forecasts=30, + daily_seasonality=False, + epochs=EPOCHS, + batch_size=BATCH_SIZE, + learning_rate=LR, + ) + # set event windows + m = m.add_events( + ["superbowl", "playoff"], lower_window=-1, upper_window=1, mode="multiplicative", regularization=0.5 + ) + # add the country specific holidays + m = m.add_country_holidays( + ["US", "Indonesia", "Philippines", "Pakistan", "Belarus"], mode="additive", regularization=0.5 + ) + # m.add_country_holidays("Thailand") # holidays package has issue with int input for timedelta. 
accepts np.float64() + history_df = m.create_df_with_events(df, events_df) + m.fit(history_df, freq="D") + future = m.make_future_dataframe(df=history_df, events_df=events_df, periods=30, n_historic_predictions=90) + forecast = m.predict(df=future) + log.debug(f"Event Parameters:: {m.model.event_params}") + if PLOT: + m.plot_components(forecast) + m.plot(forecast) + m.plot_parameters() + plt.show() diff --git a/tests/test_hdays_utils.py b/tests/test_hdays_utils.py deleted file mode 100644 index 9b2dd3a13..000000000 --- a/tests/test_hdays_utils.py +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env python3 - -import holidays -from holidays import country_holidays - - -def test_get_country_holidays(): - for country in ("UnitedStates", "US", "USA"): - us_holidays = country_holidays(country=country, years=2019) - assert issubclass(us_holidays.__class__, holidays.countries.united_states.UnitedStates) is True - assert len(us_holidays) == 10 - - -def test_get_country_holidays_with_subdivisions(): - # Test US holidays with a subdivision - us_ca_holidays = country_holidays("US", years=2019, subdiv="CA") - assert issubclass(us_ca_holidays.__class__, holidays.countries.united_states.UnitedStates) is True - assert len(us_ca_holidays) > 0 # Assuming there are holidays specific to CA - - # Test Canada holidays with a subdivision - ca_on_holidays = country_holidays("CA", years=2019, subdiv="ON") - assert issubclass(ca_on_holidays.__class__, holidays.countries.canada.CA) is True - assert len(ca_on_holidays) > 0 # Assuming there are holidays specific to ON diff --git a/tests/test_integration.py b/tests/test_integration.py index 002b4298c..8ef45b10a 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -427,68 +427,6 @@ def test_lag_reg_deep(): plt.show() -def test_events(): - log.info("testing: Events") - df = pd.read_csv(PEYTON_FILE)[-NROWS:] - playoffs = pd.DataFrame( - { - "event": "playoff", - "ds": pd.to_datetime( - [ - "2008-01-13", - "2009-01-03", - "2010-01-16", - "2010-01-24", - "2010-02-07", - "2011-01-08", - "2013-01-12", - "2014-01-12", - "2014-01-19", - "2014-02-02", - "2015-01-11", - "2016-01-17", - "2016-01-24", - "2016-02-07", - ] - ), - } - ) - superbowls = pd.DataFrame( - { - "event": "superbowl", - "ds": pd.to_datetime(["2010-02-07", "2014-02-02", "2016-02-07"]), - } - ) - events_df = pd.concat((playoffs, superbowls)) - m = NeuralProphet( - n_lags=2, - n_forecasts=30, - daily_seasonality=False, - epochs=EPOCHS, - batch_size=BATCH_SIZE, - learning_rate=LR, - ) - # set event windows - m = m.add_events( - ["superbowl", "playoff"], lower_window=-1, upper_window=1, mode="multiplicative", regularization=0.5 - ) - # add the country specific holidays - m = m.add_country_holidays( - ["US", "Indonesia", "Philippines", "Pakistan", "Belarus"], mode="additive", regularization=0.5 - ) - # m.add_country_holidays("Thailand") # holidays package has issue with int input for timedelta. 
accepts np.float64() - history_df = m.create_df_with_events(df, events_df) - m.fit(history_df, freq="D") - future = m.make_future_dataframe(df=history_df, events_df=events_df, periods=30, n_historic_predictions=90) - forecast = m.predict(df=future) - log.debug(f"Event Parameters:: {m.model.event_params}") - if PLOT: - m.plot_components(forecast) - m.plot(forecast) - m.plot_parameters() - plt.show() - - def test_future_reg(): log.info("testing: Future Regressors") df = pd.read_csv(PEYTON_FILE, nrows=NROWS + 50) diff --git a/tests/test_unit.py b/tests/test_unit.py index 05996f8b5..2032ffecb 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -978,40 +978,6 @@ def test_handle_negative_values_replace(): assert df_.loc[0, "y"] == 0.0 -def test_add_country_holiday_multiple_calls_warning(caplog): - m = NeuralProphet( - epochs=EPOCHS, - batch_size=BATCH_SIZE, - learning_rate=LR, - ) - m.add_country_holidays(["US", "Germany"]) - error_message = "Country holidays can only be added once." - assert error_message not in caplog.text - - with pytest.raises(AssertionError): - m.add_country_holidays("Germany") - # assert error_message in caplog.text - - -def test_multiple_countries(): - # test if multiple countries are added - df = pd.read_csv(PEYTON_FILE, nrows=NROWS) - m = NeuralProphet( - epochs=EPOCHS, - batch_size=BATCH_SIZE, - learning_rate=LR, - ) - m.add_country_holidays(country_name=["US", "Germany"]) - m.fit(df, freq="D") - m.predict(df) - # get the name of holidays and compare that no holiday is repeated - holiday_names = m.model.config_holidays.holiday_names - assert "Independence Day" in holiday_names - assert "Christmas Day" in holiday_names - assert "Erster Weihnachtstag" not in holiday_names - assert "Neujahr" not in holiday_names - - def test_float32_inputs(): # test if float32 inputs are forecasted as float32 outputs df = pd.read_csv(PEYTON_FILE, nrows=NROWS) From 9ae4f3c610804c673f7b2c0584d9b61dd921bb7f Mon Sep 17 00:00:00 2001 From: MaiBe-ctrl Date: Tue, 25 Jun 2024 18:13:21 -0700 Subject: [PATCH 122/128] add deterministic flag --- neuralprophet/forecaster.py | 3 + neuralprophet/utils.py | 5 + poetry.lock | 216 ++++++++++++++++++-------------- pyproject.toml | 1 + tests/test_model_performance.py | 6 +- 5 files changed, 136 insertions(+), 95 deletions(-) diff --git a/neuralprophet/forecaster.py b/neuralprophet/forecaster.py index d80fcef14..d258a256e 100644 --- a/neuralprophet/forecaster.py +++ b/neuralprophet/forecaster.py @@ -435,9 +435,11 @@ def __init__( accelerator: Optional[str] = None, trainer_config: dict = {}, prediction_frequency: Optional[dict] = None, + deterministic=False, ): self.config = locals() self.config.pop("self") + self.deterministic = deterministic # General self.name = "NeuralProphet" @@ -2771,6 +2773,7 @@ def _train( metrics_enabled=metrics_enabled, checkpointing_enabled=checkpointing_enabled, num_batches_per_epoch=len(train_loader), + deterministic=self.deterministic, ) # Tune hyperparams and train diff --git a/neuralprophet/utils.py b/neuralprophet/utils.py index 33f7c51e6..c00c920ce 100644 --- a/neuralprophet/utils.py +++ b/neuralprophet/utils.py @@ -11,6 +11,7 @@ import pandas as pd import pytorch_lightning as pl import torch +from lightning_fabric.utilities.seed import seed_everything from neuralprophet import utils_torch from neuralprophet.logger import ProgressBar @@ -710,6 +711,7 @@ def set_random_seed(seed: int = 0): """ np.random.seed(seed) torch.manual_seed(seed) + seed_everything(seed, workers=True) def set_logger_level(logger, log_level, 
include_handlers=False): @@ -818,6 +820,7 @@ def configure_trainer( metrics_enabled: bool = False, checkpointing_enabled: bool = False, num_batches_per_epoch: int = 100, + deterministic: bool = False, ): """ Configures the PyTorch Lightning trainer. @@ -888,6 +891,8 @@ def configure_trainer( else: config["logger"] = False + config["deterministic"] = deterministic + # Configure callbacks callbacks = [] has_custom_callbacks = True if "callbacks" in config else False diff --git a/poetry.lock b/poetry.lock index 1ff31a91e..cdea493ce 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "absl-py" @@ -305,13 +305,13 @@ files = [ [[package]] name = "bokeh" -version = "3.4.1" +version = "3.4.2" description = "Interactive plots and applications in the browser from Python" optional = true python-versions = ">=3.9" files = [ - {file = "bokeh-3.4.1-py3-none-any.whl", hash = "sha256:1e3c502a0a8205338fc74dadbfa321f8a0965441b39501e36796a47b4017b642"}, - {file = "bokeh-3.4.1.tar.gz", hash = "sha256:d824961e4265367b0750ce58b07e564ad0b83ca64b335521cd3421e9b9f10d89"}, + {file = "bokeh-3.4.2-py3-none-any.whl", hash = "sha256:931a43ee59dbf1720383ab904f8205e126b85561aac55592415b800c96f1b0eb"}, + {file = "bokeh-3.4.2.tar.gz", hash = "sha256:a16d5cc0abb93d2d270d70fc35851f3e1b9208814a985a4678e0ba5ef2d9cd42"}, ] [package.dependencies] @@ -629,63 +629,63 @@ test-no-images = ["pytest", "pytest-cov", "pytest-xdist", "wurlitzer"] [[package]] name = "coverage" -version = "7.5.3" +version = "7.5.4" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.8" files = [ - {file = "coverage-7.5.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a6519d917abb15e12380406d721e37613e2a67d166f9fb7e5a8ce0375744cd45"}, - {file = "coverage-7.5.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aea7da970f1feccf48be7335f8b2ca64baf9b589d79e05b9397a06696ce1a1ec"}, - {file = "coverage-7.5.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:923b7b1c717bd0f0f92d862d1ff51d9b2b55dbbd133e05680204465f454bb286"}, - {file = "coverage-7.5.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62bda40da1e68898186f274f832ef3e759ce929da9a9fd9fcf265956de269dbc"}, - {file = "coverage-7.5.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8b7339180d00de83e930358223c617cc343dd08e1aa5ec7b06c3a121aec4e1d"}, - {file = "coverage-7.5.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:25a5caf742c6195e08002d3b6c2dd6947e50efc5fc2c2205f61ecb47592d2d83"}, - {file = "coverage-7.5.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:05ac5f60faa0c704c0f7e6a5cbfd6f02101ed05e0aee4d2822637a9e672c998d"}, - {file = "coverage-7.5.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:239a4e75e09c2b12ea478d28815acf83334d32e722e7433471fbf641c606344c"}, - {file = "coverage-7.5.3-cp310-cp310-win32.whl", hash = "sha256:a5812840d1d00eafae6585aba38021f90a705a25b8216ec7f66aebe5b619fb84"}, - {file = "coverage-7.5.3-cp310-cp310-win_amd64.whl", hash = "sha256:33ca90a0eb29225f195e30684ba4a6db05dbef03c2ccd50b9077714c48153cac"}, - {file = "coverage-7.5.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f81bc26d609bf0fbc622c7122ba6307993c83c795d2d6f6f6fd8c000a770d974"}, - {file = 
"coverage-7.5.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7cec2af81f9e7569280822be68bd57e51b86d42e59ea30d10ebdbb22d2cb7232"}, - {file = "coverage-7.5.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55f689f846661e3f26efa535071775d0483388a1ccfab899df72924805e9e7cd"}, - {file = "coverage-7.5.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50084d3516aa263791198913a17354bd1dc627d3c1639209640b9cac3fef5807"}, - {file = "coverage-7.5.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:341dd8f61c26337c37988345ca5c8ccabeff33093a26953a1ac72e7d0103c4fb"}, - {file = "coverage-7.5.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ab0b028165eea880af12f66086694768f2c3139b2c31ad5e032c8edbafca6ffc"}, - {file = "coverage-7.5.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:5bc5a8c87714b0c67cfeb4c7caa82b2d71e8864d1a46aa990b5588fa953673b8"}, - {file = "coverage-7.5.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:38a3b98dae8a7c9057bd91fbf3415c05e700a5114c5f1b5b0ea5f8f429ba6614"}, - {file = "coverage-7.5.3-cp311-cp311-win32.whl", hash = "sha256:fcf7d1d6f5da887ca04302db8e0e0cf56ce9a5e05f202720e49b3e8157ddb9a9"}, - {file = "coverage-7.5.3-cp311-cp311-win_amd64.whl", hash = "sha256:8c836309931839cca658a78a888dab9676b5c988d0dd34ca247f5f3e679f4e7a"}, - {file = "coverage-7.5.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:296a7d9bbc598e8744c00f7a6cecf1da9b30ae9ad51c566291ff1314e6cbbed8"}, - {file = "coverage-7.5.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:34d6d21d8795a97b14d503dcaf74226ae51eb1f2bd41015d3ef332a24d0a17b3"}, - {file = "coverage-7.5.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e317953bb4c074c06c798a11dbdd2cf9979dbcaa8ccc0fa4701d80042d4ebf1"}, - {file = "coverage-7.5.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:705f3d7c2b098c40f5b81790a5fedb274113373d4d1a69e65f8b68b0cc26f6db"}, - {file = "coverage-7.5.3-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1196e13c45e327d6cd0b6e471530a1882f1017eb83c6229fc613cd1a11b53cd"}, - {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:015eddc5ccd5364dcb902eaecf9515636806fa1e0d5bef5769d06d0f31b54523"}, - {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:fd27d8b49e574e50caa65196d908f80e4dff64d7e592d0c59788b45aad7e8b35"}, - {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:33fc65740267222fc02975c061eb7167185fef4cc8f2770267ee8bf7d6a42f84"}, - {file = "coverage-7.5.3-cp312-cp312-win32.whl", hash = "sha256:7b2a19e13dfb5c8e145c7a6ea959485ee8e2204699903c88c7d25283584bfc08"}, - {file = "coverage-7.5.3-cp312-cp312-win_amd64.whl", hash = "sha256:0bbddc54bbacfc09b3edaec644d4ac90c08ee8ed4844b0f86227dcda2d428fcb"}, - {file = "coverage-7.5.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f78300789a708ac1f17e134593f577407d52d0417305435b134805c4fb135adb"}, - {file = "coverage-7.5.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b368e1aee1b9b75757942d44d7598dcd22a9dbb126affcbba82d15917f0cc155"}, - {file = "coverage-7.5.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f836c174c3a7f639bded48ec913f348c4761cbf49de4a20a956d3431a7c9cb24"}, - {file = 
"coverage-7.5.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:244f509f126dc71369393ce5fea17c0592c40ee44e607b6d855e9c4ac57aac98"}, - {file = "coverage-7.5.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4c2872b3c91f9baa836147ca33650dc5c172e9273c808c3c3199c75490e709d"}, - {file = "coverage-7.5.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:dd4b3355b01273a56b20c219e74e7549e14370b31a4ffe42706a8cda91f19f6d"}, - {file = "coverage-7.5.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:f542287b1489c7a860d43a7d8883e27ca62ab84ca53c965d11dac1d3a1fab7ce"}, - {file = "coverage-7.5.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:75e3f4e86804023e991096b29e147e635f5e2568f77883a1e6eed74512659ab0"}, - {file = "coverage-7.5.3-cp38-cp38-win32.whl", hash = "sha256:c59d2ad092dc0551d9f79d9d44d005c945ba95832a6798f98f9216ede3d5f485"}, - {file = "coverage-7.5.3-cp38-cp38-win_amd64.whl", hash = "sha256:fa21a04112c59ad54f69d80e376f7f9d0f5f9123ab87ecd18fbb9ec3a2beed56"}, - {file = "coverage-7.5.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f5102a92855d518b0996eb197772f5ac2a527c0ec617124ad5242a3af5e25f85"}, - {file = "coverage-7.5.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d1da0a2e3b37b745a2b2a678a4c796462cf753aebf94edcc87dcc6b8641eae31"}, - {file = "coverage-7.5.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8383a6c8cefba1b7cecc0149415046b6fc38836295bc4c84e820872eb5478b3d"}, - {file = "coverage-7.5.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9aad68c3f2566dfae84bf46295a79e79d904e1c21ccfc66de88cd446f8686341"}, - {file = "coverage-7.5.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e079c9ec772fedbade9d7ebc36202a1d9ef7291bc9b3a024ca395c4d52853d7"}, - {file = "coverage-7.5.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bde997cac85fcac227b27d4fb2c7608a2c5f6558469b0eb704c5726ae49e1c52"}, - {file = "coverage-7.5.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:990fb20b32990b2ce2c5f974c3e738c9358b2735bc05075d50a6f36721b8f303"}, - {file = "coverage-7.5.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3d5a67f0da401e105753d474369ab034c7bae51a4c31c77d94030d59e41df5bd"}, - {file = "coverage-7.5.3-cp39-cp39-win32.whl", hash = "sha256:e08c470c2eb01977d221fd87495b44867a56d4d594f43739a8028f8646a51e0d"}, - {file = "coverage-7.5.3-cp39-cp39-win_amd64.whl", hash = "sha256:1d2a830ade66d3563bb61d1e3c77c8def97b30ed91e166c67d0632c018f380f0"}, - {file = "coverage-7.5.3-pp38.pp39.pp310-none-any.whl", hash = "sha256:3538d8fb1ee9bdd2e2692b3b18c22bb1c19ffbefd06880f5ac496e42d7bb3884"}, - {file = "coverage-7.5.3.tar.gz", hash = "sha256:04aefca5190d1dc7a53a4c1a5a7f8568811306d7a8ee231c42fb69215571944f"}, + {file = "coverage-7.5.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6cfb5a4f556bb51aba274588200a46e4dd6b505fb1a5f8c5ae408222eb416f99"}, + {file = "coverage-7.5.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2174e7c23e0a454ffe12267a10732c273243b4f2d50d07544a91198f05c48f47"}, + {file = "coverage-7.5.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2214ee920787d85db1b6a0bd9da5f8503ccc8fcd5814d90796c2f2493a2f4d2e"}, + {file = "coverage-7.5.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:1137f46adb28e3813dec8c01fefadcb8c614f33576f672962e323b5128d9a68d"}, + {file = "coverage-7.5.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b385d49609f8e9efc885790a5a0e89f2e3ae042cdf12958b6034cc442de428d3"}, + {file = "coverage-7.5.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b4a474f799456e0eb46d78ab07303286a84a3140e9700b9e154cfebc8f527016"}, + {file = "coverage-7.5.4-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5cd64adedf3be66f8ccee418473c2916492d53cbafbfcff851cbec5a8454b136"}, + {file = "coverage-7.5.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e564c2cf45d2f44a9da56f4e3a26b2236504a496eb4cb0ca7221cd4cc7a9aca9"}, + {file = "coverage-7.5.4-cp310-cp310-win32.whl", hash = "sha256:7076b4b3a5f6d2b5d7f1185fde25b1e54eb66e647a1dfef0e2c2bfaf9b4c88c8"}, + {file = "coverage-7.5.4-cp310-cp310-win_amd64.whl", hash = "sha256:018a12985185038a5b2bcafab04ab833a9a0f2c59995b3cec07e10074c78635f"}, + {file = "coverage-7.5.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:db14f552ac38f10758ad14dd7b983dbab424e731588d300c7db25b6f89e335b5"}, + {file = "coverage-7.5.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3257fdd8e574805f27bb5342b77bc65578e98cbc004a92232106344053f319ba"}, + {file = "coverage-7.5.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a6612c99081d8d6134005b1354191e103ec9705d7ba2754e848211ac8cacc6b"}, + {file = "coverage-7.5.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d45d3cbd94159c468b9b8c5a556e3f6b81a8d1af2a92b77320e887c3e7a5d080"}, + {file = "coverage-7.5.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed550e7442f278af76d9d65af48069f1fb84c9f745ae249c1a183c1e9d1b025c"}, + {file = "coverage-7.5.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7a892be37ca35eb5019ec85402c3371b0f7cda5ab5056023a7f13da0961e60da"}, + {file = "coverage-7.5.4-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8192794d120167e2a64721d88dbd688584675e86e15d0569599257566dec9bf0"}, + {file = "coverage-7.5.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:820bc841faa502e727a48311948e0461132a9c8baa42f6b2b84a29ced24cc078"}, + {file = "coverage-7.5.4-cp311-cp311-win32.whl", hash = "sha256:6aae5cce399a0f065da65c7bb1e8abd5c7a3043da9dceb429ebe1b289bc07806"}, + {file = "coverage-7.5.4-cp311-cp311-win_amd64.whl", hash = "sha256:d2e344d6adc8ef81c5a233d3a57b3c7d5181f40e79e05e1c143da143ccb6377d"}, + {file = "coverage-7.5.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:54317c2b806354cbb2dc7ac27e2b93f97096912cc16b18289c5d4e44fc663233"}, + {file = "coverage-7.5.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:042183de01f8b6d531e10c197f7f0315a61e8d805ab29c5f7b51a01d62782747"}, + {file = "coverage-7.5.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a6bb74ed465d5fb204b2ec41d79bcd28afccf817de721e8a807d5141c3426638"}, + {file = "coverage-7.5.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3d45ff86efb129c599a3b287ae2e44c1e281ae0f9a9bad0edc202179bcc3a2e"}, + {file = "coverage-7.5.4-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5013ed890dc917cef2c9f765c4c6a8ae9df983cd60dbb635df8ed9f4ebc9f555"}, + {file = "coverage-7.5.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:1014fbf665fef86cdfd6cb5b7371496ce35e4d2a00cda501cf9f5b9e6fced69f"}, + {file = "coverage-7.5.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3684bc2ff328f935981847082ba4fdc950d58906a40eafa93510d1b54c08a66c"}, + {file = "coverage-7.5.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:581ea96f92bf71a5ec0974001f900db495488434a6928a2ca7f01eee20c23805"}, + {file = "coverage-7.5.4-cp312-cp312-win32.whl", hash = "sha256:73ca8fbc5bc622e54627314c1a6f1dfdd8db69788f3443e752c215f29fa87a0b"}, + {file = "coverage-7.5.4-cp312-cp312-win_amd64.whl", hash = "sha256:cef4649ec906ea7ea5e9e796e68b987f83fa9a718514fe147f538cfeda76d7a7"}, + {file = "coverage-7.5.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cdd31315fc20868c194130de9ee6bfd99755cc9565edff98ecc12585b90be882"}, + {file = "coverage-7.5.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:02ff6e898197cc1e9fa375581382b72498eb2e6d5fc0b53f03e496cfee3fac6d"}, + {file = "coverage-7.5.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d05c16cf4b4c2fc880cb12ba4c9b526e9e5d5bb1d81313d4d732a5b9fe2b9d53"}, + {file = "coverage-7.5.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c5986ee7ea0795a4095ac4d113cbb3448601efca7f158ec7f7087a6c705304e4"}, + {file = "coverage-7.5.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5df54843b88901fdc2f598ac06737f03d71168fd1175728054c8f5a2739ac3e4"}, + {file = "coverage-7.5.4-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:ab73b35e8d109bffbda9a3e91c64e29fe26e03e49addf5b43d85fc426dde11f9"}, + {file = "coverage-7.5.4-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:aea072a941b033813f5e4814541fc265a5c12ed9720daef11ca516aeacd3bd7f"}, + {file = "coverage-7.5.4-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:16852febd96acd953b0d55fc842ce2dac1710f26729b31c80b940b9afcd9896f"}, + {file = "coverage-7.5.4-cp38-cp38-win32.whl", hash = "sha256:8f894208794b164e6bd4bba61fc98bf6b06be4d390cf2daacfa6eca0a6d2bb4f"}, + {file = "coverage-7.5.4-cp38-cp38-win_amd64.whl", hash = "sha256:e2afe743289273209c992075a5a4913e8d007d569a406ffed0bd080ea02b0633"}, + {file = "coverage-7.5.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b95c3a8cb0463ba9f77383d0fa8c9194cf91f64445a63fc26fb2327e1e1eb088"}, + {file = "coverage-7.5.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3d7564cc09dd91b5a6001754a5b3c6ecc4aba6323baf33a12bd751036c998be4"}, + {file = "coverage-7.5.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44da56a2589b684813f86d07597fdf8a9c6ce77f58976727329272f5a01f99f7"}, + {file = "coverage-7.5.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e16f3d6b491c48c5ae726308e6ab1e18ee830b4cdd6913f2d7f77354b33f91c8"}, + {file = "coverage-7.5.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dbc5958cb471e5a5af41b0ddaea96a37e74ed289535e8deca404811f6cb0bc3d"}, + {file = "coverage-7.5.4-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a04e990a2a41740b02d6182b498ee9796cf60eefe40cf859b016650147908029"}, + {file = "coverage-7.5.4-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:ddbd2f9713a79e8e7242d7c51f1929611e991d855f414ca9996c20e44a895f7c"}, + {file = "coverage-7.5.4-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:b1ccf5e728ccf83acd313c89f07c22d70d6c375a9c6f339233dcf792094bcbf7"}, + {file = "coverage-7.5.4-cp39-cp39-win32.whl", 
hash = "sha256:56b4eafa21c6c175b3ede004ca12c653a88b6f922494b023aeb1e836df953ace"}, + {file = "coverage-7.5.4-cp39-cp39-win_amd64.whl", hash = "sha256:65e528e2e921ba8fd67d9055e6b9f9e34b21ebd6768ae1c1723f4ea6ace1234d"}, + {file = "coverage-7.5.4-pp38.pp39.pp310-none-any.whl", hash = "sha256:79b356f3dd5b26f3ad23b35c75dbdaf1f9e2450b6bcefc6d0825ea0aa3f86ca5"}, + {file = "coverage-7.5.4.tar.gz", hash = "sha256:a44963520b069e12789d0faea4e9fdb1e410cdc4aab89d94f7f55cbb7fef0353"}, ] [package.dependencies] @@ -777,33 +777,33 @@ files = [ [[package]] name = "debugpy" -version = "1.8.1" +version = "1.8.2" description = "An implementation of the Debug Adapter Protocol for Python" optional = false python-versions = ">=3.8" files = [ - {file = "debugpy-1.8.1-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:3bda0f1e943d386cc7a0e71bfa59f4137909e2ed947fb3946c506e113000f741"}, - {file = "debugpy-1.8.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dda73bf69ea479c8577a0448f8c707691152e6c4de7f0c4dec5a4bc11dee516e"}, - {file = "debugpy-1.8.1-cp310-cp310-win32.whl", hash = "sha256:3a79c6f62adef994b2dbe9fc2cc9cc3864a23575b6e387339ab739873bea53d0"}, - {file = "debugpy-1.8.1-cp310-cp310-win_amd64.whl", hash = "sha256:7eb7bd2b56ea3bedb009616d9e2f64aab8fc7000d481faec3cd26c98a964bcdd"}, - {file = "debugpy-1.8.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:016a9fcfc2c6b57f939673c874310d8581d51a0fe0858e7fac4e240c5eb743cb"}, - {file = "debugpy-1.8.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd97ed11a4c7f6d042d320ce03d83b20c3fb40da892f994bc041bbc415d7a099"}, - {file = "debugpy-1.8.1-cp311-cp311-win32.whl", hash = "sha256:0de56aba8249c28a300bdb0672a9b94785074eb82eb672db66c8144fff673146"}, - {file = "debugpy-1.8.1-cp311-cp311-win_amd64.whl", hash = "sha256:1a9fe0829c2b854757b4fd0a338d93bc17249a3bf69ecf765c61d4c522bb92a8"}, - {file = "debugpy-1.8.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3ebb70ba1a6524d19fa7bb122f44b74170c447d5746a503e36adc244a20ac539"}, - {file = "debugpy-1.8.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2e658a9630f27534e63922ebf655a6ab60c370f4d2fc5c02a5b19baf4410ace"}, - {file = "debugpy-1.8.1-cp312-cp312-win32.whl", hash = "sha256:caad2846e21188797a1f17fc09c31b84c7c3c23baf2516fed5b40b378515bbf0"}, - {file = "debugpy-1.8.1-cp312-cp312-win_amd64.whl", hash = "sha256:edcc9f58ec0fd121a25bc950d4578df47428d72e1a0d66c07403b04eb93bcf98"}, - {file = "debugpy-1.8.1-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:7a3afa222f6fd3d9dfecd52729bc2e12c93e22a7491405a0ecbf9e1d32d45b39"}, - {file = "debugpy-1.8.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d915a18f0597ef685e88bb35e5d7ab968964b7befefe1aaea1eb5b2640b586c7"}, - {file = "debugpy-1.8.1-cp38-cp38-win32.whl", hash = "sha256:92116039b5500633cc8d44ecc187abe2dfa9b90f7a82bbf81d079fcdd506bae9"}, - {file = "debugpy-1.8.1-cp38-cp38-win_amd64.whl", hash = "sha256:e38beb7992b5afd9d5244e96ad5fa9135e94993b0c551ceebf3fe1a5d9beb234"}, - {file = "debugpy-1.8.1-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:bfb20cb57486c8e4793d41996652e5a6a885b4d9175dd369045dad59eaacea42"}, - {file = "debugpy-1.8.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efd3fdd3f67a7e576dd869c184c5dd71d9aaa36ded271939da352880c012e703"}, - {file = "debugpy-1.8.1-cp39-cp39-win32.whl", hash = "sha256:58911e8521ca0c785ac7a0539f1e77e0ce2df753f786188f382229278b4cdf23"}, - {file = 
"debugpy-1.8.1-cp39-cp39-win_amd64.whl", hash = "sha256:6df9aa9599eb05ca179fb0b810282255202a66835c6efb1d112d21ecb830ddd3"}, - {file = "debugpy-1.8.1-py2.py3-none-any.whl", hash = "sha256:28acbe2241222b87e255260c76741e1fbf04fdc3b6d094fcf57b6c6f75ce1242"}, - {file = "debugpy-1.8.1.zip", hash = "sha256:f696d6be15be87aef621917585f9bb94b1dc9e8aced570db1b8a6fc14e8f9b42"}, + {file = "debugpy-1.8.2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:7ee2e1afbf44b138c005e4380097d92532e1001580853a7cb40ed84e0ef1c3d2"}, + {file = "debugpy-1.8.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f8c3f7c53130a070f0fc845a0f2cee8ed88d220d6b04595897b66605df1edd6"}, + {file = "debugpy-1.8.2-cp310-cp310-win32.whl", hash = "sha256:f179af1e1bd4c88b0b9f0fa153569b24f6b6f3de33f94703336363ae62f4bf47"}, + {file = "debugpy-1.8.2-cp310-cp310-win_amd64.whl", hash = "sha256:0600faef1d0b8d0e85c816b8bb0cb90ed94fc611f308d5fde28cb8b3d2ff0fe3"}, + {file = "debugpy-1.8.2-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:8a13417ccd5978a642e91fb79b871baded925d4fadd4dfafec1928196292aa0a"}, + {file = "debugpy-1.8.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:acdf39855f65c48ac9667b2801234fc64d46778021efac2de7e50907ab90c634"}, + {file = "debugpy-1.8.2-cp311-cp311-win32.whl", hash = "sha256:2cbd4d9a2fc5e7f583ff9bf11f3b7d78dfda8401e8bb6856ad1ed190be4281ad"}, + {file = "debugpy-1.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:d3408fddd76414034c02880e891ea434e9a9cf3a69842098ef92f6e809d09afa"}, + {file = "debugpy-1.8.2-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:5d3ccd39e4021f2eb86b8d748a96c766058b39443c1f18b2dc52c10ac2757835"}, + {file = "debugpy-1.8.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:62658aefe289598680193ff655ff3940e2a601765259b123dc7f89c0239b8cd3"}, + {file = "debugpy-1.8.2-cp312-cp312-win32.whl", hash = "sha256:bd11fe35d6fd3431f1546d94121322c0ac572e1bfb1f6be0e9b8655fb4ea941e"}, + {file = "debugpy-1.8.2-cp312-cp312-win_amd64.whl", hash = "sha256:15bc2f4b0f5e99bf86c162c91a74c0631dbd9cef3c6a1d1329c946586255e859"}, + {file = "debugpy-1.8.2-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:5a019d4574afedc6ead1daa22736c530712465c0c4cd44f820d803d937531b2d"}, + {file = "debugpy-1.8.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40f062d6877d2e45b112c0bbade9a17aac507445fd638922b1a5434df34aed02"}, + {file = "debugpy-1.8.2-cp38-cp38-win32.whl", hash = "sha256:c78ba1680f1015c0ca7115671fe347b28b446081dada3fedf54138f44e4ba031"}, + {file = "debugpy-1.8.2-cp38-cp38-win_amd64.whl", hash = "sha256:cf327316ae0c0e7dd81eb92d24ba8b5e88bb4d1b585b5c0d32929274a66a5210"}, + {file = "debugpy-1.8.2-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:1523bc551e28e15147815d1397afc150ac99dbd3a8e64641d53425dba57b0ff9"}, + {file = "debugpy-1.8.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e24ccb0cd6f8bfaec68d577cb49e9c680621c336f347479b3fce060ba7c09ec1"}, + {file = "debugpy-1.8.2-cp39-cp39-win32.whl", hash = "sha256:7f8d57a98c5a486c5c7824bc0b9f2f11189d08d73635c326abef268f83950326"}, + {file = "debugpy-1.8.2-cp39-cp39-win_amd64.whl", hash = "sha256:16c8dcab02617b75697a0a925a62943e26a0330da076e2a10437edd9f0bf3755"}, + {file = "debugpy-1.8.2-py2.py3-none-any.whl", hash = "sha256:16e16df3a98a35c63c3ab1e4d19be4cbc7fdda92d9ddc059294f18910928e0ca"}, + {file = "debugpy-1.8.2.zip", hash = "sha256:95378ed08ed2089221896b9b3a8d021e642c24edc8fef20e5d4342ca8be65c00"}, 
] [[package]] @@ -897,13 +897,13 @@ devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benc [[package]] name = "filelock" -version = "3.15.3" +version = "3.15.4" description = "A platform independent file lock." optional = false python-versions = ">=3.8" files = [ - {file = "filelock-3.15.3-py3-none-any.whl", hash = "sha256:0151273e5b5d6cf753a61ec83b3a9b7d8821c39ae9af9d7ecf2f9e2f17404103"}, - {file = "filelock-3.15.3.tar.gz", hash = "sha256:e1199bf5194a2277273dacd50269f0d87d0682088a3c561c15674ea9005d8635"}, + {file = "filelock-3.15.4-py3-none-any.whl", hash = "sha256:6ca1fffae96225dab4c6eaf1c4f4f28cd2568d3ec2a44e15a08520504de468e7"}, + {file = "filelock-3.15.4.tar.gz", hash = "sha256:2207938cbc1844345cb01a5a95524dae30f0ce089eba5b00378295a17e3e90cb"}, ] [package.extras] @@ -1256,13 +1256,13 @@ files = [ [[package]] name = "importlib-metadata" -version = "7.2.0" +version = "8.0.0" description = "Read metadata from Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "importlib_metadata-7.2.0-py3-none-any.whl", hash = "sha256:04e4aad329b8b948a5711d394fa8759cb80f009225441b4f2a02bd4d8e5f426c"}, - {file = "importlib_metadata-7.2.0.tar.gz", hash = "sha256:3ff4519071ed42740522d494d04819b666541b9752c43012f85afb2cc220fcc6"}, + {file = "importlib_metadata-8.0.0-py3-none-any.whl", hash = "sha256:15584cf2b1bf449d98ff8a6ff1abef57bf20f3ac6454f431736cd3e660921b2f"}, + {file = "importlib_metadata-8.0.0.tar.gz", hash = "sha256:188bd24e4c346d3f0a933f275c2fec67050326a856b9a359881d7c2a697e8812"}, ] [package.dependencies] @@ -1664,6 +1664,34 @@ files = [ {file = "kiwisolver-1.4.5.tar.gz", hash = "sha256:e57e563a57fb22a142da34f38acc2fc1a5c864bc29ca1517a88abc963e60d6ec"}, ] +[[package]] +name = "lightning-fabric" +version = "2.3.0" +description = "" +optional = false +python-versions = ">=3.8" +files = [ + {file = "lightning-fabric-2.3.0.tar.gz", hash = "sha256:b75438e96caba280141ece3512fd613ba680c102fda90657af1bbd2ea5e95bc1"}, + {file = "lightning_fabric-2.3.0-py3-none-any.whl", hash = "sha256:fff33b1e48a283e486b4a51bc5100b8d6a14dd50278a613c6d964b058584672c"}, +] + +[package.dependencies] +fsspec = {version = ">=2022.5.0", extras = ["http"]} +lightning-utilities = ">=0.8.0" +numpy = ">=1.17.2" +packaging = ">=20.0" +torch = ">=2.0.0" +typing-extensions = ">=4.4.0" + +[package.extras] +all = ["bitsandbytes (>=0.42.0)", "deepspeed (>=0.8.2,<=0.9.3)", "lightning-utilities (>=0.8.0)", "torchmetrics (>=0.10.0)", "torchvision (>=0.15.0)"] +bitsandbytes = ["bitsandbytes (>=0.42.0)"] +deepspeed = ["deepspeed (>=0.8.2,<=0.9.3)"] +dev = ["bitsandbytes (>=0.42.0)", "click (==8.1.7)", "coverage (==7.3.1)", "deepspeed (>=0.8.2,<=0.9.3)", "lightning-utilities (>=0.8.0)", "pytest (==7.4.0)", "pytest-cov (==4.1.0)", "pytest-random-order (==1.1.0)", "pytest-rerunfailures (==12.0)", "pytest-timeout (==2.1.0)", "tensorboardX (>=2.2)", "torchmetrics (>=0.10.0)", "torchmetrics (>=0.7.0)", "torchvision (>=0.15.0)"] +examples = ["lightning-utilities (>=0.8.0)", "torchmetrics (>=0.10.0)", "torchvision (>=0.15.0)"] +strategies = ["bitsandbytes (>=0.42.0)", "deepspeed (>=0.8.2,<=0.9.3)"] +test = ["click (==8.1.7)", "coverage (==7.3.1)", "pytest (==7.4.0)", "pytest-cov (==4.1.0)", "pytest-random-order (==1.1.0)", "pytest-rerunfailures (==12.0)", "pytest-timeout (==2.1.0)", "tensorboardX (>=2.2)", "torchmetrics (>=0.7.0)"] + [[package]] name = "lightning-utilities" version = "0.11.2" @@ -2417,6 +2445,7 @@ description = "Nvidia JIT LTO Library" optional = false python-versions = ">=3" 
files = [ + {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_aarch64.whl", hash = "sha256:004186d5ea6a57758fd6d57052a123c73a4815adf365eb8dd6a85c9eaa7535ff"}, {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_x86_64.whl", hash = "sha256:d9714f27c1d0f0895cd8915c07a87a1d0029a0aa36acaf9156952ec2a8a12189"}, {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-win_amd64.whl", hash = "sha256:c3401dc8543b52d3a8158007a0c1ab4e9c768fcbd24153a48c86972102197ddd"}, ] @@ -3110,6 +3139,7 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -3404,13 +3434,13 @@ files = [ [[package]] name = "setuptools" -version = "70.1.0" +version = "70.1.1" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "setuptools-70.1.0-py3-none-any.whl", hash = "sha256:d9b8b771455a97c8a9f3ab3448ebe0b29b5e105f1228bba41028be116985a267"}, - {file = "setuptools-70.1.0.tar.gz", hash = "sha256:01a1e793faa5bd89abc851fa15d0a0db26f160890c7102cd8dce643e886b47f5"}, + {file = "setuptools-70.1.1-py3-none-any.whl", hash = "sha256:a58a8fde0541dab0419750bcc521fbdf8585f6e5cb41909df3a472ef7b81ca95"}, + {file = "setuptools-70.1.1.tar.gz", hash = "sha256:937a48c7cdb7a21eb53cd7f9b59e525503aa8abaf3584c730dc5f7a5bec3a650"}, ] [package.extras] @@ -3672,13 +3702,13 @@ files = [ [[package]] name = "tenacity" -version = "8.4.1" +version = "8.4.2" description = "Retry code until it succeeds" optional = false python-versions = ">=3.8" files = [ - {file = "tenacity-8.4.1-py3-none-any.whl", hash = "sha256:28522e692eda3e1b8f5e99c51464efcc0b9fc86933da92415168bc1c4e2308fa"}, - {file = "tenacity-8.4.1.tar.gz", hash = "sha256:54b1412b878ddf7e1f1577cd49527bad8cdef32421bd599beac0c6c3f10582fd"}, + {file = "tenacity-8.4.2-py3-none-any.whl", hash = "sha256:9e6f7cf7da729125c7437222f8a522279751cdfbe6b67bfe64f75d3a348661b2"}, + {file = "tenacity-8.4.2.tar.gz", hash = "sha256:cd80a53a79336edba8489e767f729e4f391c896956b57140b5d7511a64bbd3ef"}, ] [package.extras] @@ -4229,4 +4259,4 @@ plotly-resampler = ["plotly-resampler"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<=3.13" -content-hash = "2918a6a6306adfdc98192da9235ddc0863ed75d38aee3c7fdf045dccd505e9ef" +content-hash = "548ba24b8460a79ec563ee453e04ee4625aed2986de4668e80ccd659142e3b56" diff --git a/pyproject.toml b/pyproject.toml index 4e69ae072..217597907 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,7 @@ plotly = ">=5.13.1" kaleido = "0.2.1" # required for plotly static image export plotly-resampler = { 
version = ">=0.9.2", optional = true } livelossplot = { version = ">=0.5.5", optional = true } +lightning-fabric = "^2.3.0" [tool.poetry.extras] plotly-resampler = ["plotly-resampler"] diff --git a/tests/test_model_performance.py b/tests/test_model_performance.py index ac0af79e0..50c037250 100644 --- a/tests/test_model_performance.py +++ b/tests/test_model_performance.py @@ -134,7 +134,7 @@ def create_metrics_plot(metrics): def test_PeytonManning(): df = pd.read_csv(PEYTON_FILE) - m = NeuralProphet() + m = NeuralProphet(deterministic=True) df_train, df_test = m.split_df(df=df, freq="D", valid_p=0.1) system_speed, std = get_system_speed() @@ -160,6 +160,7 @@ def test_YosemiteTemps(): changepoints_range=0.9, n_changepoints=30, weekly_seasonality=False, + deterministic=True, ) df_train, df_test = m.split_df(df=df, freq="5min", valid_p=0.1) @@ -180,7 +181,7 @@ def test_YosemiteTemps(): def test_AirPassengers(): df = pd.read_csv(AIR_FILE) - m = NeuralProphet(seasonality_mode="multiplicative") + m = NeuralProphet(seasonality_mode="multiplicative", deterministic=True) df_train, df_test = m.split_df(df=df, freq="MS", valid_p=0.1) system_speed, std = get_system_speed() @@ -209,6 +210,7 @@ def test_EnergyPriceDaily(): weekly_seasonality=True, daily_seasonality=False, n_lags=14, + deterministic=True, ) m.add_lagged_regressor("temp", n_lags=3) m.add_future_regressor("temperature") From aac70dec5a60e4a54d2f70e49fd43522b4e82095 Mon Sep 17 00:00:00 2001 From: MaiBe-ctrl Date: Tue, 25 Jun 2024 19:05:10 -0700 Subject: [PATCH 123/128] fixed ruff linting issues --- tests/test_glocal.py | 183 +++++-------------------------------------- 1 file changed, 19 insertions(+), 164 deletions(-) diff --git a/tests/test_glocal.py b/tests/test_glocal.py index e631b616d..0767fb242 100644 --- a/tests/test_glocal.py +++ b/tests/test_glocal.py @@ -205,36 +205,12 @@ def test_wrong_option_global_local_modeling(): forecast_trend = m.predict_trend(test_df) forecast_seasonal_componets = m.predict_seasonal_components(test_df) - -def test_different_seasonality_modeling(): - # SEASONALITY GLOBAL LOCAL MODELLING - NO EXOGENOUS VARIABLES - log.info("Global Modeling + Global Normalization") - df = pd.read_csv(PEYTON_FILE, nrows=512) - df1_0 = df.iloc[:128, :].copy(deep=True) - df1_0["ID"] = "df1" - df2_0 = df.iloc[128:256, :].copy(deep=True) - df2_0["ID"] = "df2" - df3_0 = df.iloc[256:384, :].copy(deep=True) - df3_0["ID"] = "df3" - m = NeuralProphet( - n_forecasts=2, - n_lags=10, - epochs=EPOCHS, - batch_size=BATCH_SIZE, - learning_rate=LR, - season_global_local="local", - yearly_seasonality_glocal_mode="global", + log.info( + f"forecast = {forecast}, metrics = {metrics}, forecast_trend = {forecast_trend}, forecast_seasonal_componets = {forecast_seasonal_componets}" ) - train_df, test_df = m.split_df(pd.concat((df1_0, df2_0, df3_0)), valid_p=0.33, local_split=True) - m.fit(train_df) - future = m.make_future_dataframe(test_df) - forecast = m.predict(future) - metrics = m.test(test_df) - forecast_trend = m.predict_trend(test_df) - forecast_seasonal_componets = m.predict_seasonal_components(test_df) -def test_adding_new_global_seasonality(): +def test_different_seasonality_modeling(): # SEASONALITY GLOBAL LOCAL MODELLING - NO EXOGENOUS VARIABLES log.info("Global Modeling + Global Normalization") df = pd.read_csv(PEYTON_FILE, nrows=512) @@ -253,7 +229,6 @@ def test_adding_new_global_seasonality(): season_global_local="local", yearly_seasonality_glocal_mode="global", ) - m.add_seasonality(period=30, fourier_order=8, name="monthly", 
global_local="global") train_df, test_df = m.split_df(pd.concat((df1_0, df2_0, df3_0)), valid_p=0.33, local_split=True) m.fit(train_df) future = m.make_future_dataframe(test_df) @@ -262,142 +237,9 @@ def test_adding_new_global_seasonality(): forecast_trend = m.predict_trend(test_df) forecast_seasonal_componets = m.predict_seasonal_components(test_df) - -def test_adding_new_local_seasonality(): - # SEASONALITY GLOBAL LOCAL MODELLING - NO EXOGENOUS VARIABLES - log.info("Global Modeling + Global Normalization") - df = pd.read_csv(PEYTON_FILE, nrows=512) - df1_0 = df.iloc[:128, :].copy(deep=True) - df1_0["ID"] = "df1" - df2_0 = df.iloc[128:256, :].copy(deep=True) - df2_0["ID"] = "df2" - df3_0 = df.iloc[256:384, :].copy(deep=True) - df3_0["ID"] = "df3" - m = NeuralProphet(epochs=EPOCHS, batch_size=BATCH_SIZE, season_global_local="global", trend_global_local="local") - m.add_seasonality(period=30, fourier_order=8, name="monthly", global_local="local") - train_df, test_df = m.split_df(pd.concat((df1_0, df2_0, df3_0)), valid_p=0.33, local_split=True) - m.fit(train_df) - future = m.make_future_dataframe(test_df, n_historic_predictions=True) - forecast = m.predict(future) - metrics = m.test(test_df) - forecast_trend = m.predict_trend(test_df) - forecast_seasonal_componets = m.predict_seasonal_components(test_df) - - -def test_trend_local_reg(): - # SEASONALITY GLOBAL LOCAL MODELLING - NO EXOGENOUS VARIABLES - log.info("Global Modeling + Global Normalization") - df = pd.read_csv(PEYTON_FILE, nrows=512) - df1_0 = df.iloc[:128, :].copy(deep=True) - df1_0["ID"] = "df1" - df2_0 = df.iloc[128:256, :].copy(deep=True) - df2_0["ID"] = "df2" - df3_0 = df.iloc[256:384, :].copy(deep=True) - df3_0["ID"] = "df3" - for coef_i in [-30, 0, False, True]: - m = NeuralProphet( - n_forecasts=1, - epochs=EPOCHS, - batch_size=BATCH_SIZE, - learning_rate=LR, - trend_global_local="local", - trend_local_reg=coef_i, - ) - - m.add_seasonality(period=30, fourier_order=8, name="monthly", global_local="global") - train_df, test_df = m.split_df(pd.concat((df1_0, df2_0, df3_0)), valid_p=0.33, local_split=True) - m.fit(train_df) - future = m.make_future_dataframe(test_df, n_historic_predictions=True) - forecast = m.predict(future) - metrics = m.test(test_df) - forecast_trend = m.predict_trend(test_df) - forecast_seasonal_componets = m.predict_seasonal_components(test_df) - - -def test_glocal_seasonality_reg(): - # SEASONALITY GLOBAL LOCAL MODELLING - NO EXOGENOUS VARIABLES - log.info("Global Modeling + Global Normalization") - df = pd.read_csv(PEYTON_FILE, nrows=512) - df1_0 = df.iloc[:128, :].copy(deep=True) - df1_0["ID"] = "df1" - df2_0 = df.iloc[128:256, :].copy(deep=True) - df2_0["ID"] = "df2" - df3_0 = df.iloc[256:384, :].copy(deep=True) - df3_0["ID"] = "df3" - for coef_i in [-30, 0, False, True]: - m = NeuralProphet( - n_forecasts=1, - epochs=EPOCHS, - batch_size=BATCH_SIZE, - learning_rate=LR, - season_global_local="local", - yearly_seasonality_glocal_mode="global", - glocal_seasonality_reg=coef_i, - ) - - m.add_seasonality(period=30, fourier_order=8, name="monthly", global_local="global") - train_df, test_df = m.split_df(pd.concat((df1_0, df2_0, df3_0)), valid_p=0.33, local_split=True) - m.fit(train_df) - future = m.make_future_dataframe(test_df, n_historic_predictions=True) - forecast = m.predict(future) - metrics = m.test(test_df) - - -def test_trend_local_reg_if_global(): - # SEASONALITY GLOBAL LOCAL MODELLING - NO EXOGENOUS VARIABLES - log.info("Global Modeling + Global Normalization") - df = pd.read_csv(PEYTON_FILE, 
nrows=512) - df1_0 = df.iloc[:128, :].copy(deep=True) - df1_0["ID"] = "df1" - df2_0 = df.iloc[128:256, :].copy(deep=True) - df2_0["ID"] = "df2" - df3_0 = df.iloc[256:384, :].copy(deep=True) - df3_0["ID"] = "df3" - for coef_i in [-30, 0, False, True]: - m = NeuralProphet( - n_forecasts=1, - epochs=EPOCHS, - batch_size=BATCH_SIZE, - learning_rate=LR, - trend_global_local="global", - trend_local_reg=3, - ) - - train_df, test_df = m.split_df(pd.concat((df1_0, df2_0, df3_0)), valid_p=0.33, local_split=True) - m.fit(train_df) - future = m.make_future_dataframe(test_df, n_historic_predictions=True) - forecast = m.predict(future) - metrics = m.test(test_df) - forecast_trend = m.predict_trend(test_df) - forecast_seasonal_componets = m.predict_seasonal_components(test_df) - - -def test_different_seasonality_modeling(): - # SEASONALITY GLOBAL LOCAL MODELLING - NO EXOGENOUS VARIABLES - log.info("Global Modeling + Global Normalization") - df = pd.read_csv(PEYTON_FILE, nrows=512) - df1_0 = df.iloc[:128, :].copy(deep=True) - df1_0["ID"] = "df1" - df2_0 = df.iloc[128:256, :].copy(deep=True) - df2_0["ID"] = "df2" - df3_0 = df.iloc[256:384, :].copy(deep=True) - df3_0["ID"] = "df3" - m = NeuralProphet( - n_forecasts=2, - n_lags=10, - epochs=EPOCHS, - batch_size=BATCH_SIZE, - learning_rate=LR, - season_global_local="local", - yearly_seasonality_glocal_mode="global", + log.info( + f"forecast = {forecast}, metrics = {metrics}, forecast_trend = {forecast_trend}, forecast_seasonal_componets = {forecast_seasonal_componets}" ) - train_df, test_df = m.split_df(pd.concat((df1_0, df2_0, df3_0)), valid_p=0.33, local_split=True) - m.fit(train_df) - future = m.make_future_dataframe(test_df) - forecast = m.predict(future) - metrics = m.test(test_df) - forecast_trend = m.predict_trend(test_df) - forecast_seasonal_componets = m.predict_seasonal_components(test_df) def test_adding_new_global_seasonality(): @@ -427,6 +269,9 @@ def test_adding_new_global_seasonality(): metrics = m.test(test_df) forecast_trend = m.predict_trend(test_df) forecast_seasonal_componets = m.predict_seasonal_components(test_df) + log.info( + f"forecast = {forecast}, metrics = {metrics}, forecast_trend = {forecast_trend}, forecast_seasonal_componets = {forecast_seasonal_componets}" + ) def test_adding_new_local_seasonality(): @@ -448,6 +293,9 @@ def test_adding_new_local_seasonality(): metrics = m.test(test_df) forecast_trend = m.predict_trend(test_df) forecast_seasonal_componets = m.predict_seasonal_components(test_df) + log.info( + f"forecast = {forecast}, metrics = {metrics}, forecast_trend = {forecast_trend}, forecast_seasonal_componets = {forecast_seasonal_componets}" + ) def test_trend_local_reg(): @@ -478,6 +326,9 @@ def test_trend_local_reg(): metrics = m.test(test_df) forecast_trend = m.predict_trend(test_df) forecast_seasonal_componets = m.predict_seasonal_components(test_df) + log.info( + f"forecast = {forecast}, metrics = {metrics}, forecast_trend = {forecast_trend}, forecast_seasonal_componets = {forecast_seasonal_componets}" + ) def test_glocal_seasonality_reg(): @@ -498,7 +349,7 @@ def test_glocal_seasonality_reg(): learning_rate=LR, season_global_local="local", yearly_seasonality_glocal_mode="global", - seasonality_local_reg=coef_i, + glocal_seasonality_reg=coef_i, ) m.add_seasonality(period=30, fourier_order=8, name="monthly", global_local="global") @@ -507,6 +358,7 @@ def test_glocal_seasonality_reg(): future = m.make_future_dataframe(test_df, n_historic_predictions=True) forecast = m.predict(future) metrics = m.test(test_df) + 
log.info(f"forecast = {forecast}, metrics = {metrics}") def test_trend_local_reg_if_global(): @@ -536,3 +388,6 @@ def test_trend_local_reg_if_global(): metrics = m.test(test_df) forecast_trend = m.predict_trend(test_df) forecast_seasonal_componets = m.predict_seasonal_components(test_df) + log.info( + f"forecast = {forecast}, metrics = {metrics}, forecast_trend = {forecast_trend}, forecast_seasonal_componets = {forecast_seasonal_componets}" + ) From ec76aae37d0a63c453adfe3f248c05c425d12e96 Mon Sep 17 00:00:00 2001 From: MaiBe-ctrl Date: Tue, 25 Jun 2024 19:29:09 -0700 Subject: [PATCH 124/128] fixed glocal test --- tests/test_glocal.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_glocal.py b/tests/test_glocal.py index 0767fb242..5c171d597 100644 --- a/tests/test_glocal.py +++ b/tests/test_glocal.py @@ -341,7 +341,7 @@ def test_glocal_seasonality_reg(): df2_0["ID"] = "df2" df3_0 = df.iloc[256:384, :].copy(deep=True) df3_0["ID"] = "df3" - for coef_i in [-30, 0, False, True]: + for _ in [-30, 0, False, True]: m = NeuralProphet( n_forecasts=1, epochs=EPOCHS, @@ -349,7 +349,6 @@ def test_glocal_seasonality_reg(): learning_rate=LR, season_global_local="local", yearly_seasonality_glocal_mode="global", - glocal_seasonality_reg=coef_i, ) m.add_seasonality(period=30, fourier_order=8, name="monthly", global_local="global") From 19d8e7a24337311fff14ffdcc17f5df972a23815 Mon Sep 17 00:00:00 2001 From: MaiBe-ctrl Date: Tue, 25 Jun 2024 19:45:01 -0700 Subject: [PATCH 125/128] fix lock file --- poetry.lock | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/poetry.lock b/poetry.lock index e33ec5716..ac42351f9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -896,7 +896,7 @@ files = [ devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benchmark", "pytest-cache", "validictory"] [[package]] -name = "filelock" +name = "file" version = "3.15.4" description = "A platform independent file lock." optional = false @@ -4258,10 +4258,5 @@ plotly-resampler = ["plotly-resampler"] [metadata] lock-version = "2.0" -<<<<<<< HEAD python-versions = ">=3.9,<3.13" content-hash = "d08c423b7a0c27143741287c01f7b597d7af8f45c4c4108194af7be93f442e54" -======= -python-versions = ">=3.9,<=3.13" -content-hash = "548ba24b8460a79ec563ee453e04ee4625aed2986de4668e80ccd659142e3b56" ->>>>>>> bug/make_tests_deterministic From c533f01490a115969f28c11e4308b3d7492af94b Mon Sep 17 00:00:00 2001 From: MaiBe-ctrl Date: Tue, 25 Jun 2024 19:47:58 -0700 Subject: [PATCH 126/128] update poetry --- poetry.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index ac42351f9..4df2de293 100644 --- a/poetry.lock +++ b/poetry.lock @@ -896,7 +896,7 @@ files = [ devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benchmark", "pytest-cache", "validictory"] [[package]] -name = "file" +name = "filelock" version = "3.15.4" description = "A platform independent file lock." 
optional = false @@ -4259,4 +4259,4 @@ plotly-resampler = ["plotly-resampler"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.13" -content-hash = "d08c423b7a0c27143741287c01f7b597d7af8f45c4c4108194af7be93f442e54" +content-hash = "7c8e2b1178f0498721e849f427703bfcda1ecba529d25bcd93f5a00a5daedbe2" From ad449c2db5ddf64878b93e5e26fe7b7e767b9351 Mon Sep 17 00:00:00 2001 From: MaiBe-ctrl Date: Wed, 26 Jun 2024 10:35:24 -0700 Subject: [PATCH 127/128] moved the deterministic flag to the train method --- neuralprophet/forecaster.py | 8 +++++--- tests/test_model_performance.py | 24 ++++++++++++++++-------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/neuralprophet/forecaster.py b/neuralprophet/forecaster.py index d258a256e..9993fd832 100644 --- a/neuralprophet/forecaster.py +++ b/neuralprophet/forecaster.py @@ -435,11 +435,9 @@ def __init__( accelerator: Optional[str] = None, trainer_config: dict = {}, prediction_frequency: Optional[dict] = None, - deterministic=False, ): self.config = locals() self.config.pop("self") - self.deterministic = deterministic # General self.name = "NeuralProphet" @@ -907,6 +905,7 @@ def fit( checkpointing: bool = False, continue_training: bool = False, num_workers: int = 0, + deterministic: bool = False, ): """Train, and potentially evaluate model. @@ -1071,6 +1070,7 @@ def fit( checkpointing_enabled=checkpointing, continue_training=continue_training, num_workers=num_workers, + deterministic=deterministic, ) else: df_val, _, _, _ = df_utils.prep_or_copy_df(validation_df) @@ -1095,6 +1095,7 @@ def fit( checkpointing_enabled=checkpointing, continue_training=continue_training, num_workers=num_workers, + deterministic=deterministic, ) # Show training plot @@ -2716,6 +2717,7 @@ def _train( checkpointing_enabled: bool = False, continue_training=False, num_workers=0, + deterministic: bool = False, ): """ Execute model training procedure for a configured number of epochs. 
@@ -2773,7 +2775,7 @@ def _train( metrics_enabled=metrics_enabled, checkpointing_enabled=checkpointing_enabled, num_batches_per_epoch=len(train_loader), - deterministic=self.deterministic, + deterministic=deterministic, ) # Tune hyperparams and train diff --git a/tests/test_model_performance.py b/tests/test_model_performance.py index 50c037250..af512d535 100644 --- a/tests/test_model_performance.py +++ b/tests/test_model_performance.py @@ -134,12 +134,12 @@ def create_metrics_plot(metrics): def test_PeytonManning(): df = pd.read_csv(PEYTON_FILE) - m = NeuralProphet(deterministic=True) + m = NeuralProphet() df_train, df_test = m.split_df(df=df, freq="D", valid_p=0.1) system_speed, std = get_system_speed() start = time.time() - metrics = m.fit(df_train, validation_df=df_test, freq="D") # , early_stopping=True) + metrics = m.fit(df_train, validation_df=df_test, freq="D", deterministic=True) # , early_stopping=True) end = time.time() accuracy_metrics = metrics.to_dict("records")[-1] @@ -160,13 +160,17 @@ def test_YosemiteTemps(): changepoints_range=0.9, n_changepoints=30, weekly_seasonality=False, - deterministic=True, ) df_train, df_test = m.split_df(df=df, freq="5min", valid_p=0.1) system_speed, std = get_system_speed() start = time.time() - metrics = m.fit(df_train, validation_df=df_test, freq="5min") # , early_stopping=True) + metrics = m.fit( + df_train, + validation_df=df_test, + freq="5min", + deterministic=True, + ) # , early_stopping=True) end = time.time() accuracy_metrics = metrics.to_dict("records")[-1] @@ -181,12 +185,12 @@ def test_YosemiteTemps(): def test_AirPassengers(): df = pd.read_csv(AIR_FILE) - m = NeuralProphet(seasonality_mode="multiplicative", deterministic=True) + m = NeuralProphet(seasonality_mode="multiplicative") df_train, df_test = m.split_df(df=df, freq="MS", valid_p=0.1) system_speed, std = get_system_speed() start = time.time() - metrics = m.fit(df_train, validation_df=df_test, freq="MS") # , early_stopping=True) + metrics = m.fit(df_train, validation_df=df_test, freq="MS", deterministic=True) # , early_stopping=True) end = time.time() accuracy_metrics = metrics.to_dict("records")[-1] @@ -210,7 +214,6 @@ def test_EnergyPriceDaily(): weekly_seasonality=True, daily_seasonality=False, n_lags=14, - deterministic=True, ) m.add_lagged_regressor("temp", n_lags=3) m.add_future_regressor("temperature") @@ -219,7 +222,12 @@ def test_EnergyPriceDaily(): system_speed, std = get_system_speed() start = time.time() - metrics = m.fit(df_train, validation_df=df_test, freq="D") # , early_stopping=True) + metrics = m.fit( + df_train, + validation_df=df_test, + freq="D", + deterministic=True, + ) # , early_stopping=True) end = time.time() accuracy_metrics = metrics.to_dict("records")[-1] From 39b69131f927f96f39f77b93c3a42256adf77ee7 Mon Sep 17 00:00:00 2001 From: MaiBe-ctrl Date: Wed, 26 Jun 2024 15:29:21 -0700 Subject: [PATCH 128/128] update lock file --- poetry.lock | 2 +- tests/test_event_utils.py | 1 - tests/test_glocal.py | 4 ++-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index 72dbb454d..43abed65d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4259,4 +4259,4 @@ plotly-resampler = ["plotly-resampler"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.13" -content-hash = "7c8e2b1178f0498721e849f427703bfcda1ecba529d25bcd93f5a00a5daedbe2" +content-hash = "a3b767eec027be911e9499276840e4740231a7fff6e5658c2f38a36b00e72451" diff --git a/tests/test_event_utils.py b/tests/test_event_utils.py index 0d0c75b96..49e24b4c3 100644 --- 
a/tests/test_event_utils.py +++ b/tests/test_event_utils.py @@ -6,7 +6,6 @@ import holidays import matplotlib.pyplot as plt -import numpy as np import pandas as pd import pytest from holidays import country_holidays diff --git a/tests/test_glocal.py b/tests/test_glocal.py index bc7fabe8d..9bda1882c 100644 --- a/tests/test_glocal.py +++ b/tests/test_glocal.py @@ -273,7 +273,7 @@ def test_adding_new_global_seasonality(): forecast_seasonal_componets = m.predict_seasonal_components(test_df) log.debug( f"forecast = {forecast}, metrics= {metrics}, forecast_trend = {forecast_trend}, forecast_seasonal_componets= {forecast_seasonal_componets}" - + ) def test_adding_new_local_seasonality(): @@ -297,6 +297,7 @@ def test_adding_new_local_seasonality(): forecast_seasonal_componets = m.predict_seasonal_components(test_df) log.debug( f"forecast = {forecast}, metrics= {metrics}, forecast_trend = {forecast_trend}, forecast_seasonal_componets= {forecast_seasonal_componets}" + ) def test_trend_local_reg(): @@ -343,7 +344,6 @@ def test_glocal_seasonality_reg(): df3_0 = df.iloc[256:384, :].copy(deep=True) df3_0["ID"] = "df3" for coef_i in [0, 1.5, False, True]: - m = NeuralProphet( n_forecasts=1, epochs=EPOCHS,
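
Taken together, the last two patches change where reproducibility is configured: the deterministic flag is removed from the NeuralProphet constructor and becomes a keyword argument of fit(), which forwards it to the internal _train() call. A minimal usage sketch of the resulting API, assuming only what the diffs above show — the toy dataframe, the epoch count, and the variable names are illustrative, not taken from the patches:

    import pandas as pd
    from neuralprophet import NeuralProphet

    # Illustrative daily series in the (ds, y) format NeuralProphet expects.
    df = pd.DataFrame({
        "ds": pd.date_range("2020-01-01", periods=100, freq="D"),
        "y": [float(i % 7) for i in range(100)],  # toy weekly pattern
    })

    m = NeuralProphet(epochs=5)  # per patch 127, no deterministic kwarg here anymore
    metrics = m.fit(df, freq="D", deterministic=True)  # the flag now lives on fit()
    forecast = m.predict(df)

Passing the flag per call matches how the performance tests were updated: each m.fit(...) in tests/test_model_performance.py now requests determinism for that particular training run instead of baking it into the model object at construction time.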