
revert lr scheduler first decay to 1.0 and add more comments for lr and lr_scheduler and its related test func
OuyangWenyu committed Sep 18, 2024
1 parent 76ec677 commit 4271cd0
Showing 4 changed files with 102 additions and 65 deletions.
147 changes: 90 additions & 57 deletions tests/test_deep_hydro.py
@@ -1,7 +1,7 @@
"""
Author: Wenyu Ouyang
Date: 2024-05-01 10:34:15
LastEditTime: 2024-05-04 11:31:52
LastEditTime: 2024-09-18 16:47:26
LastEditors: Wenyu Ouyang
Description: Unit tests for the DeepHydro class
FilePath: \torchhydro\tests\test_deep_hydro.py
@@ -16,7 +16,8 @@
from torchhydro.datasets.data_dict import datasets_dict
from torchhydro.trainers.train_logger import TrainLogger
import torch
import tempfile
from torch.optim import Adam
from torch.optim.lr_scheduler import LambdaLR, ExponentialLR, ReduceLROnPlateau


# Mock dataset class using random data
@@ -32,61 +33,36 @@ def ngrid(self):

@property
def nt(self):
return 200
return 100

def __len__(self):
return self.ngrid * (self.nt - self.data_cfgs["forecast_history"] + 1)
return self.ngrid * (self.nt - self.data_cfgs["forecast_length"] + 1)

def __getitem__(self, idx):
# Use the stored configurations to generate mock data
# rho = self.data_cfgs["forecast_history"]
# x = torch.randn(rho, self.data_cfgs["input_features"])
# y = torch.randn(rho, self.data_cfgs["output_features"])
mode = self.data_cfgs["model_mode"]
if mode == "single":
src1 = torch.rand(
self.data_cfgs["forecast_history"],
self.data_cfgs["input_features"] - 1,
)
src2 = torch.rand(
self.data_cfgs["forecast_history"],
self.data_cfgs["cnn_size"],
)
else:
src1 = torch.rand(
self.data_cfgs["forecast_history"],
self.data_cfgs["input_features"],
)
src2 = torch.rand(
self.data_cfgs["forecast_history"],
self.data_cfgs["input_size_encoder2"]
)
src3 = torch.rand(1, self.data_cfgs["output_features"]) # start_token
out = torch.rand(self.data_cfgs["forecast_length"], self.data_cfgs["output_features"])
return [src1, src2, src3], out
rho = self.data_cfgs["forecast_length"]
x = torch.randn(rho, self.data_cfgs["input_features"])
y = torch.randn(rho, self.data_cfgs["output_features"])
return x, y


@pytest.fixture()
def dummy_data_cfgs():
test_path = "results/test_seq2seq_single2/"
test_path = "results/test/"
if not os.path.exists(test_path):
os.makedirs(test_path)
return {
"dataset": "MockDataset",
"input_features": 4,
"input_features": 10,
"output_features": 1,
# "t_range_valid": ["2010-01-01", "2010-12-31"],
"t_range_valid": None,
"test_path": test_path,
"sampler": "KuaiSampler",
# "sampler": "HydroSampler",
"batch_size": 5,
"forecast_history": 5,
"forecast_length": 2,
"warmup_length": 10,
"cnn_size" : 120,
"input_size_encoder2": 1,
"model_mode": "single",
"forecast_history": 0,
"forecast_length": 30,
"warmup_length": 0,
}


@@ -97,49 +73,50 @@ def test_using_mock_dataset(dummy_data_cfgs):

dataset = datasets_dict[dataset_name](dummy_data_cfgs, is_tra_val_te)

assert len(dataset) == 330
sample_x, sample_y= dataset[0]
assert len(dataset) == 710
sample_x, sample_y = dataset[0]
print(sample_x[0].shape)
print(sample_x[1].shape)
print(sample_x[2].shape)
print(sample_y.shape)
# assert sample_x.shape == (dummy_data_cfgs['forecast_history'], dummy_data_cfgs["input_features"])
# assert sample_y.shape == (dummy_data_cfgs['forecast_history'], dummy_data_cfgs["output_features"])
assert sample_x.shape == (
dummy_data_cfgs["forecast_length"],
dummy_data_cfgs["input_features"],
)
assert sample_y.shape == (
dummy_data_cfgs["forecast_length"],
dummy_data_cfgs["output_features"],
)


@pytest.fixture()
def dummy_train_cfgs(dummy_data_cfgs):
return {
"training_cfgs": {
"early_stopping": False,
"patience": 4,
"epochs": 2,
"epochs": 12,
"start_epoch": 1,
"which_first_tensor": "batch",
"device": -1, # Assuming CPU device
"train_mode": True,
"criterion": "RMSE",
"optimizer": "Adam",
"optim_params": {},
"lr_scheduler": {"lr": 0.001},
"optim_params": {"lr": 0.01},
# "optim_params": {},
"lr_scheduler": {0: 0.5, 10: 0.1},
"batch_size": 5,
"save_epoch": 1,
},
"data_cfgs": dummy_data_cfgs,
"model_cfgs": {
"model_type": "Normal",
# "model_name": "CpuLSTM",
"model_name": "Seq2Seq",
"model_name": "CpuLSTM",
"weight_path": None,
"model_hyperparam": {
# "n_input_features": 10,
# "n_output_features": 1,
# "n_hidden_states": 64,
"input_size": 4,
"output_size": 1,
"hidden_size": 256,
"forecast_length": 2,
"cnn_size": 120,
"model_mode": "single",
"n_input_features": 10,
"n_output_features": 1,
"n_hidden_states": 64,
},
},
"evaluation_cfgs": {
@@ -149,6 +126,7 @@ def dummy_train_cfgs(dummy_data_cfgs):
},
}


@pytest.fixture()
def deep_hydro(dummy_train_cfgs):
datasets_dict["MockDataset"] = MockDataset
@@ -163,8 +141,63 @@ def test_model_train(deep_hydro):
# Add assertions to check the expected behavior of the method
assert deep_hydro.model.state_dict() is not None


def test_plot_model_structure(deep_hydro, dummy_train_cfgs):
opt = torch.optim.SGD(deep_hydro.model.parameters(), lr=0.01)
model_filepath = dummy_train_cfgs["data_cfgs"]["test_path"]
train_logger = TrainLogger(model_filepath, dummy_train_cfgs, opt)
train_logger.plot_model_structure(deep_hydro.model)
train_logger.plot_model_structure(deep_hydro.model)


def test_get_scheduler_lambda_lr(deep_hydro, dummy_train_cfgs):
dummy_train_cfgs["training_cfgs"]["lr_scheduler"] = {"lr": 0.001}
opt = Adam(deep_hydro.model.parameters())
scheduler = deep_hydro._get_scheduler(dummy_train_cfgs["training_cfgs"], opt)
assert isinstance(scheduler, LambdaLR)


def test_get_scheduler_lambda_lr_with_epochs(deep_hydro, dummy_train_cfgs):
dummy_train_cfgs["training_cfgs"]["lr_scheduler"] = {0: 1.0, 10: 0.1}
opt = Adam(deep_hydro.model.parameters())
scheduler = deep_hydro._get_scheduler(dummy_train_cfgs["training_cfgs"], opt)
assert isinstance(scheduler, LambdaLR)


def test_get_scheduler_exponential_lr(deep_hydro, dummy_train_cfgs):
dummy_train_cfgs["training_cfgs"]["lr_scheduler"] = {"lr_factor": 0.9}
opt = Adam(deep_hydro.model.parameters())
scheduler = deep_hydro._get_scheduler(dummy_train_cfgs["training_cfgs"], opt)
assert isinstance(scheduler, ExponentialLR)


def test_get_scheduler_reduce_lr_on_plateau(deep_hydro, dummy_train_cfgs):
dummy_train_cfgs["training_cfgs"]["lr_scheduler"] = {
"lr_factor": 0.9,
"lr_patience": 5,
}
opt = Adam(deep_hydro.model.parameters())
scheduler = deep_hydro._get_scheduler(dummy_train_cfgs["training_cfgs"], opt)
assert isinstance(scheduler, ReduceLROnPlateau)


def test_get_scheduler_invalid_config(deep_hydro, dummy_train_cfgs):
dummy_train_cfgs["training_cfgs"]["lr_scheduler"] = {"invalid_key": 0.9}
opt = Adam(deep_hydro.model.parameters())
with pytest.raises(ValueError, match="Invalid lr_scheduler configuration"):
deep_hydro._get_scheduler(dummy_train_cfgs["training_cfgs"], opt)


# a test func for LambdaLR that shows the lr change in each epoch
def test_get_scheduler_lambda_lr_with_epochs_show_lr(deep_hydro, dummy_train_cfgs):
# NOTE: the scheduler's epoch count starts at 0, but scheduler.step() is still called after each epoch
dummy_train_cfgs["training_cfgs"]["lr_scheduler"] = {1: 0.5, 10: 0.1}
opt = Adam(deep_hydro.model.parameters())
scheduler = deep_hydro._get_scheduler(dummy_train_cfgs["training_cfgs"], opt)
for epoch in range(1, 15):
# We start from epoch 1 rather than 0 to make it easier for humans to follow
# NOTE: scheduler.step() is called at the end of each epoch,
# so during the first epoch the lr is still the initial lr;
# the initial lr for the first epoch has to be set directly in the optimizer
print(f"epoch:{epoch}, lr:{opt.param_groups[0]['lr']}")
scheduler.step()
assert isinstance(scheduler, LambdaLR)
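
For reference, a minimal standalone sketch (not part of this commit, with made-up model and lr values) of the behavior the test above exercises: LambdaLR multiplies the optimizer's initial lr by the factor returned for the current epoch, so epochs missing from the dict fall back to a factor of 1.0, i.e. the initial lr.

import torch
from torch.optim import Adam
from torch.optim.lr_scheduler import LambdaLR

model = torch.nn.Linear(10, 1)
opt = Adam(model.parameters(), lr=0.01)  # initial lr, i.e. the "optim_params" lr

# epoch -> decay factor applied to the initial lr; missing epochs fall back to 1.0
lr_scheduler_cfg = {1: 0.5, 10: 0.1}
scheduler = LambdaLR(opt, lr_lambda=lambda epoch: lr_scheduler_cfg.get(epoch, 1.0))

for epoch in range(1, 15):
    # scheduler.step() runs at the end of each epoch, so epoch 1 still uses 0.01
    print(f"epoch:{epoch}, lr:{opt.param_groups[0]['lr']}")
    scheduler.step()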
12 changes: 8 additions & 4 deletions torchhydro/configs/config.py
@@ -1,7 +1,7 @@
"""
Author: Wenyu Ouyang
Date: 2021-12-31 11:08:29
LastEditTime: 2024-09-15 10:04:38
LastEditTime: 2024-09-18 16:50:16
LastEditors: Wenyu Ouyang
Description: Config for hydroDL
FilePath: \torchhydro\torchhydro\configs\config.py
@@ -221,16 +221,20 @@ def default_config_file():
"criterion_params": None,
# "weight_decay": None, a regularization term in loss func
"optimizer": "Adam",
# "optim_params": {"lr": 0.001} means the initial learning rate is 0.001
"optim_params": {},
"lr_scheduler": {
# 1st opt config, all epochs use this lr
# 1st opt config, all epochs use this lr,
# this setting will override the lr setting in "optim_params"
"lr": 0.001,
# 2nd opt config, different epochs use different lrs; each key is an epoch
# (starting from 0) and each value is a decay factor
# e.g., if the initial lr is 0.001, then 0: 0.5 means the lr at epoch 0 is 0.001*0.5=0.0005
# "lr_scheduler": {0: 1, 1: 0.5, 2: 0.2},
# 3rd opt config, lr as a initial value, and lr_factor as an exponential decay factor
# 3rd opt config, lr as an initial value (overrides the lr setting in "optim_params"),
# lr_factor as an exponential decay factor
# "lr": 0.001, "lr_factor": 0.1,
# 4th opt config, lr as a initial value,
# 4th opt config, lr as an initial value (overrides the lr setting in "optim_params");
# lr_patience is how many epochs without improvement (we watch val_loss) can be tolerated;
# once lr_patience is exceeded, the lr is multiplied by lr_factor
# "lr": 0.001, "lr_factor": 0.1, "lr_patience": 1,
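
For quick reference, the four shapes of the "lr_scheduler" option described in the comments above, with the scheduler each one maps to according to the tests added in this commit (numeric values are illustrative only):

# 1st: one lr for all epochs (overrides the lr in "optim_params") -> LambdaLR
lr_scheduler = {"lr": 0.001}
# 2nd: per-epoch decay factors keyed by epoch number (starting from 0) -> LambdaLR
lr_scheduler = {0: 1, 1: 0.5, 2: 0.2}
# 3rd: initial lr plus an exponential decay factor -> ExponentialLR
lr_scheduler = {"lr": 0.001, "lr_factor": 0.1}
# 4th: initial lr, decay factor and patience on val_loss -> ReduceLROnPlateau
lr_scheduler = {"lr": 0.001, "lr_factor": 0.1, "lr_patience": 1}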
4 changes: 2 additions & 2 deletions torchhydro/trainers/deep_hydro.py
@@ -1,7 +1,7 @@
"""
Author: Wenyu Ouyang
Date: 2024-04-08 18:15:48
LastEditTime: 2024-09-16 10:19:34
LastEditTime: 2024-09-18 11:18:15
LastEditors: Wenyu Ouyang
Description: HydroDL model class
FilePath: \torchhydro\torchhydro\trainers\deep_hydro.py
@@ -297,7 +297,7 @@ def _get_scheduler(self, training_cfgs, opt):
isinstance(epoch, int) for epoch in lr_scheduler_cfg
):
scheduler = LambdaLR(
opt, lr_lambda=lambda epoch: lr_scheduler_cfg.get(epoch, 0.5)  # start from 1 initially (forced); after epoch 10 start from 0.1 again
opt, lr_lambda=lambda epoch: lr_scheduler_cfg.get(epoch, 1.0)
)
elif "lr_factor" in lr_scheduler_cfg and "lr_patience" not in lr_scheduler_cfg:
scheduler = ExponentialLR(opt, gamma=lr_scheduler_cfg["lr_factor"])
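
For orientation only, a rough sketch of the dispatch that this hunk and the tests above imply; it is not the actual _get_scheduler body, and the handling of the plain "lr" option (writing the lr into the optimizer's param groups) is an assumption:

from torch.optim.lr_scheduler import LambdaLR, ExponentialLR, ReduceLROnPlateau

def get_scheduler_sketch(training_cfgs, opt):
    lr_scheduler_cfg = training_cfgs["lr_scheduler"]
    if lr_scheduler_cfg and all(isinstance(epoch, int) for epoch in lr_scheduler_cfg):
        # epoch-keyed decay factors; missing epochs keep the initial lr (factor 1.0)
        return LambdaLR(opt, lr_lambda=lambda epoch: lr_scheduler_cfg.get(epoch, 1.0))
    if "lr_factor" in lr_scheduler_cfg and "lr_patience" in lr_scheduler_cfg:
        # watch val_loss; decay by lr_factor after lr_patience epochs without improvement
        return ReduceLROnPlateau(
            opt,
            factor=lr_scheduler_cfg["lr_factor"],
            patience=lr_scheduler_cfg["lr_patience"],
        )
    if "lr_factor" in lr_scheduler_cfg:
        return ExponentialLR(opt, gamma=lr_scheduler_cfg["lr_factor"])
    if "lr" in lr_scheduler_cfg:
        # assumed handling: set the lr directly and keep it constant
        for group in opt.param_groups:
            group["lr"] = lr_scheduler_cfg["lr"]
        return LambdaLR(opt, lr_lambda=lambda epoch: 1.0)
    raise ValueError("Invalid lr_scheduler configuration")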
4 changes: 2 additions & 2 deletions torchhydro/trainers/train_logger.py
@@ -1,7 +1,7 @@
"""
Author: Wenyu Ouyang
Date: 2021-12-31 11:08:29
LastEditTime: 2024-05-04 11:30:00
LastEditTime: 2024-09-18 15:40:10
LastEditors: Wenyu Ouyang
Description: Training function for DL models
FilePath: \torchhydro\torchhydro\trainers\train_logger.py
@@ -73,7 +73,7 @@ def save_session_param(
def log_epoch_train(self, epoch):
start_time = time.time()
logs = {}
# here content in the with block will be performed
# here the content of the 'with' block will be executed after yield
yield logs
total_loss = logs["train_loss"]
elapsed_time = time.time() - start_time
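
A side note on the comment fixed above: log_epoch_train is a generator, so the caller's 'with' body runs at the yield point and fills logs before the loss and timing lines execute. A minimal standalone sketch of that pattern, assuming a contextlib.contextmanager-style wrapper (TrainLogger's real bookkeeping is omitted and the print format is made up):

import time
from contextlib import contextmanager

@contextmanager
def log_epoch_train(epoch):
    start_time = time.time()
    logs = {}
    # the caller's 'with' block runs here and fills `logs`
    yield logs
    total_loss = logs["train_loss"]
    elapsed_time = time.time() - start_time
    print(f"Epoch {epoch} loss {total_loss:.4f} took {elapsed_time:.2f}s")

# usage: the body executes between `yield` and the code after it
with log_epoch_train(epoch=1) as logs:
    logs["train_loss"] = 0.123  # stand-in for a real training loop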
