
revert lr scheduler first decay to 1.0 and add more comments for lr and lr_scheduler and its related test func
OuyangWenyu committed Sep 18, 2024
1 parent 76ec677 commit 4271cd0
Showing 4 changed files with 102 additions and 65 deletions.
147 changes: 90 additions & 57 deletions tests/test_deep_hydro.py
@@ -1,7 +1,7 @@
"""
Author: Wenyu Ouyang
Date: 2024-05-01 10:34:15
LastEditTime: 2024-05-04 11:31:52
LastEditTime: 2024-09-18 16:47:26
LastEditors: Wenyu Ouyang
Description: Unit tests for the DeepHydro class
FilePath: \torchhydro\tests\test_deep_hydro.py
@@ -16,7 +16,8 @@
from torchhydro.datasets.data_dict import datasets_dict
from torchhydro.trainers.train_logger import TrainLogger
import torch
import tempfile
from torch.optim import Adam
from torch.optim.lr_scheduler import LambdaLR, ExponentialLR, ReduceLROnPlateau


# Mock dataset class using random data
@@ -32,61 +33,36 @@ def ngrid(self):

@property
def nt(self):
return 200
return 100

def __len__(self):
return self.ngrid * (self.nt - self.data_cfgs["forecast_history"] + 1)
return self.ngrid * (self.nt - self.data_cfgs["forecast_length"] + 1)

def __getitem__(self, idx):
# Use the stored configurations to generate mock data
# rho = self.data_cfgs["forecast_history"]
# x = torch.randn(rho, self.data_cfgs["input_features"])
# y = torch.randn(rho, self.data_cfgs["output_features"])
mode = self.data_cfgs["model_mode"]
if mode == "single":
src1 = torch.rand(
self.data_cfgs["forecast_history"],
self.data_cfgs["input_features"] - 1,
)
src2 = torch.rand(
self.data_cfgs["forecast_history"],
self.data_cfgs["cnn_size"],
)
else:
src1 = torch.rand(
self.data_cfgs["forecast_history"],
self.data_cfgs["input_features"],
)
src2 = torch.rand(
self.data_cfgs["forecast_history"],
self.data_cfgs["input_size_encoder2"]
)
src3 = torch.rand(1, self.data_cfgs["output_features"]) # start_token
out = torch.rand(self.data_cfgs["forecast_length"], self.data_cfgs["output_features"])
return [src1, src2, src3], out
rho = self.data_cfgs["forecast_length"]
x = torch.randn(rho, self.data_cfgs["input_features"])
y = torch.randn(rho, self.data_cfgs["output_features"])
return x, y


@pytest.fixture()
def dummy_data_cfgs():
test_path = "results/test_seq2seq_single2/"
test_path = "results/test/"
if not os.path.exists(test_path):
os.makedirs(test_path)
return {
"dataset": "MockDataset",
"input_features": 4,
"input_features": 10,
"output_features": 1,
# "t_range_valid": ["2010-01-01", "2010-12-31"],
"t_range_valid": None,
"test_path": test_path,
"sampler": "KuaiSampler",
# "sampler": "HydroSampler",
"batch_size": 5,
"forecast_history": 5,
"forecast_length": 2,
"warmup_length": 10,
"cnn_size" : 120,
"input_size_encoder2": 1,
"model_mode": "single",
"forecast_history": 0,
"forecast_length": 30,
"warmup_length": 0,
}


@@ -97,49 +73,50 @@ def test_using_mock_dataset(dummy_data_cfgs):

dataset = datasets_dict[dataset_name](dummy_data_cfgs, is_tra_val_te)

assert len(dataset) == 330
sample_x, sample_y= dataset[0]
assert len(dataset) == 710
sample_x, sample_y = dataset[0]
print(sample_x[0].shape)
print(sample_x[1].shape)
print(sample_x[2].shape)
print(sample_y.shape)
# assert sample_x.shape == (dummy_data_cfgs['forecast_history'], dummy_data_cfgs["input_features"])
# assert sample_y.shape == (dummy_data_cfgs['forecast_history'], dummy_data_cfgs["output_features"])
assert sample_x.shape == (
dummy_data_cfgs["forecast_length"],
dummy_data_cfgs["input_features"],
)
assert sample_y.shape == (
dummy_data_cfgs["forecast_length"],
dummy_data_cfgs["output_features"],
)


@pytest.fixture()
def dummy_train_cfgs(dummy_data_cfgs):
return {
"training_cfgs": {
"early_stopping": False,
"patience": 4,
"epochs": 2,
"epochs": 12,
"start_epoch": 1,
"which_first_tensor": "batch",
"device": -1, # Assuming CPU device
"train_mode": True,
"criterion": "RMSE",
"optimizer": "Adam",
"optim_params": {},
"lr_scheduler": {"lr": 0.001},
"optim_params": {"lr": 0.01},
# "optim_params": {},
"lr_scheduler": {0: 0.5, 10: 0.1},
"batch_size": 5,
"save_epoch": 1,
},
"data_cfgs": dummy_data_cfgs,
"model_cfgs": {
"model_type": "Normal",
# "model_name": "CpuLSTM",
"model_name": "Seq2Seq",
"model_name": "CpuLSTM",
"weight_path": None,
"model_hyperparam": {
# "n_input_features": 10,
# "n_output_features": 1,
# "n_hidden_states": 64,
"input_size": 4,
"output_size": 1,
"hidden_size": 256,
"forecast_length": 2,
"cnn_size": 120,
"model_mode": "single",
"n_input_features": 10,
"n_output_features": 1,
"n_hidden_states": 64,
},
},
"evaluation_cfgs": {
@@ -149,6 +126,7 @@ def dummy_train_cfgs(dummy_data_cfgs):
},
}


@pytest.fixture()
def deep_hydro(dummy_train_cfgs):
datasets_dict["MockDataset"] = MockDataset
@@ -163,8 +141,63 @@ def test_model_train(deep_hydro):
# Add assertions to check the expected behavior of the method
assert deep_hydro.model.state_dict() is not None


def test_plot_model_structure(deep_hydro, dummy_train_cfgs):
opt = torch.optim.SGD(deep_hydro.model.parameters(), lr=0.01)
model_filepath = dummy_train_cfgs["data_cfgs"]["test_path"]
train_logger = TrainLogger(model_filepath, dummy_train_cfgs, opt)
train_logger.plot_model_structure(deep_hydro.model)
train_logger.plot_model_structure(deep_hydro.model)


def test_get_scheduler_lambda_lr(deep_hydro, dummy_train_cfgs):
dummy_train_cfgs["training_cfgs"]["lr_scheduler"] = {"lr": 0.001}
opt = Adam(deep_hydro.model.parameters())
scheduler = deep_hydro._get_scheduler(dummy_train_cfgs["training_cfgs"], opt)
assert isinstance(scheduler, LambdaLR)


def test_get_scheduler_lambda_lr_with_epochs(deep_hydro, dummy_train_cfgs):
dummy_train_cfgs["training_cfgs"]["lr_scheduler"] = {0: 1.0, 10: 0.1}
opt = Adam(deep_hydro.model.parameters())
scheduler = deep_hydro._get_scheduler(dummy_train_cfgs["training_cfgs"], opt)
assert isinstance(scheduler, LambdaLR)


def test_get_scheduler_exponential_lr(deep_hydro, dummy_train_cfgs):
dummy_train_cfgs["training_cfgs"]["lr_scheduler"] = {"lr_factor": 0.9}
opt = Adam(deep_hydro.model.parameters())
scheduler = deep_hydro._get_scheduler(dummy_train_cfgs["training_cfgs"], opt)
assert isinstance(scheduler, ExponentialLR)


def test_get_scheduler_reduce_lr_on_plateau(deep_hydro, dummy_train_cfgs):
dummy_train_cfgs["training_cfgs"]["lr_scheduler"] = {
"lr_factor": 0.9,
"lr_patience": 5,
}
opt = Adam(deep_hydro.model.parameters())
scheduler = deep_hydro._get_scheduler(dummy_train_cfgs["training_cfgs"], opt)
assert isinstance(scheduler, ReduceLROnPlateau)


def test_get_scheduler_invalid_config(deep_hydro, dummy_train_cfgs):
dummy_train_cfgs["training_cfgs"]["lr_scheduler"] = {"invalid_key": 0.9}
opt = Adam(deep_hydro.model.parameters())
with pytest.raises(ValueError, match="Invalid lr_scheduler configuration"):
deep_hydro._get_scheduler(dummy_train_cfgs["training_cfgs"], opt)


# a test func for LambdaLR that shows the lr change in each epoch
def test_get_scheduler_lambda_lr_with_epochs_show_lr(deep_hydro, dummy_train_cfgs):
# NOTE: the scheduler's epoch count starts at 0, but scheduler.step() is still called after each epoch
dummy_train_cfgs["training_cfgs"]["lr_scheduler"] = {1: 0.5, 10: 0.1}
opt = Adam(deep_hydro.model.parameters())
scheduler = deep_hydro._get_scheduler(dummy_train_cfgs["training_cfgs"], opt)
for epoch in range(1, 15):
# We start from epoch 1 rather than 0 to make it easier for humans to follow
# NOTE: scheduler.step() is called at the end of each epoch,
# so during the first epoch the lr is still the initial lr;
# the initial lr for the first epoch has to be set directly in the optimizer
print(f"epoch:{epoch}, lr:{opt.param_groups[0]['lr']}")
scheduler.step()
assert isinstance(scheduler, LambdaLR)
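
For reference, a minimal standalone sketch (not part of this commit, with made-up model and lr values) of the behavior the test above exercises: LambdaLR multiplies the optimizer's initial lr by the factor returned for the current epoch, so epochs missing from the dict fall back to a factor of 1.0, i.e. the initial lr.

import torch
from torch.optim import Adam
from torch.optim.lr_scheduler import LambdaLR

model = torch.nn.Linear(10, 1)
opt = Adam(model.parameters(), lr=0.01)  # initial lr, i.e. the "optim_params" lr

# epoch -> decay factor applied to the initial lr; missing epochs fall back to 1.0
lr_scheduler_cfg = {1: 0.5, 10: 0.1}
scheduler = LambdaLR(opt, lr_lambda=lambda epoch: lr_scheduler_cfg.get(epoch, 1.0))

for epoch in range(1, 15):
    # scheduler.step() runs at the end of each epoch, so epoch 1 still uses 0.01
    print(f"epoch:{epoch}, lr:{opt.param_groups[0]['lr']}")
    scheduler.step()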
12 changes: 8 additions & 4 deletions torchhydro/configs/config.py
@@ -1,7 +1,7 @@
"""
Author: Wenyu Ouyang
Date: 2021-12-31 11:08:29
LastEditTime: 2024-09-15 10:04:38
LastEditTime: 2024-09-18 16:50:16
LastEditors: Wenyu Ouyang
Description: Config for hydroDL
FilePath: \torchhydro\torchhydro\configs\config.py
@@ -221,16 +221,20 @@ def default_config_file():
"criterion_params": None,
# "weight_decay": None, a regularization term in loss func
"optimizer": "Adam",
# "optim_params": {"lr": 0.001} means the initial learning rate is 0.001
"optim_params": {},
"lr_scheduler": {
# 1st opt config, all epochs use this lr
# 1st opt config, all epochs use this lr,
# this setting will override the lr setting in "optim_params"
"lr": 0.001,
# 2nd opt config, different epochs use different lrs; each key is an epoch
# (starting from 0) and each value is a decay factor
# e.g., if the initial lr is 0.001, then 0: 0.5 means the lr at epoch 0 is 0.001*0.5=0.0005
# "lr_scheduler": {0: 1, 1: 0.5, 2: 0.2},
# 3rd opt config, lr as a initial value, and lr_factor as an exponential decay factor
# 3rd opt config, lr as an initial value (overrides the lr setting in "optim_params"),
# lr_factor as an exponential decay factor
# "lr": 0.001, "lr_factor": 0.1,
# 4th opt config, lr as a initial value,
# 4th opt config, lr as an initial value (overrides the lr setting in "optim_params");
# lr_patience is how many epochs without improvement (we watch val_loss) can be tolerated;
# once lr_patience is exceeded, the lr is multiplied by lr_factor
# "lr": 0.001, "lr_factor": 0.1, "lr_patience": 1,
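
For quick reference, the four shapes of the "lr_scheduler" option described in the comments above, with the scheduler each one maps to according to the tests added in this commit (numeric values are illustrative only):

# 1st: one lr for all epochs (overrides the lr in "optim_params") -> LambdaLR
lr_scheduler = {"lr": 0.001}
# 2nd: per-epoch decay factors keyed by epoch number (starting from 0) -> LambdaLR
lr_scheduler = {0: 1, 1: 0.5, 2: 0.2}
# 3rd: initial lr plus an exponential decay factor -> ExponentialLR
lr_scheduler = {"lr": 0.001, "lr_factor": 0.1}
# 4th: initial lr, decay factor and patience on val_loss -> ReduceLROnPlateau
lr_scheduler = {"lr": 0.001, "lr_factor": 0.1, "lr_patience": 1}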
4 changes: 2 additions & 2 deletions torchhydro/trainers/deep_hydro.py
@@ -1,7 +1,7 @@
"""
Author: Wenyu Ouyang
Date: 2024-04-08 18:15:48
LastEditTime: 2024-09-16 10:19:34
LastEditTime: 2024-09-18 11:18:15
LastEditors: Wenyu Ouyang
Description: HydroDL model class
FilePath: \torchhydro\torchhydro\trainers\deep_hydro.py
@@ -297,7 +297,7 @@ def _get_scheduler(self, training_cfgs, opt):
isinstance(epoch, int) for epoch in lr_scheduler_cfg
):
scheduler = LambdaLR(
opt, lr_lambda=lambda epoch: lr_scheduler_cfg.get(epoch, 0.5)  # start from 1 initially (forced); after epoch 10 start from 0.1 again
opt, lr_lambda=lambda epoch: lr_scheduler_cfg.get(epoch, 1.0)
)
elif "lr_factor" in lr_scheduler_cfg and "lr_patience" not in lr_scheduler_cfg:
scheduler = ExponentialLR(opt, gamma=lr_scheduler_cfg["lr_factor"])
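
For orientation only, a rough sketch of the dispatch that this hunk and the tests above imply; it is not the actual _get_scheduler body, and the handling of the plain "lr" option (writing the lr into the optimizer's param groups) is an assumption:

from torch.optim.lr_scheduler import LambdaLR, ExponentialLR, ReduceLROnPlateau

def get_scheduler_sketch(training_cfgs, opt):
    lr_scheduler_cfg = training_cfgs["lr_scheduler"]
    if lr_scheduler_cfg and all(isinstance(epoch, int) for epoch in lr_scheduler_cfg):
        # epoch-keyed decay factors; missing epochs keep the initial lr (factor 1.0)
        return LambdaLR(opt, lr_lambda=lambda epoch: lr_scheduler_cfg.get(epoch, 1.0))
    if "lr_factor" in lr_scheduler_cfg and "lr_patience" in lr_scheduler_cfg:
        # watch val_loss; decay by lr_factor after lr_patience epochs without improvement
        return ReduceLROnPlateau(
            opt,
            factor=lr_scheduler_cfg["lr_factor"],
            patience=lr_scheduler_cfg["lr_patience"],
        )
    if "lr_factor" in lr_scheduler_cfg:
        return ExponentialLR(opt, gamma=lr_scheduler_cfg["lr_factor"])
    if "lr" in lr_scheduler_cfg:
        # assumed handling: set the lr directly and keep it constant
        for group in opt.param_groups:
            group["lr"] = lr_scheduler_cfg["lr"]
        return LambdaLR(opt, lr_lambda=lambda epoch: 1.0)
    raise ValueError("Invalid lr_scheduler configuration")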
4 changes: 2 additions & 2 deletions torchhydro/trainers/train_logger.py
@@ -1,7 +1,7 @@
"""
Author: Wenyu Ouyang
Date: 2021-12-31 11:08:29
LastEditTime: 2024-05-04 11:30:00
LastEditTime: 2024-09-18 15:40:10
LastEditors: Wenyu Ouyang
Description: Training function for DL models
FilePath: \torchhydro\torchhydro\trainers\train_logger.py
@@ -73,7 +73,7 @@ def save_session_param(
def log_epoch_train(self, epoch):
start_time = time.time()
logs = {}
# here content in the with block will be performed
# here the content of the 'with' block will be executed after yield
yield logs
total_loss = logs["train_loss"]
elapsed_time = time.time() - start_time
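
A side note on the comment fixed above: log_epoch_train is a generator, so the caller's 'with' body runs at the yield point and fills logs before the loss and timing lines execute. A minimal standalone sketch of that pattern, assuming a contextlib.contextmanager-style wrapper (TrainLogger's real bookkeeping is omitted and the print format is made up):

import time
from contextlib import contextmanager

@contextmanager
def log_epoch_train(epoch):
    start_time = time.time()
    logs = {}
    # the caller's 'with' block runs here and fills `logs`
    yield logs
    total_loss = logs["train_loss"]
    elapsed_time = time.time() - start_time
    print(f"Epoch {epoch} loss {total_loss:.4f} took {elapsed_time:.2f}s")

# usage: the body executes between `yield` and the code after it
with log_epoch_train(epoch=1) as logs:
    logs["train_loss"] = 0.123  # stand-in for a real training loop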
