diff --git a/src/open_clip_train/scheduler.py b/src/open_clip_train/scheduler.py index fba76fcf1..f76ba110f 100644 --- a/src/open_clip_train/scheduler.py +++ b/src/open_clip_train/scheduler.py @@ -1,4 +1,4 @@ -import numpy as np +import math def assign_learning_rate(optimizer, new_lr): @@ -18,6 +18,7 @@ def _lr_adjuster(step): lr = base_lr assign_learning_rate(optimizer, lr) return lr + return _lr_adjuster @@ -33,10 +34,11 @@ def _lr_adjuster(step): e = step - start_cooldown_step es = steps - start_cooldown_step # linear decay if power == 1; polynomial decay otherwise; - decay = (1 - (e/es)) ** cooldown_power + decay = (1 - (e / es)) ** cooldown_power lr = decay * (base_lr - cooldown_end_lr) + cooldown_end_lr assign_learning_rate(optimizer, lr) return lr + return _lr_adjuster @@ -47,7 +49,9 @@ def _lr_adjuster(step): else: e = step - warmup_length es = steps - warmup_length - lr = 0.5 * (1 + np.cos(np.pi * e / es)) * base_lr + lr = 0.5 * (1 + math.cos(math.pi * e / es)) * base_lr assign_learning_rate(optimizer, lr) return lr + return _lr_adjuster +