Source code for catalyst.contrib.nn.schedulers.onecycle
from typing import List
import numpy as np
from torch.optim import Optimizer
from catalyst.contrib.nn.schedulers.base import BatchScheduler
from catalyst.utils.torch import get_optimizer_momentum

class OneCycleLRWithWarmup(BatchScheduler):
"""OneCycle scheduler with warm-up & lr decay stages.
First stage increases lr from ``init_lr`` to ``max_lr``,
and called ``warmup``. Also it decreases momentum
from ``init_momentum`` to ``min_momentum``. Takes ``warmup_steps`` steps
Second is ``annealing`` stage. Decrease lr from ``max_lr`` to ``min_lr``,
Increase momentum from ``min_momentum`` to ``max_momentum``.
Third, optional, lr decay.
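
    Example (a minimal sketch; the model, optimizer settings, and step
    counts below are illustrative):

    .. code-block:: python

        import torch

        model = torch.nn.Linear(10, 2)
        optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
        scheduler = OneCycleLRWithWarmup(
            optimizer,
            num_steps=1000,
            lr_range=(0.5, 0.005),
            warmup_fraction=0.1,
            decay_fraction=0.2,
        )
        # the training loop (or a Catalyst runner) then calls
        # scheduler.step() once per batch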
"""

    def __init__(
self,
optimizer: Optimizer,
num_steps: int,
lr_range=(1.0, 0.005),
init_lr: float = None,
warmup_steps: int = 0,
warmup_fraction: float = None,
decay_steps: int = 0,
decay_fraction: float = None,
momentum_range=(0.8, 0.99, 0.999),
init_momentum: float = None,
):
"""
Args:
optimizer: PyTorch optimizer
num_steps: total number of steps
lr_range: tuple with two or three elements
(max_lr, min_lr, [final_lr])
init_lr (float, optional): initial lr
            warmup_steps: count of steps for the warmup stage
            warmup_fraction (float, optional): fraction in [0; 1) used to
                compute the number of warmup steps.
                Cannot be set together with ``warmup_steps``
            decay_steps: count of steps for the lr decay stage
            decay_fraction (float, optional): fraction in [0; 1) used to
                compute the number of decay steps.
                Cannot be set together with ``decay_steps``
momentum_range: tuple with two or three elements
(min_momentum, max_momentum, [final_momentum])
init_momentum (float, optional): initial momentum
"""
        if len(lr_range) == 2:
            max_lr, min_lr = lr_range
            final_lr = min_lr
        elif len(lr_range) == 3:
            max_lr, min_lr, final_lr = lr_range
        else:
            raise ValueError("lr_range must have two or three elements")
        if len(momentum_range) == 2:
            min_momentum, max_momentum = momentum_range
            final_momentum = max_momentum
        elif len(momentum_range) == 3:
            min_momentum, max_momentum, final_momentum = momentum_range
        else:
            raise ValueError("momentum_range must have two or three elements")
if init_lr is None:
init_lr = optimizer.defaults["lr"]
if init_momentum is None:
init_momentum = get_optimizer_momentum(optimizer)
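
        # translate the optional stage fractions into absolute step counts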
warmup_steps = self._calculate_warmup(
num_steps, warmup_steps, warmup_fraction
)
decay_steps = self._calculate_decay(
num_steps, decay_steps, decay_fraction
)
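
        # whatever remains after warmup and decay is the annealing stage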
lr_annealing_steps = num_steps - (warmup_steps + decay_steps)
self.warmup_steps = warmup_steps
self.lr_annealing_steps = lr_annealing_steps
self.decay_steps = decay_steps
self.num_steps = warmup_steps + lr_annealing_steps + decay_steps
self.lr_range = init_lr, max_lr, min_lr, final_lr
self.momentum_range = (
init_momentum,
min_momentum,
max_momentum,
final_momentum,
)
self._calculate_lr_momentum(
warmup_steps, lr_annealing_steps, decay_steps
)
self.total_groups = len(optimizer.param_groups)
super().__init__(optimizer)

    def _calculate_warmup(
self, num_steps: int, warmup_steps: int, warmup_fraction: float
):
if warmup_fraction is not None:
assert 0.0 <= warmup_fraction < 1.0 and warmup_steps == 0, (
"You should pass either warmup_steps or "
"warmup_fraction in range [0; 1) "
)
warmup_steps = int(num_steps * warmup_fraction)
self.warmup_steps = warmup_steps
self.has_warmup = warmup_steps != 0
return self.warmup_steps

    def _calculate_decay(
self, num_steps: int, decay_steps: int, decay_fraction: float
):
if decay_fraction is not None:
assert 0.0 <= decay_fraction < 1.0 and decay_steps == 0, (
"You should pass either decay_steps or "
"decay_fraction in range [0; 1) "
)
decay_steps = int(num_steps * decay_fraction)
self.decay_steps = decay_steps
self.has_decay = decay_steps != 0
return self.decay_steps

    def _calculate_lr_momentum(
self, warmup_steps: int, lr_annealing_steps: int, decay_steps: int
):
init_lr, max_lr, min_lr, final_lr = self.lr_range
(
init_momentum,
min_momentum,
max_momentum,
final_momentum,
) = self.momentum_range
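
        # lr follows three linear segments: warmup up to max_lr,
        # annealing down to min_lr, then an optional decay to final_lr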
lr_warmup = np.linspace(init_lr, max_lr, warmup_steps)
lr_annealing = np.linspace(max_lr, min_lr, lr_annealing_steps)
lr_decay = np.linspace(min_lr, final_lr, decay_steps)
self.learning_rates = np.concatenate(
(lr_warmup, lr_annealing, lr_decay)
)
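
        # momentum moves opposite to lr, as in the one-cycle policy:
        # down during warmup, up during annealing, then toward final_momentum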
        momentum_warmup = np.linspace(init_momentum, min_momentum, warmup_steps)
        momentum_annealing = np.linspace(
            min_momentum, max_momentum, lr_annealing_steps
        )
        momentum_decay = np.linspace(
            max_momentum, final_momentum, decay_steps
        )
        self.momentums = np.concatenate(
            (momentum_warmup, momentum_annealing, momentum_decay)
        )

    def _get_steps_lr_momentum(self, step_num: int):
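        # beyond the precomputed schedule, hold the final lr/momentum values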
if step_num < len(self.learning_rates):
lr = self.learning_rates[step_num]
else:
_, _, _, final_lr = self.lr_range
lr = final_lr
if step_num < len(self.momentums):
momentum = self.momentums[step_num]
else:
_, _, _, final_momentum = self.momentum_range
momentum = final_momentum
return lr, momentum

    def get_lr(self) -> List[float]:
"""Function that returns the new lr for optimizer.
Returns:
List[float]: calculated lr for every param groups
"""
lr, _ = self._get_steps_lr_momentum(self.last_epoch)
return [lr] * self.total_groups

    def get_momentum(self) -> List[float]:
"""Function that returns the new momentum for optimizer.
Returns:
List[float]: calculated momentum for every param groups
"""
_, momentum = self._get_steps_lr_momentum(self.last_epoch)
return [momentum] * self.total_groups

    def reset(self):
        """Resets the scheduler: recomputes the lr/momentum schedules
        and sets the step counter back to zero."""
self._calculate_lr_momentum(
self.warmup_steps, self.lr_annealing_steps, self.decay_steps
)
self.last_epoch = 0

    def recalculate(self, loader_len: int, current_step: int) -> None:
"""Recalculates total num_steps for ``batch`` mode.
Args:
loader_len: total count of batches in an epoch
            current_step: current step (measured in epochs)
"""
warmup_steps = self.warmup_steps * loader_len
lr_annealing_steps = self.lr_annealing_steps * loader_len
decay_steps = self.decay_steps * loader_len
self._calculate_lr_momentum(
warmup_steps, lr_annealing_steps, decay_steps
)
self.last_epoch = current_step * loader_len
__all__ = ["OneCycleLRWithWarmup"]