Source code for ssds.core.optimizer
import torch.optim as optim
from torch.optim import lr_scheduler
class InvertedExponentialLR(lr_scheduler._LRScheduler):
    """Exponentially changes the learning rate from the base learning rate towards ``end_lr``
    over a number of iterations (see the usage sketch after this class).

    Arguments:
        optimizer (torch.optim.Optimizer): wrapped optimizer.
        end_lr (float): the final learning rate.
        num_iter (int): the number of iterations over which the schedule runs.
        last_epoch (int, optional): the index of the last epoch. Default: -1.

    :meta private:
    """

    def __init__(self, optimizer, end_lr, num_iter=100, last_epoch=-1):
        self.end_lr = end_lr
        self.num_iter = num_iter
        super(InvertedExponentialLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        curr_iter = self.last_epoch + 1
        r = curr_iter / self.num_iter
        # Geometric interpolation between each base lr and end_lr.
        return [base_lr * (self.end_lr / base_lr) ** r for base_lr in self.base_lrs]
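# Usage sketch (assumptions: toy model and illustrative hyper-parameter values only):
# steps an optimizer through InvertedExponentialLR to show the geometric decay of the
# learning rate from the base lr towards end_lr.
def _demo_inverted_exponential_lr():
    import torch

    model = torch.nn.Linear(4, 2)                       # toy model (assumption)
    optimizer = optim.SGD(model.parameters(), lr=0.1)   # base lr = 0.1
    scheduler = InvertedExponentialLR(optimizer, end_lr=1e-4, num_iter=10)
    for step in range(10):
        optimizer.step()
        scheduler.step()
        # The lr shrinks by a constant factor each step, reaching ~end_lr at num_iter.
        print(step, scheduler.get_last_lr())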
def trainable_param(model, trainable_scope):
    r"""Return the trainable parameters for the optimizer, selected by :attr:`cfg.TRAIN.TRAINABLE_SCOPE`.

    If a module is in the trainable scope, its parameters are trained.

    When:

    * cfg.TRAIN.TRAINABLE_SCOPE = ""
        All the parameters in the model are trained.
    * cfg.TRAIN.TRAINABLE_SCOPE = "a,b,c.d"
        Only the parameters in a, b and c.d are trained.
    * cfg.TRAIN.TRAINABLE_SCOPE = "a;b,c.d"
        Only the parameters in a, b and c.d are trained. Module a and modules b & c.d form separate
        parameter groups, so they can be assigned different learning rates (differential learning rates).

    See the usage sketch after this function.

    Args:
        model: the ssds model for training
        trainable_scope (str): the scope of the trainable parameters in the given ssds model,
            defined in cfg.TRAIN.TRAINABLE_SCOPE
    """
    trainable_param = []
    if trainable_scope == "":
        for param in model.parameters():
            param.requires_grad = True
        trainable_param.append(model.parameters())
    else:
        for param in model.parameters():
            param.requires_grad = False
        for train_scope in trainable_scope.split(";"):
            param_temp = []
            for module in train_scope.split(","):
                submodule = module.split(".")
                tmp_model = model
                for subm in submodule:
                    if hasattr(tmp_model, subm):
                        tmp_model = getattr(tmp_model, subm)
                    else:
                        raise ValueError(module + " is not in the model")
                for param in tmp_model.parameters():
                    param.requires_grad = True
                param_temp.extend(tmp_model.parameters())
            trainable_param.append(param_temp)
    return trainable_param
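# Usage sketch (assumptions: the toy submodule names "backbone" and "head" are
# illustrative): shows how a TRAINABLE_SCOPE string selects which parameters stay
# trainable and how ";" splits them into separate parameter groups.
def _demo_trainable_param():
    import torch.nn as nn

    class ToyModel(nn.Module):
        def __init__(self):
            super().__init__()
            self.backbone = nn.Linear(8, 8)
            self.head = nn.Linear(8, 2)

    model = ToyModel()
    # Two scope groups separated by ";" -> two parameter groups, so each group
    # can later receive its own learning rate in configure_optimizer.
    groups = trainable_param(model, "backbone;head")
    print(len(groups))                               # 2
    print(all(p.requires_grad for p in groups[0]))   # True, backbone is trainable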
def configure_optimizer(trainable_param, cfg):
    r"""Return the optimizer for the trainable parameters.

    Basically, it returns the optimizer defined by :attr:`cfg.TRAIN.OPTIMIZER.OPTIMIZER`. The learning rate for
    the optimizer is defined by :attr:`cfg.TRAIN.OPTIMIZER.LEARNING_RATE` and
    :attr:`cfg.TRAIN.OPTIMIZER.DIFFERENTIAL_LEARNING_RATE`. Some other parameters are also defined in
    :attr:`cfg.TRAIN.OPTIMIZER`.

    Currently, 4 popular optimizers are supported: sgd, rmsprop, adam and amsgrad.
    See the usage sketch after this function.

    TODO: directly fetch the optimizer by getattr(optim, cfg.OPTIMIZER) and pass the relevant parameters as a dict.

    Args:
        trainable_param: the trainable parameters in the given ssds model, check :meth:`trainable_param` for more details.
        cfg: the config dict, which is defined in :attr:`cfg.TRAIN.OPTIMIZER`.
    """
    if len(cfg.DIFFERENTIAL_LEARNING_RATE) == 0 or len(trainable_param) == 1:
        trainable_param = trainable_param[0]
    else:
        assert len(cfg.DIFFERENTIAL_LEARNING_RATE) == len(trainable_param)
        trainable_param = [
            {"params": _param, "lr": _lr}
            for _param, _lr in zip(trainable_param, cfg.DIFFERENTIAL_LEARNING_RATE)
        ]

    if cfg.OPTIMIZER == "sgd":
        optimizer = optim.SGD(
            trainable_param,
            lr=cfg.LEARNING_RATE,
            momentum=cfg.MOMENTUM,
            weight_decay=cfg.WEIGHT_DECAY,
        )
    elif cfg.OPTIMIZER == "rmsprop":
        optimizer = optim.RMSprop(
            trainable_param,
            lr=cfg.LEARNING_RATE,
            momentum=cfg.MOMENTUM,
            alpha=cfg.MOMENTUM_2,
            eps=cfg.EPS,
            weight_decay=cfg.WEIGHT_DECAY,
        )
    elif cfg.OPTIMIZER == "adam":
        optimizer = optim.Adam(
            trainable_param,
            lr=cfg.LEARNING_RATE,
            betas=(cfg.MOMENTUM, cfg.MOMENTUM_2),
            weight_decay=cfg.WEIGHT_DECAY,
        )
    elif cfg.OPTIMIZER == "amsgrad":
        optimizer = optim.Adam(
            trainable_param,
            lr=cfg.LEARNING_RATE,
            betas=(cfg.MOMENTUM, cfg.MOMENTUM_2),
            weight_decay=cfg.WEIGHT_DECAY,
            amsgrad=True,
        )
    else:
        raise ValueError("optimizer can not be recognized")
    return optimizer
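# Usage sketch (assumptions: SimpleNamespace is only a stand-in for the real
# cfg.TRAIN.OPTIMIZER node, and the field values are illustrative): builds an SGD
# optimizer over a single parameter group.
def _demo_configure_optimizer():
    from types import SimpleNamespace
    import torch

    cfg = SimpleNamespace(
        OPTIMIZER="sgd",
        LEARNING_RATE=0.01,
        MOMENTUM=0.9,
        WEIGHT_DECAY=5e-4,
        DIFFERENTIAL_LEARNING_RATE=[],   # empty -> single parameter group
    )
    model = torch.nn.Linear(8, 2)
    params = trainable_param(model, "")      # empty scope -> train every parameter
    optimizer = configure_optimizer(params, cfg)
    print(optimizer)                         # SGD with lr=0.01, momentum=0.9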
def configure_lr_scheduler(optimizer, cfg):
    r"""Return the learning rate scheduler for the optimizer.

    Basically, it returns the learning rate scheduler defined by :attr:`cfg.TRAIN.LR_SCHEDULER.SCHEDULER`.
    Some parameters for the learning rate scheduler are also defined in :attr:`cfg.TRAIN.LR_SCHEDULER`.

    Currently, 5 learning rate schedulers are supported: step, multi_step, exponential, inverted_exponential and sgdr.
    See the usage sketch after this function.

    TODO: directly fetch the scheduler by getattr(lr_scheduler, cfg.SCHEDULER) and pass the relevant parameters as a dict.

    Args:
        optimizer: the optimizer for the given ssds model, check :meth:`configure_optimizer` for more details.
        cfg: the config dict, which is defined in :attr:`cfg.TRAIN.LR_SCHEDULER`.
    """
    if cfg.SCHEDULER == "step":
        scheduler = lr_scheduler.StepLR(
            optimizer, step_size=cfg.STEPS[0], gamma=cfg.GAMMA
        )
    elif cfg.SCHEDULER == "multi_step":
        scheduler = lr_scheduler.MultiStepLR(
            optimizer, milestones=cfg.STEPS, gamma=cfg.GAMMA
        )
    elif cfg.SCHEDULER == "exponential":
        scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=cfg.GAMMA)
    elif cfg.SCHEDULER == "inverted_exponential":
        scheduler = InvertedExponentialLR(optimizer, end_lr=cfg.LR_MIN)
    elif cfg.SCHEDULER == "sgdr":
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer, T_0=2, T_mult=2, eta_min=cfg.LR_MIN
        )
    else:
        raise ValueError("scheduler can not be recognized.")
    return scheduler
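# Usage sketch (assumptions: SimpleNamespace is only a stand-in for the real
# cfg.TRAIN.LR_SCHEDULER node, and the milestones/gamma values are illustrative):
# drives the "multi_step" branch above for a toy optimizer.
def _demo_configure_lr_scheduler():
    from types import SimpleNamespace
    import torch

    model = torch.nn.Linear(8, 2)
    optimizer = optim.SGD(model.parameters(), lr=0.01)
    cfg = SimpleNamespace(SCHEDULER="multi_step", STEPS=[30, 60, 90], GAMMA=0.1)
    scheduler = configure_lr_scheduler(optimizer, cfg)
    for epoch in range(100):
        optimizer.step()
        scheduler.step()    # lr is multiplied by GAMMA at epochs 30, 60 and 90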