Source code for ssds.core.optimizer

import torch.optim as optim
from torch.optim import lr_scheduler


class InvertedExponentialLR(lr_scheduler._LRScheduler):
    """Exponentially increases the learning rate between two boundaries over a number of
    iterations.
    Arguments:
        optimizer (torch.optim.Optimizer): wrapped optimizer.
        end_lr (float): the final learning rate.
        num_iter (int): the number of iterations over which the interpolation runs. Default: 100.
        last_epoch (int, optional): the index of last epoch. Default: -1.

    :meta private:
    """

    def __init__(self, optimizer, end_lr, num_iter=100, last_epoch=-1):
        self.end_lr = end_lr
        self.num_iter = num_iter
        super(InvertedExponentialLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        curr_iter = self.last_epoch + 1
        r = curr_iter / self.num_iter
        return [base_lr * (self.end_lr / base_lr) ** r for base_lr in self.base_lrs]
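

# --- Example (illustrative sketch, not part of the original module) ----------
# Shows how InvertedExponentialLR moves the learning rate geometrically from the
# optimizer's base lr toward ``end_lr``. The tiny model and the concrete values
# (1e-2, 1e-5, 100) are assumptions made only for this example.
def _example_inverted_exponential_lr():
    import torch

    model = torch.nn.Linear(10, 2)                    # placeholder model
    opt = optim.SGD(model.parameters(), lr=1e-2)      # base lr = 1e-2
    sched = InvertedExponentialLR(opt, end_lr=1e-5, num_iter=100)
    for _ in range(100):
        opt.step()                                    # normally after loss.backward()
        sched.step()                                  # lr decays geometrically from 1e-2 toward 1e-5
    return sched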


def trainable_param(model, trainable_scope):
    r"""Return the trainable parameters for the optimizers by :attr:`cfg.TRAIN.TRAINABLE_SCOPE`

    If a module is in the trainable scope, then its parameters are trained.

    When:

    * cfg.TRAIN.TRAINABLE_SCOPE = ""
        All the parameters in the model are used to train
    * cfg.TRAIN.TRAINABLE_SCOPE = "a,b,c.d"
        Only the parameters in a, b and c.d are used to train
    * cfg.TRAIN.TRAINABLE_SCOPE = "a;b,c.d"
        Only the parameters in a, b and c.d are used to train. Module a and modules b & c.d
        can be assigned different learning rates (differential learning rate)

    Args:
        model: the ssds model for training
        trainable_scope (str): the scope for the trainable parameters in the given ssds model,
            which is defined in cfg.TRAIN.TRAINABLE_SCOPE
    """
    trainable_param = []
    if trainable_scope == "":
        for param in model.parameters():
            param.requires_grad = True
        trainable_param.append(model.parameters())
    else:
        for param in model.parameters():
            param.requires_grad = False
        for train_scope in trainable_scope.split(";"):
            param_temp = []
            for module in train_scope.split(","):
                submodule = module.split(".")
                tmp_model = model
                for subm in submodule:
                    if hasattr(tmp_model, subm):
                        tmp_model = getattr(tmp_model, subm)
                    else:
                        raise ValueError(module + " is not in the model")
                for param in tmp_model.parameters():
                    param.requires_grad = True
                param_temp.extend(tmp_model.parameters())
            trainable_param.append(param_temp)
    return trainable_param
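

# --- Example (illustrative sketch, not part of the original module) ----------
# Demonstrates the scope syntax: "," groups modules into one parameter group,
# ";" separates groups that can later receive different learning rates. The
# stand-in model and its attribute names ("backbone", "head") are assumptions
# for this example only; real ssds models expose their own submodule names.
def _example_trainable_param():
    import torch.nn as nn

    class _ToyModel(nn.Module):
        def __init__(self):
            super().__init__()
            self.backbone = nn.Linear(8, 8)
            self.head = nn.Linear(8, 2)

    model = _ToyModel()
    groups = trainable_param(model, "backbone;head")
    # groups[0] -> parameters of model.backbone, groups[1] -> parameters of model.head;
    # any parameter outside the scope keeps requires_grad == False.
    return groups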


def configure_optimizer(trainable_param, cfg):
    r"""Return the optimizer for the trainable parameters

    Basically, it returns the optimizer defined by :attr:`cfg.TRAIN.OPTIMIZER.OPTIMIZER`.
    The learning rate for the optimizer is defined by :attr:`cfg.TRAIN.OPTIMIZER.LEARNING_RATE`
    and :attr:`cfg.TRAIN.OPTIMIZER.DIFFERENTIAL_LEARNING_RATE`. Some other parameters are also
    defined in :attr:`cfg.TRAIN.OPTIMIZER`.

    Currently, 4 popular optimizers are supported: sgd, rmsprop, adam and amsgrad.

    TODO: directly fetch the optimizer by getattr(optim, cfg.OPTIMIZER) and pass the relevant
    parameters as a dict.

    Args:
        trainable_param: the trainable parameters in the given ssds model, check
            :meth:`trainable_param` for more details.
        cfg: the config dict, which is defined in :attr:`cfg.TRAIN.OPTIMIZER`.
    """
    if len(cfg.DIFFERENTIAL_LEARNING_RATE) == 0 or len(trainable_param) == 1:
        trainable_param = trainable_param[0]
    else:
        assert len(cfg.DIFFERENTIAL_LEARNING_RATE) == len(trainable_param)
        trainable_param = [
            {"params": _param, "lr": _lr}
            for _param, _lr in zip(trainable_param, cfg.DIFFERENTIAL_LEARNING_RATE)
        ]

    if cfg.OPTIMIZER == "sgd":
        optimizer = optim.SGD(
            trainable_param,
            lr=cfg.LEARNING_RATE,
            momentum=cfg.MOMENTUM,
            weight_decay=cfg.WEIGHT_DECAY,
        )
    elif cfg.OPTIMIZER == "rmsprop":
        optimizer = optim.RMSprop(
            trainable_param,
            lr=cfg.LEARNING_RATE,
            momentum=cfg.MOMENTUM,
            alpha=cfg.MOMENTUM_2,
            eps=cfg.EPS,
            weight_decay=cfg.WEIGHT_DECAY,
        )
    elif cfg.OPTIMIZER == "adam":
        optimizer = optim.Adam(
            trainable_param,
            lr=cfg.LEARNING_RATE,
            betas=(cfg.MOMENTUM, cfg.MOMENTUM_2),
            weight_decay=cfg.WEIGHT_DECAY,
        )
    elif cfg.OPTIMIZER == "amsgrad":
        optimizer = optim.Adam(
            trainable_param,
            lr=cfg.LEARNING_RATE,
            betas=(cfg.MOMENTUM, cfg.MOMENTUM_2),
            weight_decay=cfg.WEIGHT_DECAY,
            amsgrad=True,
        )
    else:
        raise AssertionError("optimizer can not be recognized")
    return optimizer
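

# --- Example (illustrative sketch, not part of the original module) ----------
# Builds a minimal stand-in for ``cfg.TRAIN.OPTIMIZER`` and feeds it, together
# with the groups from ``trainable_param``, to ``configure_optimizer``. The
# SimpleNamespace and the concrete hyper-parameter values are assumptions made
# for this example; in the real pipeline the attributes come from the config.
def _example_configure_optimizer():
    import torch.nn as nn
    from types import SimpleNamespace

    model = nn.Sequential(nn.Linear(8, 8), nn.ReLU(), nn.Linear(8, 2))  # placeholder model
    opt_cfg = SimpleNamespace(
        OPTIMIZER="sgd",
        LEARNING_RATE=1e-3,
        DIFFERENTIAL_LEARNING_RATE=[],   # empty -> a single parameter group
        MOMENTUM=0.9,
        MOMENTUM_2=0.999,                # unused by sgd, read by rmsprop/adam
        EPS=1e-8,                        # unused by sgd, read by rmsprop
        WEIGHT_DECAY=5e-4,
    )
    params = trainable_param(model, "")  # train every parameter
    # With DIFFERENTIAL_LEARNING_RATE = [1e-3, 1e-4] and a ";"-separated scope
    # (e.g. "backbone;head"), each group returned by trainable_param would get its own lr.
    return configure_optimizer(params, opt_cfg)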


def configure_lr_scheduler(optimizer, cfg):
    r"""Return the learning rate scheduler for the trainable parameters

    Basically, it returns the learning rate scheduler defined by
    :attr:`cfg.TRAIN.LR_SCHEDULER.SCHEDULER`. Some parameters for the learning rate scheduler
    are also defined in :attr:`cfg.TRAIN.LR_SCHEDULER`.

    Currently, 4 popular learning rate schedulers are supported: step, multi_step, exponential
    and sgdr (plus the internal inverted_exponential).

    TODO: directly fetch the scheduler by getattr(lr_scheduler, cfg.SCHEDULER) and pass the
    relevant parameters as a dict.

    Args:
        optimizer: the optimizer in the given ssds model, check :meth:`configure_optimizer`
            for more details.
        cfg: the config dict, which is defined in :attr:`cfg.TRAIN.LR_SCHEDULER`.
    """
    if cfg.SCHEDULER == "step":
        scheduler = lr_scheduler.StepLR(
            optimizer, step_size=cfg.STEPS[0], gamma=cfg.GAMMA
        )
    elif cfg.SCHEDULER == "multi_step":
        scheduler = lr_scheduler.MultiStepLR(
            optimizer, milestones=cfg.STEPS, gamma=cfg.GAMMA
        )
    elif cfg.SCHEDULER == "exponential":
        scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=cfg.GAMMA)
    elif cfg.SCHEDULER == "inverted_exponential":
        scheduler = InvertedExponentialLR(optimizer, end_lr=cfg.LR_MIN)
    elif cfg.SCHEDULER == "sgdr":
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer, T_0=2, T_mult=2, eta_min=cfg.LR_MIN
        )
    else:
        raise AssertionError("scheduler can not be recognized.")
    return scheduler
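

# --- Example (illustrative sketch, not part of the original module) ----------
# Wraps an optimizer with one of the supported schedulers. The SimpleNamespace
# stands in for ``cfg.TRAIN.LR_SCHEDULER``; the model, STEPS, GAMMA and LR_MIN
# values are placeholder assumptions for this example only.
def _example_configure_lr_scheduler():
    import torch.nn as nn
    from types import SimpleNamespace

    model = nn.Linear(8, 2)                               # placeholder model
    optimizer = optim.SGD(model.parameters(), lr=1e-3)    # stand-in for configure_optimizer()
    sched_cfg = SimpleNamespace(
        SCHEDULER="multi_step",
        STEPS=[80, 120],   # epochs at which the lr is multiplied by GAMMA
        GAMMA=0.1,
        LR_MIN=1e-6,       # only read by inverted_exponential / sgdr
    )
    return configure_lr_scheduler(optimizer, sched_cfg)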