add grad clip (#1411)
This commit is contained in:
parent
53b514e39d
commit
0e32093fdc
|
@ -42,6 +42,7 @@
|
||||||
| name | 优化器类名 | Adam | 目前支持`Momentum`,`Adam`,`RMSProp`, 见[ppocr/optimizer/optimizer.py](../../ppocr/optimizer/optimizer.py) |
|
| name | 优化器类名 | Adam | 目前支持`Momentum`,`Adam`,`RMSProp`, 见[ppocr/optimizer/optimizer.py](../../ppocr/optimizer/optimizer.py) |
|
||||||
| beta1 | 设置一阶矩估计的指数衰减率 | 0.9 | \ |
|
| beta1 | 设置一阶矩估计的指数衰减率 | 0.9 | \ |
|
||||||
| beta2 | 设置二阶矩估计的指数衰减率 | 0.999 | \ |
|
| beta2 | 设置二阶矩估计的指数衰减率 | 0.999 | \ |
|
||||||
|
| clip_norm | 所允许的二范数最大值 | - | \ |
|
||||||
| **lr** | 设置学习率decay方式 | - | \ |
|
| **lr** | 设置学习率decay方式 | - | \ |
|
||||||
| name | 学习率decay类名 | Cosine | 目前支持`Linear`,`Cosine`,`Step`,`Piecewise`, 见[ppocr/optimizer/learning_rate.py](../../ppocr/optimizer/learning_rate.py) |
|
| name | 学习率decay类名 | Cosine | 目前支持`Linear`,`Cosine`,`Step`,`Piecewise`, 见[ppocr/optimizer/learning_rate.py](../../ppocr/optimizer/learning_rate.py) |
|
||||||
| learning_rate | 基础学习率 | 0.001 | \ |
|
| learning_rate | 基础学习率 | 0.001 | \ |
|
||||||
|
|
|
@ -41,6 +41,7 @@ Take rec_chinese_lite_train_v2.0.yml as an example
|
||||||
| name | Optimizer class name | Adam | Currently supports `Momentum`, `Adam`, `RMSProp`, see [ppocr/optimizer/optimizer.py](../../ppocr/optimizer/optimizer.py) |
|
| name | Optimizer class name | Adam | Currently supports `Momentum`, `Adam`, `RMSProp`, see [ppocr/optimizer/optimizer.py](../../ppocr/optimizer/optimizer.py) |
|
||||||
| beta1 | Set the exponential decay rate for the 1st moment estimates | 0.9 | \ |
|
| beta1 | Set the exponential decay rate for the 1st moment estimates | 0.9 | \ |
|
||||||
| beta2 | Set the exponential decay rate for the 2nd moment estimates | 0.999 | \ |
|
| beta2 | Set the exponential decay rate for the 2nd moment estimates | 0.999 | \ |
|
||||||
|
| clip_norm | The maximum allowed L2 norm of the gradients (gradient clipping threshold) | - | \ |
|
||||||
| **lr** | Set the learning rate decay method | - | \ |
|
| **lr** | Set the learning rate decay method | - | \ |
|
||||||
| name | Learning rate decay class name | Cosine | Currently supports `Linear`, `Cosine`, `Step`, `Piecewise`, see [ppocr/optimizer/learning_rate.py](../../ppocr/optimizer/learning_rate.py) |
|
| name | Learning rate decay class name | Cosine | Currently supports `Linear`, `Cosine`, `Step`, `Piecewise`, see [ppocr/optimizer/learning_rate.py](../../ppocr/optimizer/learning_rate.py) |
|
||||||
| learning_rate | Set the base learning rate | 0.001 | \ |
|
| learning_rate | Set the base learning rate | 0.001 | \ |
|
||||||
|
|
|
@ -16,8 +16,8 @@ from __future__ import absolute_import
|
||||||
from __future__ import division
|
from __future__ import division
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import copy
|
import copy
|
||||||
|
import paddle
|
||||||
|
|
||||||
__all__ = ['build_optimizer']
|
__all__ = ['build_optimizer']
|
||||||
|
|
||||||
|
@ -49,7 +49,13 @@ def build_optimizer(config, epochs, step_each_epoch, parameters):
|
||||||
|
|
||||||
# step3 build optimizer
|
# step3 build optimizer
|
||||||
optim_name = config.pop('name')
|
optim_name = config.pop('name')
|
||||||
|
if 'clip_norm' in config:
|
||||||
|
clip_norm = config.pop('clip_norm')
|
||||||
|
grad_clip = paddle.nn.ClipGradByNorm(clip_norm=clip_norm)
|
||||||
|
else:
|
||||||
|
grad_clip = None
|
||||||
optim = getattr(optimizer, optim_name)(learning_rate=lr,
|
optim = getattr(optimizer, optim_name)(learning_rate=lr,
|
||||||
weight_decay=reg,
|
weight_decay=reg,
|
||||||
|
grad_clip=grad_clip,
|
||||||
**config)
|
**config)
|
||||||
return optim(parameters), lr
|
return optim(parameters), lr
|
||||||
|
|
|
@ -30,18 +30,25 @@ class Momentum(object):
|
||||||
regularization (WeightDecayRegularizer, optional) - The strategy of regularization.
|
regularization (WeightDecayRegularizer, optional) - The strategy of regularization.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, learning_rate, momentum, weight_decay=None, **args):
|
def __init__(self,
|
||||||
|
learning_rate,
|
||||||
|
momentum,
|
||||||
|
weight_decay=None,
|
||||||
|
grad_clip=None,
|
||||||
|
**args):
|
||||||
super(Momentum, self).__init__()
|
super(Momentum, self).__init__()
|
||||||
self.learning_rate = learning_rate
|
self.learning_rate = learning_rate
|
||||||
self.momentum = momentum
|
self.momentum = momentum
|
||||||
self.weight_decay = weight_decay
|
self.weight_decay = weight_decay
|
||||||
|
self.grad_clip = grad_clip
|
||||||
|
|
||||||
def __call__(self, parameters):
|
def __call__(self, parameters):
|
||||||
opt = optim.Momentum(
|
opt = optim.Momentum(
|
||||||
learning_rate=self.learning_rate,
|
learning_rate=self.learning_rate,
|
||||||
momentum=self.momentum,
|
momentum=self.momentum,
|
||||||
parameters=parameters,
|
weight_decay=self.weight_decay,
|
||||||
weight_decay=self.weight_decay)
|
grad_clip=self.grad_clip,
|
||||||
|
parameters=parameters)
|
||||||
return opt
|
return opt
|
||||||
|
|
||||||
|
|
||||||
|
@ -96,10 +103,11 @@ class RMSProp(object):
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
learning_rate,
|
learning_rate,
|
||||||
momentum,
|
momentum=0.0,
|
||||||
rho=0.95,
|
rho=0.95,
|
||||||
epsilon=1e-6,
|
epsilon=1e-6,
|
||||||
weight_decay=None,
|
weight_decay=None,
|
||||||
|
grad_clip=None,
|
||||||
**args):
|
**args):
|
||||||
super(RMSProp, self).__init__()
|
super(RMSProp, self).__init__()
|
||||||
self.learning_rate = learning_rate
|
self.learning_rate = learning_rate
|
||||||
|
@ -107,6 +115,7 @@ class RMSProp(object):
|
||||||
self.rho = rho
|
self.rho = rho
|
||||||
self.epsilon = epsilon
|
self.epsilon = epsilon
|
||||||
self.weight_decay = weight_decay
|
self.weight_decay = weight_decay
|
||||||
|
self.grad_clip = grad_clip
|
||||||
|
|
||||||
def __call__(self, parameters):
|
def __call__(self, parameters):
|
||||||
opt = optim.RMSProp(
|
opt = optim.RMSProp(
|
||||||
|
@ -115,5 +124,6 @@ class RMSProp(object):
|
||||||
rho=self.rho,
|
rho=self.rho,
|
||||||
epsilon=self.epsilon,
|
epsilon=self.epsilon,
|
||||||
weight_decay=self.weight_decay,
|
weight_decay=self.weight_decay,
|
||||||
|
grad_clip=self.grad_clip,
|
||||||
parameters=parameters)
|
parameters=parameters)
|
||||||
return opt
|
return opt
|
||||||
|
|
Loading…
Reference in New Issue