Unverified · Commit de85470d authored by Yu Yang, committed by GitHub

Support Clip in param_attr (#6729)

* Support Clip in param_attr

* Fix the order of clipping & regularization

Regularization terms do not need to be clipped.
Parent 51f3bf15
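At a glance, this commit lets a gradient-clipping rule be attached to an individual parameter through ParamAttr and applied inside Optimizer.minimize() before regularization. A minimal usage sketch, pieced together from the test change at the bottom of this diff; the fluid.layers.data call and its arguments are an assumption for illustration, not part of this commit:

import paddle.v2.fluid as fluid

# Assumed input variable, only for illustration; the fc call below mirrors
# the test change in this commit.
image = fluid.layers.data(name='image', shape=[784], dtype='float32')

hidden = fluid.layers.fc(
    input=image,
    size=128,
    act='relu',
    param_attr=fluid.ParamAttr(
        regularizer=fluid.regularizer.L2Decay(0.0005),
        # Gradients of this weight are clipped to [-10, 10] before the
        # regularization gradient is added.
        clip=fluid.clip.ClipByValue(10)))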
......@@ -16,12 +16,13 @@ import regularizer
from param_attr import ParamAttr
from data_feeder import DataFeeder
from core import LoDTensor, CPUPlace, GPUPlace
+import clip
Tensor = LoDTensor
__all__ = framework.__all__ + executor.__all__ + [
    'io', 'initializer', 'layers', 'nets', 'optimizer', 'backward',
    'regularizer', 'LoDTensor', 'CPUPlace', 'GPUPlace', 'Tensor', 'ParamAttr'
-    'DataFeeder'
+    'DataFeeder', 'clip'
]
......
import functools
import layers

__all__ = ['GradientClipByValue', 'append_gradient_clip_ops']


class BaseGradientClipAttr(object):
    def process_context(self, context, p_g):
        raise NotImplementedError()

    def create_operators(self, param, grad):
        raise NotImplementedError()


class NullGradientClipAttr(BaseGradientClipAttr):
    def process_context(self, context, p_g):
        pass

    def create_operators(self, param, grad):
        return param, grad


class GradientClipByValue(BaseGradientClipAttr):
    def __init__(self, max, min=None):
        max = float(max)
        if min is None:
            min = -max
        else:
            min = float(min)
        self.max = max
        self.min = min

    def process_context(self, context, p_g):
        pass

    def create_operators(self, param, grad):
        new_grad = layers.clip(x=grad, min=self.min, max=self.max)
        return param, new_grad


def append_gradient_clip_ops(param_grad):
    context = dict()
    create_op_callbacks = []
    for p, g in param_grad:
        clip_attr = getattr(p, 'clip_attr', NullGradientClipAttr())
        if clip_attr is None:
            clip_attr = NullGradientClipAttr()
        if not isinstance(clip_attr, BaseGradientClipAttr):
            raise TypeError(
                "clip attribute should be an instance of BaseGradientClipAttr")
        clip_attr.process_context(context=context, p_g=param_grad)
        create_op_callbacks.append(
            functools.partial(
                clip_attr.create_operators, param=p, grad=g))

    return [each_callback() for each_callback in create_op_callbacks]


ClipByValue = GradientClipByValue
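The clipping hook above is a two-phase protocol: process_context lets an attribute scan every (param, grad) pair first (useful for global statistics), and create_operators is only invoked afterwards, through the collected callbacks, to emit the clipped gradient. A self-contained sketch of that flow in plain Python; FakeVar and fake_clip are stand-ins for fluid variables and layers.clip, not part of the fluid API:

import functools


class FakeVar(object):
    """Stand-in for a fluid Variable; `clip_attr` mirrors Parameter.clip_attr."""

    def __init__(self, name, clip_attr=None):
        self.name = name
        self.clip_attr = clip_attr


def fake_clip(x, min, max):
    # Stand-in for layers.clip: just records what the real op would compute.
    return FakeVar("clip(%s, [%s, %s])" % (x.name, min, max))


class GradientClipByValue(object):
    def __init__(self, max, min=None):
        max = float(max)
        self.min = -max if min is None else float(min)
        self.max = max

    def process_context(self, context, p_g):
        pass  # value clipping needs no cross-parameter state

    def create_operators(self, param, grad):
        return param, fake_clip(x=grad, min=self.min, max=self.max)


def append_gradient_clip_ops(param_grad):
    context = dict()
    callbacks = []
    for p, g in param_grad:
        attr = p.clip_attr
        attr.process_context(context=context, p_g=param_grad)
        callbacks.append(functools.partial(attr.create_operators, param=p, grad=g))
    # Operators are created only after every attribute has seen the full context.
    return [cb() for cb in callbacks]


w = FakeVar("fc_w", clip_attr=GradientClipByValue(10))
g = FakeVar("fc_w@GRAD")
print([grad.name for _, grad in append_gradient_clip_ops([(w, g)])])
# prints: ['clip(fc_w@GRAD, [-10.0, 10.0])']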
......@@ -704,6 +704,7 @@ class Block(object):
                trainable=p.trainable,
                optimize_attr=p.optimize_attr,
                regularizer=p.regularizer,
+                clip_attr=p.clip_attr,
                name=v.name)
            self.vars[new_p.name] = new_p
......@@ -866,6 +867,8 @@ class Parameter(Variable):
        self.regularizer = kwargs.get('regularizer', None)

+        self.clip_attr = kwargs.get('clip_attr', None)


# program is a global instance.
_main_program_ = Program()
......
......@@ -6,6 +6,7 @@ from framework import unique_name, program_guard
from initializer import Constant
from layer_helper import LayerHelper
from regularizer import append_regularization_ops
+from clip import append_gradient_clip_ops

__all__ = ['SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad']
......@@ -197,9 +198,13 @@ class Optimizer(object):
        `create_optimization_pass()` into one.
        """
        params_grads = append_backward_ops(loss, parameter_list, no_grad_set)
+        params_grads = append_gradient_clip_ops(params_grads)
        # Add regularization if any
        params_grads = append_regularization_ops(params_grads,
                                                 self.regularization)
        optimize_ops = self.create_optimization_pass(params_grads, loss,
                                                     startup_program)
        return optimize_ops
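The second bullet in the commit message ("regularization terms do not need to be clipped") is realized by the ordering above: gradient clipping runs on the raw backward gradients, and the weight-decay gradient is appended afterwards. A tiny plain-Python illustration of why the order matters; all numbers are made up for the example:

def clip(g, limit=10.0):
    return max(-limit, min(limit, g))

weight, raw_grad, l2_coeff = 3.0, 50.0, 0.1

# Order after this commit: clip the backward gradient, then add regularization.
clip_then_reg = clip(raw_grad) + l2_coeff * weight      # 10.0 + 0.3 = 10.3

# Reversed order: the weight-decay contribution is clipped away with the rest.
reg_then_clip = clip(raw_grad + l2_coeff * weight)      # clip(50.3) = 10.0

print(clip_then_reg, reg_then_clip)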
......
from initializer import Initializer, Xavier, Constant
from regularizer import WeightDecayRegularizer

__all__ = ['ParamAttr']


class ParamAttr(object):
    def __init__(self,
                 name=None,
......@@ -8,12 +10,14 @@ class ParamAttr(object):
                 initializer=None,
                 learning_rate=1.0,
                 regularizer=None,
-                 trainable=True):
+                 trainable=True,
+                 clip=None):
        self.name = name
        self.initializer = initializer
        self.learning_rate = learning_rate
        self.regularizer = regularizer
        self.trainable = trainable
+        self.clip = clip
    def set_default_initializer(self, initializer):
        if initializer is None:
......@@ -56,7 +60,8 @@ class ParamAttr(object):
            'name': self.name,
            'learning_rate': self.learning_rate,
            'regularizer': self.regularizer,
-            'trainable': self.trainable
+            'trainable': self.trainable,
+            'clip_attr': self.clip
        }
        if with_initializer:
            kwargs['initializer'] = self.initializer
......
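Taken together with the framework and clip changes above, the new argument travels as ParamAttr.clip → to_kwargs()['clip_attr'] → Parameter.clip_attr → getattr(p, 'clip_attr', ...) inside append_gradient_clip_ops. A stripped-down sketch of that hand-off in plain Python; the classes here are simplified stand-ins, not the actual fluid implementations:

class GradientClipByValue(object):
    def __init__(self, max):
        self.max = float(max)


class ParamAttr(object):
    def __init__(self, name=None, clip=None):
        self.name = name
        self.clip = clip

    def to_kwargs(self):
        # The user-facing argument is `clip`; the Parameter-side name is `clip_attr`.
        return {'name': self.name, 'clip_attr': self.clip}


class Parameter(object):
    def __init__(self, **kwargs):
        self.name = kwargs.get('name')
        self.clip_attr = kwargs.get('clip_attr', None)


attr = ParamAttr(name='fc_w', clip=GradientClipByValue(10))
param = Parameter(**attr.to_kwargs())
print(param.name, type(param.clip_attr).__name__, param.clip_attr.max)
# prints: fc_w GradientClipByValue 10.0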
......@@ -11,7 +11,9 @@ regularizer = fluid.regularizer.L2Decay(0.0005 * BATCH_SIZE)
hidden1 = fluid.layers.fc(input=image,
                          size=128,
                          act='relu',
-                          param_attr=regularizer)
+                          param_attr=fluid.ParamAttr(
+                              regularizer=regularizer,
+                              clip=fluid.clip.ClipByValue(10)))
hidden2 = fluid.layers.fc(input=hidden1,
                          size=64,
                          act='relu',
......