提交 f189ad74 编写于 作者: F fengjiayi

refine the defination of class GradientClipByGlobalNorm

上级 adc26dff
...@@ -93,41 +93,48 @@ class GradientClipByNorm(BaseGradientClipAttr): ...@@ -93,41 +93,48 @@ class GradientClipByNorm(BaseGradientClipAttr):
class GradientClipByGlobalNorm(BaseGradientClipAttr): class GradientClipByGlobalNorm(BaseGradientClipAttr):
global_norm_var = None global_norm_var = None
clip_norm_var = None clip_norm_var = None
ratio_var = None scale_var = None
@classmethod @classmethod
def init(cls, clip_norm): def init(cls, clip_norm):
if not (isinstance(clip_norm, int) or isinstance(clip_norm, float)):
raise TypeError("The 'clip_norm' must be a value of int or float")
cls.global_norm_var = layers.fill_constant( cls.global_norm_var = layers.fill_constant(
shape=[1], dtype="float32", value=0.0) shape=[1], dtype="float32", value=0.0)
cls.clip_norm_var = layers.fill_constant( cls.clip_norm_var = layers.fill_constant(
shape=[1], dtype="float32", value=clip_norm) shape=[1], dtype="float32", value=clip_norm)
def __init__(self): @classmethod
if not (isinstance(self.__class__.global_norm_var, Variable) and def check_init(cls):
isinstance(self.__class__.clip_norm_var, Variable)): if not (isinstance(cls.global_norm_var, Variable) and
isinstance(cls.clip_norm_var, Variable)):
raise ValueError( raise ValueError(
"Class 'GradientClipByGlobalNorm' has not been properly initialized. Please call GradientClipByGlobalNorm.init() first." "Class 'GradientClipByGlobalNorm' has not been properly initialized. \
) Please call GradientClipByGlobalNorm.init() first.")
@classmethod
def process_context(cls, context, param, grad):
cls.check_init()
def process_context(self, context, param, grad):
local_norm_var = layers.reduce_sum( local_norm_var = layers.reduce_sum(
x=layers.pow(x=grad, factor=2), reduce_all=True) x=layers.pow(x=grad, factor=2), reduce_all=True)
layers.sums( layers.sums(
input=[local_norm_var, self.__class__.global_norm_var], input=[local_norm_var, cls.global_norm_var],
out=[self.__class__.global_norm_var]) out=[cls.global_norm_var])
def create_operators(self, param, grad): @classmethod
if self.__class__.ratio_var is None: def create_operators(cls, param, grad):
self.__class__.global_norm_var = layers.sqrt( cls.check_init()
x=self.__class__.global_norm_var)
self.__class__.ratio_var = layers.elementwise_div( if cls.scale_var is None:
x=self.__class__.clip_norm_var, cls.global_norm_var = layers.sqrt(x=cls.global_norm_var)
cls.scale_var = layers.elementwise_div(
x=cls.clip_norm_var,
y=layers.elementwise_max( y=layers.elementwise_max(
x=self.__class__.clip_norm_var, x=cls.clip_norm_var, y=cls.global_norm_var))
y=self.__class__.global_norm_var)) new_grad = layers.elementwise_mul(x=grad, y=cls.scale_var)
# 缺乏elementwise_max return param, new_grad
# 没法将ratio_var送给scale_op。
# new_grad = layers.
def append_gradient_clip_ops(param_grad): def append_gradient_clip_ops(param_grad):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册