diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py
index 7a36df0dabbca74484f607ec6854a92e613fd951..d4f025a4af60d3365b0e85e3024a2dd94f5ae842 100644
--- a/python/paddle/v2/fluid/clip.py
+++ b/python/paddle/v2/fluid/clip.py
@@ -138,8 +138,7 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
         cls = self.__class__
         cls.check_init()
 
-        local_norm_var = layers.reduce_sum(
-            x=layers.pow(x=grad, factor=2), reduce_all=True)
+        local_norm_var = layers.reduce_sum(input=layers.pow(x=grad, factor=2.0))
         layers.sums(
             input=[local_norm_var, cls.global_norm_var],
             out=[cls.global_norm_var])
@@ -154,6 +153,8 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
             x=cls.clip_norm_var,
             y=layers.elementwise_max(
                 x=cls.clip_norm_var, y=cls.global_norm_var))
+        assert cls.scale_var.shape == (1L, )
+
         new_grad = layers.elementwise_mul(x=grad, y=cls.scale_var)
 
         return param, new_grad
diff --git a/python/paddle/v2/fluid/layers/ops.py b/python/paddle/v2/fluid/layers/ops.py
index dd3197fc0029bfc5ede6201b836edbcf57541206..a2055c5d7b844b492addbda220973d0c6c1a54e0 100644
--- a/python/paddle/v2/fluid/layers/ops.py
+++ b/python/paddle/v2/fluid/layers/ops.py
@@ -48,7 +48,7 @@ __all__ = [
     'mean', 'mul', 'reshape', 'scale', 'transpose',
     'sigmoid_cross_entropy_with_logits', 'elementwise_add', 'elementwise_div',
     'elementwise_sub', 'elementwise_mul', 'elementwise_max', 'elementwise_min',
-    'clip', 'clip_by_norm', 'sequence_softmax', 'reduce_sum'
+    'clip', 'clip_by_norm', 'sequence_softmax'
 ] + __activations__
 
 for _OP in set(__all__):
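
For context, the fluid ops changed above assemble the standard clip-by-global-norm computation symbolically: each gradient contributes reduce_sum(pow(grad, 2.0)) to a running global norm, and every gradient is then multiplied by clip_norm / max(clip_norm, global_norm). The NumPy sketch below is only an illustration of that math under the usual formulation; the function name and the explicit sqrt step are assumptions of the sketch, not lines from this diff.

import numpy as np

def clip_by_global_norm(grads, clip_norm):
    # Accumulate sum(g ** 2) per gradient, mirroring the reduce_sum of
    # layers.pow(x=grad, factor=2.0) summed into global_norm_var above,
    # then take the square root to get the global L2 norm (the sqrt is
    # assumed here; it is not visible in this hunk).
    global_norm = np.sqrt(sum(np.sum(g ** 2) for g in grads))
    # scale = clip_norm / max(clip_norm, global_norm): 1.0 when the
    # global norm is already within the limit, < 1.0 otherwise. This is
    # the elementwise_div / elementwise_max pair in the diff.
    scale = clip_norm / max(clip_norm, global_norm)
    return [g * scale for g in grads]

grads = [np.array([3.0, 4.0]), np.array([12.0])]  # global norm = 13.0
clipped = clip_by_global_norm(grads, clip_norm=5.0)
# Each gradient is scaled by 5/13, so the combined norm becomes exactly 5.0.

Because scale is a single scalar shared by all gradients, the relative direction of the overall update is preserved, which is why the diff asserts that scale_var has shape (1,).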