From 63222501f03fc777240b238a581ebe12f6f6d757 Mon Sep 17 00:00:00 2001
From: Qingsheng Li
Date: Fri, 28 Sep 2018 16:16:37 +0800
Subject: [PATCH] [Do not merge] Fix global gradient clip by Yu Yang (#13516)

* Yuyang fix global gradient clip

* Share LoDs

* Revert unnecessary changes

* Fix bug in sequence_slice_op
---
 python/paddle/fluid/clip.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py
index e884185528..4c24d0d6a7 100644
--- a/python/paddle/fluid/clip.py
+++ b/python/paddle/fluid/clip.py
@@ -271,7 +271,8 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
                     "All parameters' 'clip_norm' of a same group should be the same"
                 )
 
-        local_norm_var = layers.reduce_sum(input=layers.pow(x=grad, factor=2.0))
+        square = grad * grad
+        local_norm_var = layers.cast(layers.reduce_sum(input=square), 'float64')
         context[self.group_name].append(local_norm_var)
 
         self.context = context
@@ -281,6 +282,7 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
         if group_scale_name not in self.context:
             group_norm_var = layers.sums(input=self.context[self.group_name])
             group_norm_var = layers.sqrt(x=group_norm_var)
+            group_norm_var = layers.cast(group_norm_var, 'float32')
             clip_var = self.context[self.group_name + "_clip"]
             group_scale_var = layers.elementwise_div(
                 x=clip_var,
--
GitLab
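
For context, a minimal NumPy sketch of the global-norm clipping rule this patch touches: square each gradient, accumulate the sum of squares in float64 (mirroring the patch's cast to 'float64' and back to 'float32'), take the square root, then scale every gradient by clip_norm / max(clip_norm, global_norm). The function name clip_by_global_norm and the NumPy setup are illustrative only, not part of the paddle.fluid API.

# Illustrative sketch, not part of the patch or of paddle.fluid.
import numpy as np

def clip_by_global_norm(grads, clip_norm):
    """Scale gradients so their combined L2 norm is at most clip_norm."""
    # Accumulate the sum of squares in float64, as the patched clip.py does
    # before taking the square root.
    global_sq = np.float64(0.0)
    for g in grads:
        global_sq += np.sum(g.astype(np.float64) ** 2)
    global_norm = np.sqrt(global_sq).astype(np.float32)
    # Same scaling rule as layers.elementwise_div(clip, max(clip, norm)).
    scale = clip_norm / max(clip_norm, global_norm)
    return [g * scale for g in grads], global_norm

# Example: two toy gradients with global norm sqrt(36 + 64) = 10.
grads = [np.full((2, 2), 3.0, dtype=np.float32),
         np.full((4,), 4.0, dtype=np.float32)]
clipped, norm = clip_by_global_norm(grads, clip_norm=1.0)  # scale = 0.1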