From 17bd776e599c8d28ae2a8f1eb6343a83cdbcb540 Mon Sep 17 00:00:00 2001
From: WangXi
Date: Wed, 17 Mar 2021 07:24:37 +0000
Subject: [PATCH] GlobalNormClip use inplace mul

---
 .../meta_optimizers/sharding/gradient_clip_helper.py |  3 ++-
 python/paddle/fluid/clip.py                          | 11 ++++++++---
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding/gradient_clip_helper.py b/python/paddle/distributed/fleet/meta_optimizers/sharding/gradient_clip_helper.py
index 340eff46f73..4796ad2f1f3 100644
--- a/python/paddle/distributed/fleet/meta_optimizers/sharding/gradient_clip_helper.py
+++ b/python/paddle/distributed/fleet/meta_optimizers/sharding/gradient_clip_helper.py
@@ -111,6 +111,7 @@ class GradientClipHelper(object):
             to_check_param - should_check_param)
 
         for var_name in deperated_vars:
-            block._remove_var(var_name, sync=False)
+            if block.has_var(var_name):
+                block._remove_var(var_name, sync=False)
         block._sync_with_cpp()
         return
diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py
index 8fd01509331..5e4ea24137e 100644
--- a/python/paddle/fluid/clip.py
+++ b/python/paddle/fluid/clip.py
@@ -489,9 +489,14 @@ class ClipGradByGlobalNorm(ClipGradBase):
                 continue
 
             with p.block.program._optimized_guard([p, g]):
-                new_grad = layers.elementwise_mul(x=g, y=scale_var)
-                param_new_grad_name_dict[p.name] = new_grad.name
-                params_and_grads.append((p, new_grad))
+                p.block.append_op(
+                    type='elementwise_mul',
+                    inputs={'X': g,
+                            'Y': scale_var},
+                    outputs={'Out': g})
+
+                param_new_grad_name_dict[p.name] = g.name
+                params_and_grads.append((p, g))
 
         _correct_clip_op_role_var(params_and_grads, param_new_grad_name_dict)
         return params_and_grads
--
GitLab
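
Note (not part of the applied diff): the clip.py hunk replaces
layers.elementwise_mul, which creates a fresh output variable for every
clipped gradient, with a hand-appended elementwise_mul op whose Out is the
gradient variable g itself, so the scaling happens in place and no extra
tensor is allocated. The gradient_clip_helper.py hunk simply guards
_remove_var with has_var so the sharding helper tolerates variables that
were already removed from the block. The sketch below contrasts the two
forms of scaling; it assumes a Paddle 2.0-era install where the legacy
fluid API is still available, and g / scale_var are illustrative stand-ins,
not the library's own code.

    import paddle
    import paddle.fluid as fluid
    from paddle.fluid import layers

    paddle.enable_static()

    main_prog = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(main_prog, startup_prog):
        # Illustrative stand-ins for a gradient and the computed clip scale.
        g = layers.fill_constant(shape=[8], dtype='float32', value=1.0)
        scale_var = layers.fill_constant(shape=[1], dtype='float32', value=0.5)

        # Before the patch: elementwise_mul allocates a new output variable,
        # so each clipped gradient carries a second tensor.
        new_grad = layers.elementwise_mul(x=g, y=scale_var)

        # After the patch: appending the op with Out == X writes the scaled
        # values back into g, avoiding the extra variable.
        g.block.append_op(
            type='elementwise_mul',
            inputs={'X': g,
                    'Y': scale_var},
            outputs={'Out': g})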