From 598d32d664ba251abb2d8624e4984e42bdd475cb Mon Sep 17 00:00:00 2001
From: WangXi
Date: Tue, 14 Sep 2021 14:26:02 +0800
Subject: [PATCH] fix GradientClipByGlobalNorm in hybrid parallel (#35691)

---
 python/paddle/fluid/clip.py                                | 4 +++-
 .../tests/unittests/test_fleet_sharding_meta_optimizer.py | 7 +++----
 python/paddle/fluid/tests/unittests/test_gradient_clip.py | 7 +++----
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py
index e9f5c181a6..5a9ea1a445 100644
--- a/python/paddle/fluid/clip.py
+++ b/python/paddle/fluid/clip.py
@@ -522,7 +522,9 @@ class ClipGradByGlobalNorm(ClipGradBase):
                 # fp64
                 global_norm_var_other_dtype = layers.sums(sum_square_list)
                 global_norm_var.append(global_norm_var_other_dtype)
-            global_norm_var = layers.sums(global_norm_var)
+
+            global_norm_var = layers.sums(global_norm_var) if len(
+                global_norm_var) > 1 else global_norm_var[0]
             global_norm_var = layers.sqrt(x=global_norm_var)
             max_global_norm = layers.fill_constant(
                 shape=[1],
diff --git a/python/paddle/fluid/tests/unittests/test_fleet_sharding_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_sharding_meta_optimizer.py
index c462896eed..3b0df74d3e 100755
--- a/python/paddle/fluid/tests/unittests/test_fleet_sharding_meta_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/test_fleet_sharding_meta_optimizer.py
@@ -266,10 +266,9 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer):
             'c_reduce_sum', 'c_reduce_sum', 'c_reduce_sum', 'c_reduce_sum',
             'c_reduce_sum', 'c_reduce_sum', 'c_sync_comm_stream',
             'squared_l2_norm', 'squared_l2_norm', 'squared_l2_norm', 'sum',
-            'c_allreduce_sum', 'sum', 'c_allreduce_sum', 'sqrt',
-            'fill_constant', 'elementwise_max', 'elementwise_div',
-            'elementwise_mul', 'elementwise_mul', 'elementwise_mul', 'momentum',
-            'momentum', 'momentum'
+            'c_allreduce_sum', 'sqrt', 'fill_constant', 'elementwise_max',
+            'elementwise_div', 'elementwise_mul', 'elementwise_mul',
+            'elementwise_mul', 'momentum', 'momentum', 'momentum'
         ])
 
     def test_sharding_clone_for_test(self):
diff --git a/python/paddle/fluid/tests/unittests/test_gradient_clip.py b/python/paddle/fluid/tests/unittests/test_gradient_clip.py
index 4360214e7d..e2050cf32d 100644
--- a/python/paddle/fluid/tests/unittests/test_gradient_clip.py
+++ b/python/paddle/fluid/tests/unittests/test_gradient_clip.py
@@ -216,7 +216,7 @@ class TestGradientClipByGlobalNorm(TestGradientClip):
     def test_none_grad_fp32(self):
         ops = self._test_none_grad_helper("float32")
         self.assertListEqual(ops, [
-            'squared_l2_norm', 'squared_l2_norm', 'sum', 'sum', 'sqrt',
+            'squared_l2_norm', 'squared_l2_norm', 'sum', 'sqrt',
             'fill_constant', 'elementwise_max', 'elementwise_div',
             'elementwise_mul', 'elementwise_mul'
         ])
@@ -225,9 +225,8 @@ def test_none_grad_fp16(self):
         ops = self._test_none_grad_helper("float16")
         self.assertListEqual(ops, [
             'square', 'reduce_sum', 'square', 'reduce_sum', 'sum', 'cast',
-            'sum', 'sqrt', 'fill_constant', 'elementwise_max',
-            'elementwise_div', 'cast', 'elementwise_mul', 'cast',
-            'elementwise_mul'
+            'sqrt', 'fill_constant', 'elementwise_max', 'elementwise_div',
+            'cast', 'elementwise_mul', 'cast', 'elementwise_mul'
         ])
 
     def _test_none_grad_helper(self, dtype):
--
GitLab
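
Note on the core change: `layers.sums` emits a `sum` operator into the static
program even when its input list holds a single tensor, which is the common
fp32-only case. The updated tests show the effect: one redundant 'sum' (and,
in the sharding case, a second 'c_allreduce_sum') disappears from the expected
op sequences. The sketch below is illustration only, not part of the patch: it
mimics the patched control flow in ClipGradByGlobalNorm's static-graph path,
and its `sums` helper is a plain-Python stand-in for paddle.fluid.layers.sums
rather than the real API; the numbers are made up for the example.

def sums(tensors):
    # Stand-in for layers.sums: elementwise sum of a list of values.
    # In Paddle's static graph this call is what adds a `sum` op.
    total = tensors[0]
    for t in tensors[1:]:
        total = total + t
    return total

def combine_global_norm(per_dtype_sums):
    """Combine per-dtype sums of squared gradient norms.

    Before the patch, sums() ran unconditionally, adding a redundant
    `sum` op when only one dtype group exists. The patch skips the
    reduction for single-entry lists and uses the entry directly.
    """
    if len(per_dtype_sums) > 1:
        return sums(per_dtype_sums)
    return per_dtype_sums[0]

print(combine_global_norm([9.0]))       # fp32 only: 9.0, no extra op
print(combine_global_norm([9.0, 7.0]))  # fp16 + fp32: 16.0 via a real sum

Keeping the op sequence minimal matters here because hybrid parallelism
(sharding plus data parallel) expects the same clip-related ops on every
rank; the unconditional `sum` produced the mismatched sequences that the
two test files above correct.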