diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py index e9f5c181a6b81dfced799be28af64f78b823f48b..5a9ea1a445e2dab0fc2a1122cedbda994db823dc 100644 --- a/python/paddle/fluid/clip.py +++ b/python/paddle/fluid/clip.py @@ -522,7 +522,9 @@ class ClipGradByGlobalNorm(ClipGradBase): # fp64 global_norm_var_other_dtype = layers.sums(sum_square_list) global_norm_var.append(global_norm_var_other_dtype) - global_norm_var = layers.sums(global_norm_var) + + global_norm_var = layers.sums(global_norm_var) if len( + global_norm_var) > 1 else global_norm_var[0] global_norm_var = layers.sqrt(x=global_norm_var) max_global_norm = layers.fill_constant( shape=[1], diff --git a/python/paddle/fluid/tests/unittests/test_fleet_sharding_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_sharding_meta_optimizer.py index c462896eed22d1912d3acad76ca37d7c0d68231d..3b0df74d3e6b4b8efa2156ff4c6741f4ba91fa5c 100755 --- a/python/paddle/fluid/tests/unittests/test_fleet_sharding_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_sharding_meta_optimizer.py @@ -266,10 +266,9 @@ class TestFleetShardingMetaOptimizer(TestFleetMetaOptimizer): 'c_reduce_sum', 'c_reduce_sum', 'c_reduce_sum', 'c_reduce_sum', 'c_reduce_sum', 'c_reduce_sum', 'c_sync_comm_stream', 'squared_l2_norm', 'squared_l2_norm', 'squared_l2_norm', 'sum', - 'c_allreduce_sum', 'sum', 'c_allreduce_sum', 'sqrt', - 'fill_constant', 'elementwise_max', 'elementwise_div', - 'elementwise_mul', 'elementwise_mul', 'elementwise_mul', 'momentum', - 'momentum', 'momentum' + 'c_allreduce_sum', 'sqrt', 'fill_constant', 'elementwise_max', + 'elementwise_div', 'elementwise_mul', 'elementwise_mul', + 'elementwise_mul', 'momentum', 'momentum', 'momentum' ]) def test_sharding_clone_for_test(self): diff --git a/python/paddle/fluid/tests/unittests/test_gradient_clip.py b/python/paddle/fluid/tests/unittests/test_gradient_clip.py index 4360214e7ddbe616a2383907d8f638e1c445c1b4..e2050cf32dbddc72a420472b2addd274f7474300 100644 --- a/python/paddle/fluid/tests/unittests/test_gradient_clip.py +++ b/python/paddle/fluid/tests/unittests/test_gradient_clip.py @@ -216,7 +216,7 @@ class TestGradientClipByGlobalNorm(TestGradientClip): def test_none_grad_fp32(self): ops = self._test_none_grad_helper("float32") self.assertListEqual(ops, [ - 'squared_l2_norm', 'squared_l2_norm', 'sum', 'sum', 'sqrt', + 'squared_l2_norm', 'squared_l2_norm', 'sum', 'sqrt', 'fill_constant', 'elementwise_max', 'elementwise_div', 'elementwise_mul', 'elementwise_mul' ]) @@ -225,9 +225,8 @@ class TestGradientClipByGlobalNorm(TestGradientClip): ops = self._test_none_grad_helper("float16") self.assertListEqual(ops, [ 'square', 'reduce_sum', 'square', 'reduce_sum', 'sum', 'cast', - 'sum', 'sqrt', 'fill_constant', 'elementwise_max', - 'elementwise_div', 'cast', 'elementwise_mul', 'cast', - 'elementwise_mul' + 'sqrt', 'fill_constant', 'elementwise_max', 'elementwise_div', + 'cast', 'elementwise_mul', 'cast', 'elementwise_mul' ]) def _test_none_grad_helper(self, dtype):