From f609ca379173de0be1e288a1832fd074b6c61587 Mon Sep 17 00:00:00 2001
From: WangXi
Date: Wed, 25 Aug 2021 13:09:41 +0800
Subject: [PATCH] [hybrid npu] fix npu found_finite in hybrid (#35134)

---
 .../fleet/meta_optimizers/sharding_optimizer.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py
index c94bd572f05..a76a70cdcab 100755
--- a/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py
+++ b/python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py
@@ -371,8 +371,11 @@ class ShardingOptimizer(MetaOptimizerBase):
 
         # FIXME(wangxi): mp should prune duplicated param_grads when calc
         # amp inf_var & clip global_norm_var
-        FP16Utils.sync_amp_check_nan_inf(main_block,
-                                         [self.mp_ring_id, self.pp_ring_id])
+        rings = [self.mp_ring_id, self.pp_ring_id]
+        # FIXME(wangxi): some problem with NPU found_finite, need sync with DP
+        if core.is_compiled_with_npu():
+            rings += [self.dp_ring_id]
+        FP16Utils.sync_amp_check_nan_inf(main_block, rings)
 
         gradientclip_helper = GradientClipHelper(None)
         gradientclip_helper.sync_global_norm(
--
GitLab
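
For context, below is a minimal, self-contained Python sketch (not PaddlePaddle's
actual implementation) of the idea behind sync_amp_check_nan_inf: every rank holds
a local found_inf flag, and that flag must be reduced (logical OR, i.e. max over
0/1) across each communication ring so all ranks agree on whether to skip the
optimizer step. The helper name sync_found_inf and the concrete ring ids used here
are illustrative assumptions; the real helper rewrites main_block to insert a
collective allreduce-style op over each ring's communicator rather than reading
peer flags directly.

def sync_found_inf(local_found_inf, rings):
    """Reduce the found_inf flag over every ring this rank belongs to.

    `rings` maps an illustrative ring_id to the flags reported by the other
    ranks in that ring; a real implementation would issue an allreduce(max)
    over the corresponding communicator instead of inspecting peer flags.
    """
    found_inf = local_found_inf
    for _ring_id, peer_flags in rings.items():
        # allreduce(max) semantics: any rank seeing nan/inf taints all ranks
        found_inf = max([found_inf, *peer_flags])
    return bool(found_inf)

# The patch widens the ring list on NPU builds: besides the model-parallel
# (mp) and pipeline (pp) rings, the data-parallel (dp) ring is reduced too,
# so a nan/inf found on one DP replica also skips the step on its peers.
mp_pp = {0: [False, True], 1: [False]}   # mp_ring_id=0, pp_ring_id=1 (made up)
with_dp = {**mp_pp, 2: [False, False]}   # dp_ring_id=2 added on NPU builds
print(sync_found_inf(False, mp_pp))      # True: a pp peer saw inf
print(sync_found_inf(False, with_dp))    # still True with the dp ring included

Without the dp ring in the reduction, ranks in different data-parallel replicas
could disagree on found_finite and diverge; including it trades one extra
collective per step for consistent skip decisions, which is what the patch does
on NPU, where found_finite was observed to be unreliable.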