未验证 提交 e64105f6 编写于 作者: R Roc 提交者: GitHub

[hybrid npu] fix npu found_finite in hybrid (#35134) (#35291)

Co-authored-by: WangXi <wangxi16@baidu.com>
上级 6fb58aef
......@@ -369,8 +369,11 @@ class ShardingOptimizer(MetaOptimizerBase):
# FIXME(wangxi): mp should prune duplicated param_grads when calculating
# amp inf_var & clip global_norm_var
FP16Utils.sync_amp_check_nan_inf(main_block,
[self.mp_ring_id, self.pp_ring_id])
rings = [self.mp_ring_id, self.pp_ring_id]
# FIXME(wangxi): there is a problem with NPU found_finite; it also needs to be synced with DP
if core.is_compiled_with_npu():
rings += [self.dp_ring_id]
FP16Utils.sync_amp_check_nan_inf(main_block, rings)
gradientclip_helper = GradientClipHelper(None)
gradientclip_helper.sync_global_norm(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册