未验证 提交 ffa88c31 编写于 作者: L lilong12 提交者: GitHub

Fix the bug where pipeline gradients are all-reduced multiple times (#30437) (#30595)

* update, test=develop
上级 d15e73b0
...@@ -233,6 +233,7 @@ class PipelineOptimizer(MetaOptimizerBase): ...@@ -233,6 +233,7 @@ class PipelineOptimizer(MetaOptimizerBase):
block = self.main_program_list[ring_id - 1]['program'].global_block() block = self.main_program_list[ring_id - 1]['program'].global_block()
origin_block = self.main_program.global_block() origin_block = self.main_program.global_block()
grad = None grad = None
processed_param_name = set()
for idx, op in reversed(list(enumerate(block.ops))): for idx, op in reversed(list(enumerate(block.ops))):
if is_backward_op(op) and \ if is_backward_op(op) and \
OP_ROLE_VAR_KEY in op.attr_names: OP_ROLE_VAR_KEY in op.attr_names:
...@@ -242,7 +243,10 @@ class PipelineOptimizer(MetaOptimizerBase): ...@@ -242,7 +243,10 @@ class PipelineOptimizer(MetaOptimizerBase):
assert len(op_role_var) % 2 == 0 assert len(op_role_var) % 2 == 0
offset = idx offset = idx
for i in range(0, len(op_role_var), 2): for i in range(0, len(op_role_var), 2):
param_name = op_role_var[i]
param = block.vars[op_role_var[i]] param = block.vars[op_role_var[i]]
if param_name in processed_param_name: continue
processed_param_name.add(param_name)
grad = block.vars[op_role_var[i + 1]] grad = block.vars[op_role_var[i + 1]]
origin_param = origin_block.vars[op_role_var[i]] origin_param = origin_block.vars[op_role_var[i]]
if origin_param.is_distributed: if origin_param.is_distributed:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册