diff --git a/paddle/fluid/operators/listen_and_serv_op.cc b/paddle/fluid/operators/listen_and_serv_op.cc index 4ea2c3e0554c6bda3fe54aaa95f695122b1c5b9a..27708ffbe44d38aaa6c504c766e6a72917cf45cc 100644 --- a/paddle/fluid/operators/listen_and_serv_op.cc +++ b/paddle/fluid/operators/listen_and_serv_op.cc @@ -163,7 +163,8 @@ void ListenAndServOp::RunSyncLoop( } void ListenAndServOp::RunAsyncLoop(framework::Executor *executor, - framework::ProgramDesc *program) const { + framework::ProgramDesc *program, + framework::Scope *recv_scope) const { // grad name to block id std::unordered_map grad_to_block_id; std::unordered_map id_to_grad; @@ -190,6 +191,10 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor, block_list.push_back(blkid); } auto optimize_prepared = executor->Prepare(*program, block_list); + // execute global block if needed + if (block_list[0] == 1 && id_to_grad.count(1) == 0) { + executor->RunPreparedContext(optimize_prepared[0].get(), recv_scope); + } std::unordered_map> grad_to_prepared_ctx; @@ -317,7 +322,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, if (sync_mode) { RunSyncLoop(&executor, program, &recv_scope, prefetch_block_id_list); } else { - RunAsyncLoop(&executor, program); + RunAsyncLoop(&executor, program, &recv_scope); } } diff --git a/paddle/fluid/operators/listen_and_serv_op.h b/paddle/fluid/operators/listen_and_serv_op.h index 634c1b4f4b541be9f4950a9ef48f944863486705..f856fdc376196c736593eb8f941c1197d043f7c5 100644 --- a/paddle/fluid/operators/listen_and_serv_op.h +++ b/paddle/fluid/operators/listen_and_serv_op.h @@ -50,7 +50,8 @@ class ListenAndServOp : public framework::OperatorBase { const std::vector& prefetch_block_id_list) const; void RunAsyncLoop(framework::Executor* executor, - framework::ProgramDesc* program) const; + framework::ProgramDesc* program, + framework::Scope* recv_scope) const; void SavePort() const; diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py index 930cdabf117d6b591b682048720f6238d50690f9..20c7ea26b2b0ce2f664bc242f4ab4cbdde5c84e8 100644 --- a/python/paddle/fluid/transpiler/distribute_transpiler.py +++ b/python/paddle/fluid/transpiler/distribute_transpiler.py @@ -1299,16 +1299,6 @@ class DistributeTranspiler(object): ufind.union(op1, op2) return ufind - def _is_opt_role_op(self, op): - # NOTE: depend on oprole to find out whether this op is for - # optimize - op_maker = core.op_proto_and_checker_maker - optimize_role = core.op_proto_and_checker_maker.OpRole.Optimize - if op_maker.kOpRoleAttrName() in op.attrs and \ - int(op.attrs[op_maker.kOpRoleAttrName()]) == int(optimize_role): - return True - return False - def _is_optimizer_op(self, op): if "Param" in op.input_names and \ "LearningRate" in op.input_names: @@ -1399,7 +1389,10 @@ class DistributeTranspiler(object): params_grads = [] origin_var_dict = self.origin_program.global_block().vars for op in block.ops: - if self._is_opt_role_op(op): + # NOTE(Yancey1989): we can not use op role to distinguish an optimizer op + # or not, because all ops in optimizer sub-graph would + # sign the optimizer op role + if self._is_optimizer_op(op): opt_ops.append(op) # HACK(wuyi): if we find grad vars from input of optimize # ops, we may get the output of clip op. Use syntax "@GRAD"