From 86e09b34e3636e7375bab4748660927b1ad33ddf Mon Sep 17 00:00:00 2001
From: Yancey1989
Date: Tue, 26 Jun 2018 09:33:02 +0800
Subject: [PATCH] fix async update error on pserver

---
 paddle/fluid/operators/listen_and_serv_op.cc  |  9 +++++++--
 paddle/fluid/operators/listen_and_serv_op.h   |  3 ++-
 .../fluid/transpiler/distribute_transpiler.py | 15 ++++-----------
 3 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/paddle/fluid/operators/listen_and_serv_op.cc b/paddle/fluid/operators/listen_and_serv_op.cc
index d98bf807a9..0bcb1cee32 100644
--- a/paddle/fluid/operators/listen_and_serv_op.cc
+++ b/paddle/fluid/operators/listen_and_serv_op.cc
@@ -163,7 +163,8 @@ void ListenAndServOp::RunSyncLoop(
 }
 
 void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
-                                   framework::ProgramDesc *program) const {
+                                   framework::ProgramDesc *program,
+                                   framework::Scope *recv_scope) const {
   VLOG(3) << "RunAsyncLoop in";
   // grad name to block id
   std::unordered_map<std::string, int32_t> grad_to_block_id;
@@ -191,6 +192,10 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
     block_list.push_back(blkid);
   }
   auto optimize_prepared = executor->Prepare(*program, block_list);
+  // execute global block if needed
+  if (block_list[0] == 1 && id_to_grad.count(1) == 0) {
+    executor->RunPreparedContext(optimize_prepared[0].get(), recv_scope);
+  }
   std::unordered_map<std::string,
                      std::shared_ptr<framework::ExecutorPrepareContext>>
       grad_to_prepared_ctx;
@@ -319,7 +324,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
   if (sync_mode) {
     RunSyncLoop(&executor, program, &recv_scope, prefetch_block_id_list);
   } else {
-    RunAsyncLoop(&executor, program);
+    RunAsyncLoop(&executor, program, &recv_scope);
   }
 }
 
diff --git a/paddle/fluid/operators/listen_and_serv_op.h b/paddle/fluid/operators/listen_and_serv_op.h
index 634c1b4f4b..f856fdc376 100644
--- a/paddle/fluid/operators/listen_and_serv_op.h
+++ b/paddle/fluid/operators/listen_and_serv_op.h
@@ -50,7 +50,8 @@ class ListenAndServOp : public framework::OperatorBase {
                    const std::vector<int>& prefetch_block_id_list) const;
 
   void RunAsyncLoop(framework::Executor* executor,
-                    framework::ProgramDesc* program) const;
+                    framework::ProgramDesc* program,
+                    framework::Scope* recv_scope) const;
 
   void SavePort() const;
 
diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py
index bb61f82a9c..68b0b1ff4c 100644
--- a/python/paddle/fluid/transpiler/distribute_transpiler.py
+++ b/python/paddle/fluid/transpiler/distribute_transpiler.py
@@ -1299,16 +1299,6 @@ class DistributeTranspiler(object):
                     ufind.union(op1, op2)
         return ufind
 
-    def _is_opt_role_op(self, op):
-        # NOTE: depend on oprole to find out whether this op is for
-        # optimize
-        op_maker = core.op_proto_and_checker_maker
-        optimize_role = core.op_proto_and_checker_maker.OpRole.Optimize
-        if op_maker.kOpRoleAttrName() in op.attrs and \
-                int(op.attrs[op_maker.kOpRoleAttrName()]) == int(optimize_role):
-            return True
-        return False
-
     def _is_optimizer_op(self, op):
         if "Param" in op.input_names and \
                 "LearningRate" in op.input_names:
@@ -1399,7 +1389,10 @@ class DistributeTranspiler(object):
         params_grads = []
         origin_var_dict = self.origin_program.global_block().vars
         for op in block.ops:
-            if self._is_opt_role_op(op):
+            # NOTE(Yancey1989): we cannot use the op role to decide whether an
+            # op is an optimizer op, because every op in the optimizer
+            # sub-graph is tagged with the Optimize op role
+            if self._is_optimizer_op(op):
                 opt_ops.append(op)
                 # HACK(wuyi): if we find grad vars from input of optimize
                 # ops, we may get the output of clip op. Use syntax "@GRAD"
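
A note on the server-side change: RunAsyncLoop now receives recv_scope and, right after
preparing the optimize blocks, executes the global block (block id 1) once when that block
is not mapped to any gradient, so any shared ops placed there actually run in async mode.
Below is a minimal standalone Python sketch of just that decision; it does not use the
Paddle C++ API, and run_block/scope are hypothetical stand-ins for
Executor::RunPreparedContext and recv_scope.

    # Minimal sketch of the check added in RunAsyncLoop (simplified stand-ins,
    # not the Paddle C++ API): block 1 is only executed directly when no
    # gradient has been mapped to it.

    def maybe_run_global_block(block_list, id_to_grad, run_block, scope):
        """Run the global block (id 1) once if no gradient owns it.

        block_list -- optimize block ids prepared for this pserver, sorted
        id_to_grad -- dict mapping block id to the gradient it optimizes
        run_block  -- hypothetical callable standing in for
                      Executor::RunPreparedContext(ctx, scope)
        scope      -- stand-in for the pserver's recv_scope
        """
        if block_list and block_list[0] == 1 and 1 not in id_to_grad:
            run_block(1, scope)

    if __name__ == "__main__":
        executed = []
        maybe_run_global_block(
            block_list=[1, 2, 3],
            id_to_grad={2: "w@GRAD", 3: "b@GRAD"},
            run_block=lambda block_id, scope: executed.append(block_id),
            scope={})
        print(executed)  # [1]: the global block runs exactly once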
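
On the transpiler side, the role-based helper _is_opt_role_op is removed and the existing
_is_optimizer_op is used instead: a true optimizer op (e.g. sgd or adam) takes both a Param
and a LearningRate input, whereas the Optimize op role is attached to every op in the
optimizer sub-graph (gradient clipping, scaling, and so on), which is the mis-classification
the NOTE in the diff describes. The snippet below is a simplified, self-contained
illustration of that input-based check; FakeOp is a hypothetical stand-in, not a real Paddle
operator desc.

    # Simplified illustration of the input-based check used by
    # DistributeTranspiler._is_optimizer_op; FakeOp is a hypothetical stand-in
    # for an operator desc, not a real Paddle class.

    class FakeOp(object):
        def __init__(self, op_type, input_names):
            self.type = op_type
            self.input_names = input_names

    def is_optimizer_op(op):
        # An optimizer op consumes both a parameter and a learning rate.
        return "Param" in op.input_names and "LearningRate" in op.input_names

    ops = [
        FakeOp("clip_by_norm", ["X"]),                     # optimizer sub-graph, not an optimizer
        FakeOp("scale", ["X"]),                            # e.g. gradient scaling
        FakeOp("sgd", ["Param", "Grad", "LearningRate"]),  # true optimizer op
    ]
    print([op.type for op in ops if is_optimizer_op(op)])  # ['sgd']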