diff --git a/paddle/fluid/operators/listen_and_serv_op.cc b/paddle/fluid/operators/listen_and_serv_op.cc
index bd6e25449f05f27cef04cf8f38a1b0b3a55d8da2..da44128cddd4642d7ef14d4fc475d274b082c5a9 100644
--- a/paddle/fluid/operators/listen_and_serv_op.cc
+++ b/paddle/fluid/operators/listen_and_serv_op.cc
@@ -99,7 +99,6 @@ class ListenAndServOp : public framework::OperatorBase {
     blk_ctx_list.push_back(nullptr);  // block0 is not used.
     for (int blkid = 1; blkid < num_blocks; ++blkid) {
       auto *exe_ctx = executor.Prepare(*program, blkid);
-      VLOG(2) << "prepare ctx: " << exe_ctx;
       blk_ctx_list.push_back(exe_ctx);
     }
 
@@ -149,6 +148,7 @@ class ListenAndServOp : public framework::OperatorBase {
       // should be global ops.
       // NOTE: if is_gpu_place, CUDA kernels are laugched by multiple threads
       // and this will still work.
 
+      std::vector<std::future<void>> fs;
       // block0 contains only listen_and_serv op, start run from block1.
       for (int blkid = 1; blkid < num_blocks - 1; ++blkid) {
@@ -156,13 +156,8 @@ class ListenAndServOp : public framework::OperatorBase {
             [&executor, &program, &recv_scope, &blk_ctx_list, blkid]() {
               int run_block = blkid;  // thread local
               try {
-                VLOG(2) << "run ctx: " << blk_ctx_list[run_block]
-                        << " block: " << run_block;
                 executor.RunPreparedContext(blk_ctx_list[run_block], &recv_scope,
                                             false, false);
-                // executor.Run(*program, &recv_scope, run_block,
-                //              false /*create_local_scope*/,
-                //              false /*create_vars*/);
               } catch (std::exception &e) {
                 LOG(ERROR) << "run sub program error " << e.what();
               }
@@ -174,8 +169,6 @@ class ListenAndServOp : public framework::OperatorBase {
       try {
         executor.RunPreparedContext(blk_ctx_list[num_blocks - 1], &recv_scope,
                                     false, false);
-        // executor.Run(*program, &recv_scope, num_blocks - 1,
-        //              false /*create_local_scope*/, false /*create_vars*/);
       } catch (std::exception &e) {
         LOG(ERROR) << "run sub program error " << e.what();
       }
diff --git a/paddle/fluid/operators/send_op.cc b/paddle/fluid/operators/send_op.cc
index 443f40e803ea31c3961ed77842bd0775e0f74f35..2df25ae5a6d7e94f6a4610a8ba5e106cbcd22ad2 100644
--- a/paddle/fluid/operators/send_op.cc
+++ b/paddle/fluid/operators/send_op.cc
@@ -66,6 +66,7 @@ class SendOp : public framework::OperatorBase {
     auto* client_var = scope.FindVar(client_var_name);
     detail::RPCClient* rpc_client = client_var->GetMutable<detail::RPCClient>();
 
+    ctx.Wait();  // wait before sending
     for (size_t i = 0; i < ins.size(); i++) {
      if (NeedSend(scope, ins[i])) {
        VLOG(3) << "sending " << ins[i] << " to " << epmap[i];
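
Note on the listen_and_serv change: the added std::vector<std::future<void>> fs holds one future per sub-block worker, so the blocks before the last one can run concurrently while the last block stays on the serving thread, and each worker traps its own exceptions so a failing sub-program only logs an error instead of taking down the server loop. The sketch below reproduces that pattern in isolation and is illustrative only: std::async stands in for whatever async helper the operator actually uses, and RunBlock is a hypothetical stand-in for executor.RunPreparedContext.

// Minimal sketch of the fan-out pattern above, under the assumptions stated in
// the note (std::async and RunBlock are illustrative stand-ins).
#include <future>
#include <iostream>
#include <stdexcept>
#include <vector>

void RunBlock(int blkid) {  // hypothetical stand-in for one prepared sub-program
  if (blkid == 3) throw std::runtime_error("block 3 failed");
  std::cout << "block " << blkid << " done\n";
}

int main() {
  const int num_blocks = 5;
  std::vector<std::future<void>> fs;
  // block0 is reserved and the last block runs on the main thread, so workers
  // are launched only for blocks [1, num_blocks - 1), as in the diff.
  for (int blkid = 1; blkid < num_blocks - 1; ++blkid) {
    fs.push_back(std::async(std::launch::async, [blkid]() {
      int run_block = blkid;  // thread local copy
      try {
        RunBlock(run_block);
      } catch (std::exception &e) {
        // Mirror the diff: log and swallow so one bad block does not
        // terminate the serving loop.
        std::cerr << "run sub program error " << e.what() << "\n";
      }
    }));
  }
  RunBlock(num_blocks - 1);     // last block runs inline
  for (auto &f : fs) f.wait();  // join the workers
  return 0;
}

The ctx.Wait() added to send_op.cc covers the other side of the same ordering concern: it blocks until work already queued on the device context has completed, presumably so the tensors handed to the RPC client are fully computed before they are serialized and sent, which is what the inline comment "wait before sending" indicates.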