diff --git a/paddle/fluid/framework/details/multi_devices_graph_pass.h b/paddle/fluid/framework/details/multi_devices_graph_pass.h index 26fc8dc198645c5cdb2eca8c2a02978d784e7b50..7cc68dd2d5a422cfa1ac3a4bfdd48545a6e5691d 100644 --- a/paddle/fluid/framework/details/multi_devices_graph_pass.h +++ b/paddle/fluid/framework/details/multi_devices_graph_pass.h @@ -132,8 +132,11 @@ class AsyncSSAGraphBuilder : public MultiDevSSAGraphBuilderBase { node->Op()->Flush(); } else if (node->Name() == "lookup_table" || node->Name() == "nce" || node->Name() == "hierarchical_sigmoid") { + // in async_mode, we do not need remote prefetch, because communicator + // will do async parameter recv. VLOG(1) << "set " << node->Name() << " op remote_prefetch to false"; node->Op()->SetAttr("remote_prefetch", false); + node->Op()->Flush(); } return false; } diff --git a/paddle/fluid/framework/scope.h b/paddle/fluid/framework/scope.h index cd752077d66f9b0292a634d42c64ecc4738c0007..6665458d4c8abd999385a7bf6bfd86c15e904b5f 100644 --- a/paddle/fluid/framework/scope.h +++ b/paddle/fluid/framework/scope.h @@ -52,6 +52,10 @@ class Scope { /// Mark it to const because that new kid scope cannot change parent scope. Scope& NewScope() const; + /// Create a sub-scope for current scope but do not record it in the kids to + /// avoid performance problems. + /// Note!!! You should delete the result pointer yourself to avoid memory + /// leak! Scope* NewTmpScope() const; /// Create a variable with given name if it doesn't exist. 
diff --git a/paddle/fluid/operators/distributed/parameter_send.cc b/paddle/fluid/operators/distributed/parameter_send.cc index ec2884c25290aa3cfd9818ead61119cc6c6b6feb..4858dbe84e05fe660c3b4d8fca19fffc49cee8f8 100644 --- a/paddle/fluid/operators/distributed/parameter_send.cc +++ b/paddle/fluid/operators/distributed/parameter_send.cc @@ -81,8 +81,8 @@ void ParameterSend<T>::operator()(const RpcContext &rpc_ctx, auto abs_sections = ToAbsoluteSection(rpc_ctx.height_sections); auto &send_rows = send_slr.rows(); - std::vector<std::vector<int64_t>> outs_rows_idx; - std::vector<std::vector<int64_t>> outs_dense_idx; + std::vector<std::vector<size_t>> outs_rows_idx; + std::vector<std::vector<size_t>> outs_dense_idx; outs_rows_idx.resize(out_num); outs_dense_idx.resize(out_num); @@ -99,7 +99,7 @@ void ParameterSend<T>::operator()(const RpcContext &rpc_ctx, // split rows index into output sparse vars for (size_t i = 0; i < send_rows.size(); ++i) { - int out_idx = GetSectionIndex(send_rows[i], abs_sections); + size_t out_idx = GetSectionIndex(send_rows[i], abs_sections); outs_rows_idx[out_idx].push_back(send_rows[i]); outs_dense_idx[out_idx].push_back(i); } @@ -160,10 +160,9 @@ void ParameterSend<T>::operator()(const RpcContext &rpc_ctx, } } - // note!! 
only support sync send now - if (true || sync) { - for (size_t i = 0; i < rets.size(); i++) { - PADDLE_ENFORCE(rets[i]->Wait(), "internal error in RPCClient"); + if (sync) { + for (auto &handle : rets) { + PADDLE_ENFORCE(handle->Wait(), "internal error in RPCClient"); } } diff --git a/paddle/fluid/operators/distributed_ops/send_op.cc b/paddle/fluid/operators/distributed_ops/send_op.cc index 47688d0ad456873c93e9e7cdc1e550028347b052..b08cd0942f8c89b60d722c931d0cec2063b96578 100644 --- a/paddle/fluid/operators/distributed_ops/send_op.cc +++ b/paddle/fluid/operators/distributed_ops/send_op.cc @@ -52,7 +52,7 @@ class SendOp : public framework::OperatorBase { auto send_functor = distributed::ParameterSend<float>(); auto rpc_ctx = distributed::RpcContext(ins[0], send_varnames, epmap, height_sections); - send_functor(rpc_ctx, scope, static_cast<bool>(sync_send)); + send_functor(rpc_ctx, scope, true); } else { distributed::Communicator::GetInstance()->Send(ins[0], scope); }