From 7a395881d42017dd7ee32bcfa1e744708ed64c3c Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 23 Apr 2018 14:29:24 +0800 Subject: [PATCH] Add customize_loss_grad option to PE --- .../framework/details/multi_devices_graph_builder.cc | 9 ++++++--- .../framework/details/multi_devices_graph_builder.h | 3 +++ paddle/fluid/framework/parallel_executor.cc | 12 +++++++----- paddle/fluid/framework/parallel_executor.h | 2 +- paddle/fluid/pybind/pybind.cc | 10 +++++----- python/paddle/fluid/parallel_executor.py | 6 ++++-- 6 files changed, 26 insertions(+), 16 deletions(-) diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.cc b/paddle/fluid/framework/details/multi_devices_graph_builder.cc index 002952436e5..f27f1843107 100644 --- a/paddle/fluid/framework/details/multi_devices_graph_builder.cc +++ b/paddle/fluid/framework/details/multi_devices_graph_builder.cc @@ -34,7 +34,7 @@ MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder( const std::vector &places, const std::string &loss_var_name, const std::unordered_set &params, - const std::vector &local_scopes, + const std::vector &local_scopes, bool skip_scale_loss, platform::NCCLContextMap *nccl_ctxs) : loss_var_name_(loss_var_name), places_(places), @@ -44,7 +44,7 @@ MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder( MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder( const std::vector &places, const std::string &loss_var_name, - const std::unordered_set &params, + const std::unordered_set &params, bool skip_scale_loss, const std::vector &local_scopes) : loss_var_name_(loss_var_name), places_(places), @@ -53,6 +53,7 @@ MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder( for (auto &p : params) { grad_names_.insert(GradVarName(p)); } + skip_scale_loss_ = skip_scale_loss; } void MultiDevSSAGraphBuilder::CreateOpHandleIOs(SSAGraph *result, @@ -95,7 +96,9 @@ std::unique_ptr MultiDevSSAGraphBuilder::Build( // always use the first device CreateSendOp(&result, *op); } else if (IsScaleLossOp(*op)) { - CreateScaleLossGradOp(&result); + 
if (!skip_scale_loss_) { + CreateScaleLossGradOp(&result); + } is_forwarding = false; } else { CreateComputationalOps(&result, *op); diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.h b/paddle/fluid/framework/details/multi_devices_graph_builder.h index b5ba2dbd3c0..f2428b01ca9 100644 --- a/paddle/fluid/framework/details/multi_devices_graph_builder.h +++ b/paddle/fluid/framework/details/multi_devices_graph_builder.h @@ -34,11 +34,13 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder { const std::string &loss_var_name, const std::unordered_set &params, const std::vector &local_scopes, + bool skip_scale_loss, platform::NCCLContextMap *nccl_ctxs); #else MultiDevSSAGraphBuilder(const std::vector &places, const std::string &loss_var_name, const std::unordered_set &params, + bool skip_scale_loss, const std::vector &local_scopes); #endif @@ -57,6 +59,7 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder { #ifdef PADDLE_WITH_CUDA platform::NCCLContextMap *nccl_ctxs_; #endif + bool skip_scale_loss_; bool IsScaleLossOp(const OpDesc &op) const; diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 67e02e2f119..a673fa52880 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -57,7 +57,8 @@ ParallelExecutor::ParallelExecutor( const std::unordered_set &params, const std::unordered_set &bcast_vars, const ProgramDesc &main_program, const std::string &loss_var_name, - Scope *scope, const std::vector &local_scopes, bool allow_op_delay) + Scope *scope, const std::vector &local_scopes, bool allow_op_delay, + bool customize_scale_loss) : member_(new ParallelExecutorPrivate(places)) { member_->global_scope_ = scope; @@ -90,12 +91,13 @@ ParallelExecutor::ParallelExecutor( // Step 2. Convert main_program to SSA form and dependency graph. 
Also, insert // ncclOp #ifdef PADDLE_WITH_CUDA - details::MultiDevSSAGraphBuilder builder(member_->places_, loss_var_name, - params, member_->local_scopes_, - member_->nccl_ctxs_.get()); + details::MultiDevSSAGraphBuilder builder( + member_->places_, loss_var_name, params, member_->local_scopes_, + customize_scale_loss, member_->nccl_ctxs_.get()); #else details::MultiDevSSAGraphBuilder builder(member_->places_, loss_var_name, - params, member_->local_scopes_); + params, customize_scale_loss, + member_->local_scopes_); #endif auto graph = builder.Build(main_program); diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index f4f283bb4b5..49da123d981 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -40,7 +40,7 @@ class ParallelExecutor { const ProgramDesc& main_program, const std::string& loss_var_name, Scope* scope, const std::vector& local_scopes, - bool allow_op_delay); + bool allow_op_delay, bool customize_scale_loss); ~ParallelExecutor(); diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 1f21e7abe76..b20b514fcdd 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -502,11 +502,11 @@ All parameter, weight, gradient are variables in Paddle. 
const std::unordered_set &bcast_vars, const ProgramDesc &main_program, const std::string &loss_var_name, Scope *scope, std::vector &local_scopes, - bool allow_op_delay) { - new (&self) - ParallelExecutor(num_threads, use_event, places, params, - bcast_vars, main_program, loss_var_name, - scope, local_scopes, allow_op_delay); + bool allow_op_delay, bool customize_loss_grad) { + new (&self) ParallelExecutor(num_threads, use_event, places, + params, bcast_vars, main_program, + loss_var_name, scope, local_scopes, + allow_op_delay, customize_loss_grad); }) .def("bcast_params", &ParallelExecutor::BCastParamsToGPUs) // NOTE: even we return a vec* to Python use reference policy. diff --git a/python/paddle/fluid/parallel_executor.py b/python/paddle/fluid/parallel_executor.py index fbdd6fd4496..364a3eba747 100644 --- a/python/paddle/fluid/parallel_executor.py +++ b/python/paddle/fluid/parallel_executor.py @@ -29,7 +29,8 @@ class ParallelExecutor(object): main_program=None, num_threads=None, allow_op_delay=False, - share_vars_from=None): + share_vars_from=None, + customize_loss_grad=False): """ ParallelExecutor can run program in parallel. @@ -122,7 +123,8 @@ class ParallelExecutor(object): loss_name if loss_name else '', scope, local_scopes, - allow_op_delay) + allow_op_delay, + customize_loss_grad) self.scope = scope def run(self, fetch_list, feed=None, feed_dict=None): -- GitLab