From e506c99c20dbe6220f37d3a0cab82b98b469cef3 Mon Sep 17 00:00:00 2001 From: chengduo <30176695+chengduoZH@users.noreply.github.com> Date: Wed, 11 Sep 2019 18:12:36 +0800 Subject: [PATCH] Open fuse broadcast option (#18833) * fix vlog level and fuse option type test=develop --- paddle/fluid/framework/details/broadcast_op_handle.cc | 3 +++ paddle/fluid/framework/details/build_strategy.cc | 10 +++++----- paddle/fluid/framework/details/build_strategy.h | 2 +- paddle/fluid/framework/ir/pass_builder.cc | 2 +- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/framework/details/broadcast_op_handle.cc b/paddle/fluid/framework/details/broadcast_op_handle.cc index 75143b9a1a0..afd0b70c29b 100644 --- a/paddle/fluid/framework/details/broadcast_op_handle.cc +++ b/paddle/fluid/framework/details/broadcast_op_handle.cc @@ -126,6 +126,9 @@ void BroadcastOpHandle::BroadcastOneVar( &VariableVisitor::GetMutableTensor(out_var)); } }); + for (auto &p : places_) { + nccl_ctxs_->DevCtx(p)->Wait(); + } #else PADDLE_THROW("CUDA is not enabled."); #endif diff --git a/paddle/fluid/framework/details/build_strategy.cc b/paddle/fluid/framework/details/build_strategy.cc index d14ed36e28a..216fb66c034 100644 --- a/paddle/fluid/framework/details/build_strategy.cc +++ b/paddle/fluid/framework/details/build_strategy.cc @@ -278,12 +278,12 @@ ir::Graph *BuildStrategy::Apply(ir::Graph *graph, #else const bool use_cuda) const { #endif - VLOG(3) << "apply all passes"; + VLOG(1) << "apply all passes"; // Create a default one if not finalized by user. 
CreatePassesFromStrategy(false); for (std::shared_ptr<ir::Pass> &pass : pass_builder_->AllPasses()) { - VLOG(3) << "BuildStrategy::Apply pass:" << pass->Type(); + VLOG(1) << "BuildStrategy::Apply pass:" << pass->Type(); if (IsMultiDevPass(pass->Type())) { pass->Erase(kPlaces); pass->SetNotOwned<const std::vector<platform::Place>>(kPlaces, &places); @@ -349,11 +349,11 @@ ir::Graph *BuildStrategy::Apply(ir::Graph *graph, continue; } } - VLOG(3) << "Start Apply Pass " << pass->Type(); + VLOG(1) << "Start Apply Pass " << pass->Type(); graph = pass->Apply(graph); - VLOG(3) << "Finish Apply Pass " << pass->Type(); + VLOG(1) << "Finish Apply Pass " << pass->Type(); } - VLOG(3) << "All Passes Applied"; + VLOG(1) << "All Passes Applied"; return graph; } diff --git a/paddle/fluid/framework/details/build_strategy.h b/paddle/fluid/framework/details/build_strategy.h index 929cb51b845..47409b89bcf 100644 --- a/paddle/fluid/framework/details/build_strategy.h +++ b/paddle/fluid/framework/details/build_strategy.h @@ -98,7 +98,7 @@ struct BuildStrategy { // faster. Because fusing broadcast OP equals delaying the execution of all // broadcast Ops, in this case, all nccl streams are used only for reduce // operations for a period of time. - bool fuse_broadcast_ops_{false}; + bool fuse_broadcast_ops_{true}; // replace batch_norm with sync_batch_norm. bool sync_batch_norm_{false}; diff --git a/paddle/fluid/framework/ir/pass_builder.cc b/paddle/fluid/framework/ir/pass_builder.cc index 457de41c8f6..8355764aa6c 100644 --- a/paddle/fluid/framework/ir/pass_builder.cc +++ b/paddle/fluid/framework/ir/pass_builder.cc @@ -21,7 +21,7 @@ namespace framework { namespace ir { std::shared_ptr<Pass> PassBuilder::AppendPass(const std::string& pass_type) { - VLOG(3) << "Append " << pass_type; + VLOG(1) << "Append " << pass_type; auto pass = ir::PassRegistry::Instance().Get(pass_type); passes_.emplace_back(pass.release()); return passes_.back(); -- GitLab