From e5fef8f38aa3bde7c9dad282f21c7cf01a42f9d9 Mon Sep 17 00:00:00 2001
From: Zeng Jinle <32832641+sneaxiy@users.noreply.github.com>
Date: Sat, 21 Mar 2020 20:32:23 -0500
Subject: [PATCH] [Dygraph double grad]Code polish (#23121)

* fix dygraph double grad, test=develop

* fix unpack constructor, test=develop
---
 paddle/fluid/imperative/layer.cc              | 19 ++----
 paddle/fluid/imperative/layer.h               |  5 +-
 .../fluid/imperative/partial_grad_engine.cc   | 67 +++++++++----------
 3 files changed, 43 insertions(+), 48 deletions(-)

diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc
index 3936435273..6f2e377feb 100644
--- a/paddle/fluid/imperative/layer.cc
+++ b/paddle/fluid/imperative/layer.cc
@@ -210,15 +210,10 @@ std::string LayerDebugString(const std::string& op_type,
   return LayerDebugStringImpl(op_type, ins, outs);
 }
 
-VarBase::VarBase(bool has_grad, const std::shared_ptr<VariableWrapper>& var)
+VarBase::VarBase(const std::shared_ptr<VariableWrapper>& var)
     : var_(var), grad_node_(var->GetGradNode()) {
-  if (has_grad) {
-    if (auto grad_var = var_->GetGradVar()) {
-      grad_var_ = std::make_shared<VarBase>(false, grad_var);
-    } else {
-      grad_var_ = std::make_shared<VarBase>(false, GradVarName());
-      var_->SetGradVar(grad_var_->var_);
-    }
+  if (auto grad_var = var_->GetGradVar()) {
+    grad_var_ = std::make_shared<VarBase>(grad_var);
   }
 
   if (IsDebugEnabled()) {
@@ -417,10 +412,10 @@ std::shared_ptr<GradOpNode> CreateGradOpNode(
   auto grad_node = info.dygraph_grad_op_maker_(op.Type(), ins, outs, attrs);
 
   if (grad_node && !grad_node->empty()) {
-    for (auto& op : *grad_node) {
-      op.SetId(OpBase::GenerateUniqueId());
-      op.SetPlace(place);
-      ClearNoNeedBufferInputs(&op);
+    for (auto& grad_op : *grad_node) {
+      grad_op.SetId(OpBase::GenerateUniqueId());
+      grad_op.SetPlace(place);
+      ClearNoNeedBufferInputs(&grad_op);
     }
     return grad_node;
   } else {
diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h
index 9a9f0f9bef..17a0b5e043 100644
--- a/paddle/fluid/imperative/layer.h
+++ b/paddle/fluid/imperative/layer.h
@@ -76,7 +76,8 @@ class VarBase {
   explicit VarBase(const std::string& name) : VarBase(true, name) {}
 
   // NOTE(zengjinle): be careful when you use this constructor!!!
-  explicit VarBase(bool has_grad, const std::shared_ptr<VariableWrapper>& var);
+  // Unpack VarBase from VariableWrapper.
+  explicit VarBase(const std::shared_ptr<VariableWrapper>& var);
 
   ~VarBase() {
     VLOG(10) << "Destruct VarBase: " << Name();
@@ -100,7 +101,7 @@ class VarBase {
   const std::shared_ptr<VarBase>& MutableGradVarBase() {
     if (grad_var_ == nullptr) {
       if (auto grad_var_wrapper = var_->GetGradVar()) {
-        grad_var_ = std::make_shared<VarBase>(false, grad_var_wrapper);
+        grad_var_ = std::make_shared<VarBase>(grad_var_wrapper);
       } else {
         grad_var_ = std::make_shared<VarBase>(false, GradVarName());
         var_->SetGradVar(grad_var_->var_);
diff --git a/paddle/fluid/imperative/partial_grad_engine.cc b/paddle/fluid/imperative/partial_grad_engine.cc
index 99cabaf70b..597a9093cd 100644
--- a/paddle/fluid/imperative/partial_grad_engine.cc
+++ b/paddle/fluid/imperative/partial_grad_engine.cc
@@ -719,7 +719,7 @@ PartialGradTask::PartialGradTask(
     auto grad_accumulator_iter = grad_accumulators_.find(mapped_out_grad_var);
     if (grad_accumulator_iter == grad_accumulators_.end()) {
       ready_grad_vars_.Set(mapped_out_grad_var,
-                           std::make_shared<VarBase>(false, out_grad_var));
+                           std::make_shared<VarBase>(out_grad_var));
       VLOG(10) << "Fill 1.0f or user-provided gradient as ready var "
                << out_grad_var->Name();
     } else {
@@ -783,7 +783,7 @@ void PartialGradTask::RunEachOp(const OpBase *op) {
     if (!input_pair.second.IsGrad()) {
       for (auto &fwd_var : input_pair.second) {
         if (fwd_var) {
-          new_inputs.emplace_back(new VarBase(true, fwd_var));
+          new_inputs.emplace_back(new VarBase(fwd_var));
           VLOG(10) << "Unpacked forward var " << fwd_var->Name()
                    << ", grad ops: " << GradOpTypes(*new_inputs.back());
         } else {
@@ -813,7 +813,7 @@ void PartialGradTask::RunEachOp(const OpBase *op) {
       for (auto &fwd_var : output_pair.second) {
         // unpack forward var
         if (fwd_var) {
-          new_outputs.emplace_back(new VarBase(true, fwd_var));
+          new_outputs.emplace_back(new VarBase(fwd_var));
           VLOG(10) << "Unpacked forward var " << fwd_var->Name();
         } else {
           new_outputs.emplace_back();
@@ -878,44 +878,43 @@ void PartialGradTask::RunEachOp(const OpBase *op) {
     auto partial_grad_grads = accumulator_info->SumGradient(
         std::move(grad_var), op->id(), &is_finished);
 
+    if (!partial_grad_grads.empty()) {
+      auto sum_grad_var_grad =
+          accumulator_info->GradVarBase()->MutableGradVarBase();
+      sum_grad_var_grad->SetOverridedStopGradient(false);
+
+      auto assign_node = std::make_shared<GradOpNode>();
+      sum_grad_var_grad->SetGradNode(assign_node);
+
+      VLOG(10) << "Add " << partial_grad_grads.size() << " assign op for "
+               << sum_grad_var_grad->Name();
+
+      for (auto &grad_grad : partial_grad_grads) {
+        auto *assign_op = &(assign_node->emplace_back());
+        assign_op->SetType("assign");  // Can use "scale" as static graph mode
+        assign_op->SetInput("X", {sum_grad_var_grad->SharedVar()}, true);
+        assign_op->SetOutput("Out", {grad_grad}, true);
+        assign_op->CheckAttrs();
+        assign_op->SetId(OpBase::GenerateUniqueId());
+        assign_op->SetPlace(op->place());
+
+        if (auto grad_pending_node = grad_grad->GetGradNode()) {
+          assign_node->InsertGradPendingNode(std::move(grad_pending_node));
+        }
+      }
+      VLOG(10) << "Pending ops of assign is "
+               << GradPendingOpTypes(*assign_node);
+      double_grad_nodes_.emplace_back(assign_node);
+    }
+
     if (is_finished) {
       VLOG(10) << "Sum has finished for "
                << accumulator_info->MappedGradVar()->Name() << " "
                << accumulator_info->GradVarBase();
       ready_grad_vars_.Set(accumulator_info->MappedGradVar(),
                            accumulator_info->GradVarBase());
+      grad_accumulators_.erase(accumulator_info->MappedGradVar());
     }
-
-    if (partial_grad_grads.empty()) {
-      continue;
-    }
-
-    auto sum_grad_var_grad =
-        accumulator_info->GradVarBase()->MutableGradVarBase();
-    sum_grad_var_grad->SetOverridedStopGradient(false);
-
-    auto assign_node = std::make_shared<GradOpNode>();
-    sum_grad_var_grad->SetGradNode(assign_node);
-
-    VLOG(10) << "Add " << partial_grad_grads.size() << " assign op for "
-             << sum_grad_var_grad->Name();
-
-    for (auto &grad_grad : partial_grad_grads) {
-      auto *assign_op = &(assign_node->emplace_back());
-      assign_op->SetType("assign");  // Can use "scale" as static graph mode
-      assign_op->SetInput("X", {sum_grad_var_grad->SharedVar()}, true);
-      assign_op->SetOutput("Out", {grad_grad}, true);
-      assign_op->CheckAttrs();
-      assign_op->SetId(OpBase::GenerateUniqueId());
-      assign_op->SetPlace(op->place());
-
-      if (auto grad_pending_node = grad_grad->GetGradNode()) {
-        assign_node->InsertGradPendingNode(std::move(grad_pending_node));
-      }
-    }
-    VLOG(10) << "Pending ops of assign is " << GradPendingOpTypes(*assign_node);
-    grad_accumulators_.erase(accumulator_info->MappedGradVar());
-    double_grad_nodes_.emplace_back(assign_node);
   }
 
   grads_to_accumulate_.clear();
--
GitLab
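
Aside (not part of the patch above): a minimal, self-contained C++ sketch of the "unpack constructor" pattern that the second commit bullet and the new single-argument VarBase constructor refer to. Wrapper and Handle below are hypothetical stand-ins, not Paddle types; the point is only that unpacking shares the wrapped storage and builds a gradient handle solely when the wrapper already owns one.

// sketch.cc -- illustrative only; Wrapper and Handle are made-up stand-ins.
#include <iostream>
#include <memory>
#include <string>
#include <utility>

// Hypothetical stand-in for a VariableWrapper-like storage object.
class Wrapper {
 public:
  explicit Wrapper(std::string name) : name_(std::move(name)) {}
  const std::string& Name() const { return name_; }
  const std::shared_ptr<Wrapper>& GetGradVar() const { return grad_var_; }
  void SetGradVar(std::shared_ptr<Wrapper> grad) { grad_var_ = std::move(grad); }

 private:
  std::string name_;
  std::shared_ptr<Wrapper> grad_var_;  // gradient storage, may be null
};

// Hypothetical stand-in for a VarBase-like handle.
class Handle {
 public:
  // "Unpack" constructor: share the wrapper, and create a gradient handle
  // only if the wrapper already carries gradient storage. No has_grad flag
  // and no allocation of new gradient storage.
  explicit Handle(const std::shared_ptr<Wrapper>& var) : var_(var) {
    if (auto grad_var = var_->GetGradVar()) {
      grad_handle_ = std::make_shared<Handle>(grad_var);
    }
  }

  const std::shared_ptr<Wrapper>& SharedVar() const { return var_; }
  bool HasGradHandle() const { return grad_handle_ != nullptr; }

 private:
  std::shared_ptr<Wrapper> var_;
  std::shared_ptr<Handle> grad_handle_;
};

int main() {
  auto x = std::make_shared<Wrapper>("x");
  x->SetGradVar(std::make_shared<Wrapper>("x@GRAD"));

  Handle h(x);  // unpack: shares x and picks up the existing x@GRAD storage
  std::cout << h.SharedVar()->Name() << " has grad handle: "
            << h.HasGradHandle() << "\n";
  return 0;
}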