diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index c966f97c2d5b553f6ab67bb2f7aac27108b80409..1e20789a1f1b520e33c99b0f8740fbbcf2e792fa 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -28,15 +28,15 @@ namespace paddle { namespace framework { static inline std::unique_ptr CreateGradOp( - const OperatorBase& op, - const std::unordered_set& no_grad_set) { + const OperatorBase& op, const std::unordered_set& no_grad_set, + std::unordered_map* grad_to_var) { OpDescBind op_desc; op_desc.SetInputMap(op.Inputs()); op_desc.SetOutputMap(op.Outputs()); op_desc.SetType(op.Type()); op_desc.SetAttrMap(op.Attrs()); auto& info = OpInfoMap::Instance().Get(op.Type()); - auto grad_descs = info.GradOpMaker()(op_desc, no_grad_set); + auto grad_descs = info.GradOpMaker()(op_desc, no_grad_set, grad_to_var); std::vector> grad_ops; grad_ops.reserve(grad_descs.size()); std::transform(grad_descs.begin(), grad_descs.end(), @@ -99,7 +99,9 @@ static std::unique_ptr NOP() { // See Backward.h for details static std::unique_ptr BackwardRecursive( const OperatorBase& forwardOp, - std::unordered_set& no_grad_names, size_t& uniq_id) { + std::unordered_set& no_grad_names, + std::unordered_map* grad_to_var, + size_t& uniq_id) { // If all input gradients of forwarding operator do not need to calculate, // just return an NOP. Not return null ptr because NOP does not take // too much time for calculation, but it is useful for simplifying logic. @@ -137,7 +139,7 @@ static std::unique_ptr BackwardRecursive( for (auto it = forwardNet.ops_.rbegin(); it != forwardNet.ops_.rend(); ++it, ++local_op_id) { auto& fwd = *it; - auto bwd = BackwardRecursive(*fwd, no_grad_names, uniq_id); + auto bwd = BackwardRecursive(*fwd, no_grad_names, grad_to_var, uniq_id); ForEachVarName(bwd->Outputs(), [&dup_output_ops, local_op_id](const std::string& out) { dup_output_ops[out].emplace_back(local_op_id); @@ -189,7 +191,7 @@ static std::unique_ptr BackwardRecursive( } } else { std::unique_ptr grad_op( - CreateGradOp(forwardOp, no_grad_names)); + CreateGradOp(forwardOp, no_grad_names, grad_to_var)); ForEachVarName(grad_op->Inputs(), [&no_grad_names, &net, &grad_op]( const std::string& grad_input) { @@ -228,7 +230,7 @@ static std::unique_ptr BackwardRecursive( *static_cast(&rnnop.stepnet()); // create stepnet's gradient op rnn_grad_op->set_stepnet( - BackwardRecursive(stepnet_op, no_grad_names, uniq_id)); + BackwardRecursive(stepnet_op, no_grad_names, grad_to_var, uniq_id)); } if (net->ops_.empty()) { // Current no aux op is added to network @@ -255,7 +257,8 @@ std::unique_ptr Backward( no_grad_names.insert(name + kGradVarSuffix); } size_t uid = 0; - return BackwardRecursive(forwardOp, no_grad_names, uid); + std::unordered_map grad_to_var; + return BackwardRecursive(forwardOp, no_grad_names, &grad_to_var, uid); } // ==================================== // @@ -272,30 +275,31 @@ static bool AllGradInSet(const std::vector& names, std::vector> MakeOpGrad( const std::unique_ptr& op_desc, - std::unordered_set& no_grad_vars) { + std::unordered_set* no_grad_vars, + std::unordered_map* grad_to_var) { std::vector> grad_op_descs; // All input gradients of forwarding operator do not need to calculate. const std::vector& inputs = op_desc->InputArgumentNames(); - if (AllGradInSet(inputs, no_grad_vars)) { + if (AllGradInSet(inputs, *no_grad_vars)) { return grad_op_descs; // empty vector } // All output gradients of forwarding operator do not need to calculate. const std::vector& outputs = op_desc->OutputArgumentNames(); - if (AllGradInSet(outputs, no_grad_vars)) { + if (AllGradInSet(outputs, *no_grad_vars)) { for (const std::string& name : inputs) { - no_grad_vars.insert(GradVarName(name)); + no_grad_vars->insert(GradVarName(name)); } return grad_op_descs; // empty vector } grad_op_descs = OpInfoMap::Instance() .Get(op_desc->Type()) - .GradOpMaker()(*op_desc, no_grad_vars); + .GradOpMaker()(*op_desc, *no_grad_vars, grad_to_var); std::list> pending_fill_zeros_ops; for (auto& desc : grad_op_descs) { for (const std::string& in_name : desc->InputArgumentNames()) { - if (no_grad_vars.count(in_name)) { + if (no_grad_vars->count(in_name)) { std::string prefix = in_name.substr( 0, in_name.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1); std::string new_name = prefix + kZeroVarSuffix; @@ -315,7 +319,8 @@ std::vector> MakeOpGrad( std::vector> MakeBlockBackward( ProgramDescBind& program_desc, int block_idx, - std::unordered_set& no_grad_vars) { + std::unordered_set* no_grad_vars, + std::unordered_map* grad_to_var) { BlockDescBind* cur_block = program_desc.Block(block_idx); std::deque>& op_descs = cur_block->ops_; std::unordered_map> dup_out_ops; @@ -323,15 +328,15 @@ std::vector> MakeBlockBackward( std::vector> backward_descs; for (auto it = op_descs.rbegin(); it != op_descs.rend(); ++it) { std::vector> op_grads = - MakeOpGrad(*it, no_grad_vars); + MakeOpGrad(*it, no_grad_vars, grad_to_var); if ((*it)->Type() == "recurrent") { PADDLE_ENFORCE_EQ( op_grads.size(), size_t(1), "rnn_op's gradient process should contain only one op."); int step_block_idx = (*it)->GetBlockAttr("stop_block"); - auto backward_block_op_descs = - MakeBlockBackward(program_desc, step_block_idx, no_grad_vars); + auto backward_block_op_descs = MakeBlockBackward( + program_desc, step_block_idx, no_grad_vars, grad_to_var); BlockDescBind* backward_block = program_desc.AppendBlock(*cur_block); for (auto& ptr : backward_block_op_descs) { backward_block->ops_.push_back(std::move(ptr)); @@ -387,8 +392,9 @@ void AppendBackward(ProgramDescBind& program_desc, no_grad_var_names.insert(GradVarName(name)); } const int root_block_idx = 0; - auto backward_op_descs = - MakeBlockBackward(program_desc, root_block_idx, no_grad_var_names); + std::unordered_map grad_to_var; + auto backward_op_descs = MakeBlockBackward(program_desc, root_block_idx, + &no_grad_var_names, &grad_to_var); auto& forw_op_descs = program_desc.Block(root_block_idx)->ops_; for (auto& ptr : backward_op_descs) { forw_op_descs.push_back(std::move(ptr)); diff --git a/paddle/framework/block_desc.h b/paddle/framework/block_desc.h index 3437e89923da8de79eeaa88d0466cf7eb0b5926d..9d453e1d6f42077df3886d9645e1ab59eaf1aa1d 100644 --- a/paddle/framework/block_desc.h +++ b/paddle/framework/block_desc.h @@ -35,7 +35,8 @@ class BlockDescBind { public: friend std::vector> MakeBlockBackward( ProgramDescBind &program_desc, int block_idx, - std::unordered_set &no_grad_vars); + std::unordered_set *no_grad_vars, + std::unordered_map *grad_to_var); friend void AppendBackward( ProgramDescBind &program_desc, diff --git a/paddle/framework/details/op_registry.h b/paddle/framework/details/op_registry.h index ca8584b78ab081138e0d73b8a71ae4cc111a1b4c..ed7c5f17b0854809bde923276f36440cce193a88 100644 --- a/paddle/framework/details/op_registry.h +++ b/paddle/framework/details/op_registry.h @@ -99,8 +99,9 @@ struct OpInfoFiller { void operator()(const char* op_type, OpInfo* info) const { info->grad_op_maker_ = []( const OpDescBind& fwd_op, - const std::unordered_set& no_grad_set) { - T maker(fwd_op, no_grad_set); + const std::unordered_set& no_grad_set, + std::unordered_map* grad_to_var) { + T maker(fwd_op, no_grad_set, grad_to_var); return maker(); }; } diff --git a/paddle/framework/grad_op_desc_maker.h b/paddle/framework/grad_op_desc_maker.h index d7366b11ec94403e0d8d5d8a3485896f0dc691c0..1219e0487531b19b00adde5a9aa2bde51bfc0aa8 100644 --- a/paddle/framework/grad_op_desc_maker.h +++ b/paddle/framework/grad_op_desc_maker.h @@ -25,8 +25,9 @@ class GradOpDescMakerBase { public: explicit GradOpDescMakerBase( const OpDescBind& fwd_op, - const std::unordered_set& no_grad_set) - : fwd_op_(fwd_op), no_grad_set_(no_grad_set) {} + const std::unordered_set& no_grad_set, + std::unordered_map* grad_to_var) + : fwd_op_(fwd_op), no_grad_set_(no_grad_set), grad_to_var_(grad_to_var) {} virtual ~GradOpDescMakerBase() = default; virtual std::vector> operator()() const = 0; @@ -37,12 +38,17 @@ class GradOpDescMakerBase { std::vector ret_val; auto var_names = this->Input(name); ret_val.reserve(var_names.size()); - std::transform( - var_names.begin(), var_names.end(), std::back_inserter(ret_val), - [this](const std::string& fwd_var_name) -> std::string { - auto g_name = GradVarName(fwd_var_name); - return no_grad_set_.count(g_name) == 0 ? g_name : kEmptyVarName; - }); + std::transform(var_names.begin(), var_names.end(), + std::back_inserter(ret_val), + [this](const std::string& fwd_var_name) -> std::string { + auto g_name = GradVarName(fwd_var_name); + if (no_grad_set_.count(g_name)) { + return kEmptyVarName; + } else { + (*this->grad_to_var_)[g_name] = fwd_var_name; + return g_name; + } + }); if (!drop_empty_grad) { return ret_val; } @@ -95,6 +101,7 @@ class GradOpDescMakerBase { private: const OpDescBind& fwd_op_; const std::unordered_set& no_grad_set_; + std::unordered_map* grad_to_var_; }; class SingleGradOpDescMaker : public GradOpDescMakerBase { diff --git a/paddle/framework/type_defs.h b/paddle/framework/type_defs.h index 7e1b79c97b5b4c3f0292fbfa205a8ca541702fbc..0d1564a7510ddf0106ff417fb0b487ddbde1ac2e 100644 --- a/paddle/framework/type_defs.h +++ b/paddle/framework/type_defs.h @@ -37,7 +37,8 @@ using OpCreator = std::function; using GradOpMakerFN = std::function>( - const OpDescBind&, const std::unordered_set& /*no_grad_set*/)>; + const OpDescBind&, const std::unordered_set& /*no_grad_set*/, + std::unordered_map* /*grad_to_var*/)>; } // namespace framework } // namespace paddle