From 2bee99df712080b4412dd8ecb0e7f8cd310fb60a Mon Sep 17 00:00:00 2001 From: Jiabin Yang <360788950@qq.com> Date: Fri, 29 Apr 2022 09:45:10 +0800 Subject: [PATCH] Using small vector for slot and merge edge into grad_slot_meta (#42350) --- .../fluid/distributed/collective/reducer.cc | 10 +- .../eager/accumulation/accumulation_node.cc | 16 +- .../eager/accumulation/accumulation_node.h | 9 +- paddle/fluid/eager/amp_utils.h | 8 +- .../eager_generated/backwards/scale_node.cc | 21 +- .../eager_generated/backwards/scale_node.h | 11 +- .../eager_generated/forwards/scale.cc | 3 - paddle/fluid/eager/api/utils/global_utils.h | 3 +- .../auto_code_generator/eager_generator.cc | 31 ++- .../final_state_generator/eager_gen.py | 17 +- paddle/fluid/eager/backward.cc | 52 +++-- .../custom_operator/custom_operator_node.cc | 31 +-- .../custom_operator/custom_operator_node.h | 11 +- paddle/fluid/eager/grad_node_info.cc | 144 +++++-------- paddle/fluid/eager/grad_node_info.h | 201 +++++++++--------- paddle/fluid/eager/grad_tensor_holder.h | 14 +- paddle/fluid/eager/pylayer/py_layer_node.cc | 20 +- paddle/fluid/eager/pylayer/py_layer_node.h | 9 +- paddle/fluid/eager/tensor_wrapper.h | 2 +- .../accumulation_node_test.cc | 8 +- .../grad_node_info_test.cc | 23 +- .../data_structure_tests/grad_node_test.h | 13 +- .../grad_tensor_holder_test.cc | 4 +- .../eager/tests/task_tests/backward_test.cc | 47 ++-- .../cross_batch_accumulation_test.cc | 2 +- .../tests/task_tests/eager_utils_test.cc | 9 +- .../tests/task_tests/forward_autograd_test.cc | 27 ++- .../fluid/eager/tests/task_tests/grad_test.cc | 46 ++-- .../fluid/eager/tests/task_tests/hook_test.cc | 25 +-- .../eager/to_static/run_program_op_func.h | 3 - .../eager/to_static/run_program_op_node.h | 14 +- paddle/fluid/eager/utils.cc | 6 +- paddle/fluid/eager/utils.h | 6 +- paddle/fluid/pybind/eager_functions.cc | 3 - paddle/fluid/pybind/eager_py_layer.cc | 2 - 35 files changed, 432 insertions(+), 419 deletions(-) diff --git a/paddle/fluid/distributed/collective/reducer.cc b/paddle/fluid/distributed/collective/reducer.cc index 75153df936..a7c3e2208a 100644 --- a/paddle/fluid/distributed/collective/reducer.cc +++ b/paddle/fluid/distributed/collective/reducer.cc @@ -447,10 +447,12 @@ void EagerReducer::TraverseBackwardGraph(const std::vector &outputs) { while (!queue.empty()) { egr::GradNodeBase *node = queue.front(); queue.pop(); - const std::vector> &edges = node->GetEdges(); - for (size_t i = 0; i < edges.size(); i++) { - for (size_t j = 0; j < edges[i].size(); j++) { - const egr::Edge &edge = edges[i][j]; + const paddle::small_vector, + egr::kSlotSmallVectorSize> &metas = + node->OutputMeta(); + for (size_t i = 0; i < metas.size(); i++) { + for (size_t j = 0; j < metas[i].size(); j++) { + const egr::Edge &edge = metas[i][j].GetEdge(); auto next_node_shared = edge.GetMutableGradNode(); if (!next_node_shared || !next_node_shared.get()) { continue; diff --git a/paddle/fluid/eager/accumulation/accumulation_node.cc b/paddle/fluid/eager/accumulation/accumulation_node.cc index 802c28d7d3..08e8f2baef 100644 --- a/paddle/fluid/eager/accumulation/accumulation_node.cc +++ b/paddle/fluid/eager/accumulation/accumulation_node.cc @@ -38,10 +38,13 @@ static void CopyOrAddTensor(paddle::experimental::Tensor* tensor, } } -std::vector> GradNodeAccumulation:: -operator()( - std::vector>& grads, // NOLINT - bool create_graph, bool is_new_grad) { +paddle::small_vector, + kSlotSmallVectorSize> +GradNodeAccumulation::operator()( + paddle::small_vector, + kSlotSmallVectorSize>& grads, // 
NOLINT + bool create_graph, + bool is_new_grad) { VLOG(3) << "Running Eager Backward Node: GradNodeAccumulation"; PADDLE_ENFORCE(grads.size() == 1, paddle::platform::errors::Fatal( @@ -56,8 +59,9 @@ operator()( // Apply Gradient Hooks paddle::experimental::Tensor grad_out; if (GradientHooksRegistered()) { - std::vector> hooked_grads = - ApplyGradientHooks(grads); + paddle::small_vector, + kSlotSmallVectorSize> + hooked_grads = ApplyGradientHooks(grads); grad_out = hooked_grads[0][0]; } else { grad_out = grads[0][0]; diff --git a/paddle/fluid/eager/accumulation/accumulation_node.h b/paddle/fluid/eager/accumulation/accumulation_node.h index dbf518252e..f37de9c8e8 100644 --- a/paddle/fluid/eager/accumulation/accumulation_node.h +++ b/paddle/fluid/eager/accumulation/accumulation_node.h @@ -37,9 +37,12 @@ class GradNodeAccumulation : public GradNodeBase { } // Functor: perform backward computations - virtual std::vector> operator()( - std::vector>& grads, // NOLINT - bool create_graph = false, bool is_new_grad = false) override; + virtual paddle::small_vector, + kSlotSmallVectorSize> + operator()(paddle::small_vector, + kSlotSmallVectorSize>& grads, // NOLINT + bool create_graph = false, + bool is_new_grad = false) override; void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; } diff --git a/paddle/fluid/eager/amp_utils.h b/paddle/fluid/eager/amp_utils.h index 95313bde02..2145f4a119 100644 --- a/paddle/fluid/eager/amp_utils.h +++ b/paddle/fluid/eager/amp_utils.h @@ -21,8 +21,8 @@ namespace egr { static inline paddle::experimental::DataType GetPromoteType( const std::string& op_name, - const std::vector>& - amp_tensors_vector, + const paddle::small_vector, + kSlotSmallVectorSize>& amp_tensors_vector, const paddle::experimental::DataType& amp_dtype) { auto dst_type = amp_dtype; if (egr::Controller::Instance().GetCurrentTracer()->GetAmpDtype() == @@ -86,8 +86,8 @@ static inline paddle::experimental::DataType GetPromoteType( inline paddle::experimental::DataType GetAmpDestDtype( const std::string& op_name, - const std::vector>& - amp_tensors_vector) { + const paddle::small_vector, + kSlotSmallVectorSize>& amp_tensors_vector) { auto amp_dtype = egr::Controller::Instance().GetCurrentTracer()->GetAmpDtype(); auto amp_level = egr::Controller::Instance().GetAMPLevel(); diff --git a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc index 18678b774c..8bd40140f5 100644 --- a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc +++ b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc @@ -144,11 +144,15 @@ void GradNodeScale::SetTensorWrappers_X( void GradNodeScale::SetAttributes_scale(float scale) { scale_ = scale; } -std::vector> GradNodeScale:: -operator()( - std::vector>& grads, // NOLINT - bool create_graph, bool is_new_grad) { +paddle::small_vector, + kSlotSmallVectorSize> +GradNodeScale::operator()( + paddle::small_vector, + kSlotSmallVectorSize>& grads, // NOLINT + bool create_graph, + bool is_new_grad) { // 1. Check Output Size + VLOG(6) << "grad size is: " << grads.size(); PADDLE_ENFORCE( ((grads.size() == 1) && (grads[0].size() == 1)), paddle::platform::errors::Fatal( @@ -156,15 +160,18 @@ operator()( "However received: %d", "This indicates an issue with Eager Dygraph Backward logic", grads.size())); - std::vector> outs; + paddle::small_vector, + kSlotSmallVectorSize> + outs; // 2. 
Create needed out parttern paddle::experimental::Tensor out; // Apply Gradient Hooks if (GradientHooksRegistered()) { // TODO(jiabin): Shall we apply hook slot by slot here or accept // vector> to apply all hooks? - std::vector> hooked_grads = - ApplyGradientHooks(grads); + paddle::small_vector, + kSlotSmallVectorSize> + hooked_grads = ApplyGradientHooks(grads); ScaleAPI(/* slot by slot set */ hooked_grads[0][0], scale_, 0.0 /* bias */, true /* bias_after_scale */, &out); } else { diff --git a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h index cd4c0c5ac6..04ff510944 100644 --- a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h +++ b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h @@ -38,9 +38,12 @@ class GradNodeScale : public GradNodeBase { ~GradNodeScale() override = default; // Functor: perform backward computations - virtual std::vector> operator()( - std::vector>& grads, // NOLINT - bool create_graph = false, bool is_new_grad = false) override; + virtual paddle::small_vector, + kSlotSmallVectorSize> + operator()(paddle::small_vector, + kSlotSmallVectorSize>& grads, // NOLINT + bool create_graph = false, + bool is_new_grad = false) override; void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; } @@ -48,7 +51,7 @@ class GradNodeScale : public GradNodeBase { const std::vector& tensors); void SetAttributes_scale(float scale); - std::string name() override { return ""; } + std::string name() override { return "scale node"; } // Members: define fwd input tensors // For Scale there is no fwd input tensor needed diff --git a/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc b/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc index 1be3b31de0..7a374d567d 100644 --- a/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc +++ b/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc @@ -79,9 +79,6 @@ paddle::experimental::Tensor scale(const paddle::experimental::Tensor& x, // Pass Attributes to GradNode scale_node->SetAttributes_scale(scale); - // Set Next Edges - scale_node->AddEdges(p_autograd_in, /*slot id*/ 0); - // Set TensorWrappers scale_node->SetTensorWrappers_X({x}); diff --git a/paddle/fluid/eager/api/utils/global_utils.h b/paddle/fluid/eager/api/utils/global_utils.h index adfcab961b..44e78c3bbf 100644 --- a/paddle/fluid/eager/api/utils/global_utils.h +++ b/paddle/fluid/eager/api/utils/global_utils.h @@ -19,8 +19,9 @@ #include #include "paddle/fluid/imperative/tracer.h" #include "paddle/phi/api/ext/op_meta_info.h" +#include "paddle/utils/small_vector.h" namespace egr { - +constexpr size_t kSlotSmallVectorSize = 15U; class UniqueNameGenerator { public: explicit UniqueNameGenerator(std::string prefix = "") : prefix_(prefix) {} diff --git a/paddle/fluid/eager/auto_code_generator/eager_generator.cc b/paddle/fluid/eager/auto_code_generator/eager_generator.cc index 39559a2d90..6b962b537e 100644 --- a/paddle/fluid/eager/auto_code_generator/eager_generator.cc +++ b/paddle/fluid/eager/auto_code_generator/eager_generator.cc @@ -1187,11 +1187,6 @@ static std::string GenerateGradNodeCreationContent( grad_node_creation_str += paddle::string::Sprintf( SET_GRAD_OUT_META_TEMPLATE, input_name, input_position); - const char* ADD_EDGES_TEMPLATE = - " if(%s) grad_node->AddEdges(%s, %d);\n"; - grad_node_creation_str += - paddle::string::Sprintf(ADD_EDGES_TEMPLATE, input_autograd_name, 
- input_autograd_name, input_position); } else { compute_require_grad_args += ", &" + input_autograd_name; size_t input_position = fwd_inputs_name_pos_map.at(input_name); @@ -1200,10 +1195,6 @@ static std::string GenerateGradNodeCreationContent( " grad_node->SetGradOutMeta(%s, %d);\n"; grad_node_creation_str += paddle::string::Sprintf( SET_GRAD_OUT_META_TEMPLATE, input_name, input_position); - - const char* ADD_EDGES_TEMPLATE = " grad_node->AddEdges(&%s, %d);\n"; - grad_node_creation_str += paddle::string::Sprintf( - ADD_EDGES_TEMPLATE, input_autograd_name, input_position); } } @@ -1649,7 +1640,8 @@ static std::pair GenerateForwardFunctionContents( std::string amp_logic_str = ""; if (in_vars.size() != 0) { const char* AMP_TENSORS_VECTOR_TEMPLATE = - " std::vector> " + " paddle::small_vector, " + "egr::kSlotSmallVectorSize> " "amp_tensors_vector = { " "%s };\n"; std::string amp_tensors_vector = paddle::string::Sprintf( @@ -2428,9 +2420,11 @@ static std::string GenerateGradNodeCCContents( } const char* BWD_RETURN_TEMPLATE = - " std::vector> hooked_grads = " + " paddle::small_vector, " + "egr::kSlotSmallVectorSize> hooked_grads = " "GradNode%s::ApplyGradientHooks(grads);\n" - " std::vector> outputs(%d);\n" + " paddle::small_vector, " + "egr::kSlotSmallVectorSize> outputs(%d);\n" " %s\n" " if(NeedComplexToRealConversion()) " "HandleComplexGradToRealGrad(&outputs);\n" @@ -2441,9 +2435,11 @@ static std::string GenerateGradNodeCCContents( // [Generation] Get Full Grad Function const char* GRAD_FUNCTION_TEMPLATE = - "std::vector> " + "paddle::small_vector, " + "egr::kSlotSmallVectorSize> " "GradNode%s::operator()(" - "std::vector>& grads, bool " + "paddle::small_vector, " + "egr::kSlotSmallVectorSize>& grads, bool " "create_graph, bool is_new_grad) {\n" "%s" "%s" @@ -2487,9 +2483,12 @@ static std::string GenerateGradNodeHeaderContents( "Construct GradNode%s \"; }\n" " ~GradNode%s() override { VLOG(6) << \" Destruct GradNode%s \"; }\n" "\n" - " virtual std::vector> " + " virtual " + "paddle::small_vector, " + "egr::kSlotSmallVectorSize> " "operator()(" - "std::vector>& grads, bool " + "paddle::small_vector, " + "egr::kSlotSmallVectorSize>& grads, bool " "create_graph = false, bool is_new_grad = false) " "override;\n" "\n" diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py index 078f1b3039..00b9aa7a23 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py @@ -118,8 +118,8 @@ class {} : public egr::GradNodeBase {{ egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {{}} ~{}() override = default; - virtual std::vector> operator()( - std::vector>& grads, bool create_graph = false, bool is_new_grad = false) override; + virtual paddle::small_vector, egr::kSlotSmallVectorSize> operator()( + paddle::small_vector, egr::kSlotSmallVectorSize>& grads, bool create_graph = false, bool is_new_grad = false) override; std::string name() override {{ return \"{}\"; }} void ClearTensorWrappers() override {{ @@ -149,7 +149,7 @@ class {} : public egr::GradNodeBase {{ GRAD_FUNCTION_TEMPLATE = \ """ -std::vector> {}::operator()(std::vector>& grads, bool create_graph, bool is_new_grad) {{ +paddle::small_vector, egr::kSlotSmallVectorSize> {}::operator()(paddle::small_vector, egr::kSlotSmallVectorSize>& grads, bool create_graph, bool is_new_grad) {{ // Fill Zero For GradIn Tensors {} @@ -239,7 +239,6 @@ 
FORWARD_BODY_TEMPLATE = \ // Set TensorWrappers for Forward Inputs {} // SetGradOutMeta & SetEdges -{} {} // SetOutRank & SetHistory & SetGradInMeta & RetainGrad {} @@ -356,7 +355,7 @@ AMP_LOGIC_TEMPLATE = \ if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {{ VLOG(5) << "Check and Prepare For AMP"; {} - std::vector> amp_tensors_vector = {}; + paddle::small_vector, egr::kSlotSmallVectorSize> amp_tensors_vector = {}; {} {} {} @@ -769,15 +768,11 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase): is_optional = (name in self.optional_inputs) if is_optional: set_grad_out_meta = f"{indent}if({name}.get_ptr() != nullptr) grad_node->SetGradOutMeta(*({name}.get_ptr()), {pos});" - set_edges = f"{indent}if({name}.get_ptr() != nullptr) grad_node->AddEdges({input_autograd_meta_name}, {pos});" else: set_grad_out_meta = f"{indent}grad_node->SetGradOutMeta({name}, {pos});" - set_edges = f"{indent}grad_node->AddEdges({input_autograd_meta_name}, {pos});" set_grad_out_meta_list.append(set_grad_out_meta) - set_edges_list.append(set_edges) set_grad_out_meta_str = "\n".join(set_grad_out_meta_list) - set_edges_str = "\n".join(set_edges_list) # SetOutRank & SetHistory & SetGradInMeta set_out_rank_list = [] @@ -808,7 +803,7 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase): self.node_creation_str = FORWARD_BODY_TEMPLATE.format( node_creation_event_str, pass_stop_gradient_args_str, node_construction_str, set_attributes_str, - set_input_tensor_wrappers_str, set_grad_out_meta_str, set_edges_str, + set_input_tensor_wrappers_str, set_grad_out_meta_str, set_out_rank_str, set_history_str, set_grad_in_meta_str, set_retain_grad_str, set_output_tensor_wrappers_str) @@ -1454,7 +1449,7 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): # Construct grad_api returns slot_num_bwd_outputs = len(self.forward_inputs_position_map.keys()) - returns_str = f"{indent}std::vector> returns({slot_num_bwd_outputs});\n" + returns_str = f"{indent}paddle::small_vector, egr::kSlotSmallVectorSize> returns({slot_num_bwd_outputs});\n" for name, (ttype, fwd_position, grad_api_position) in backward_grad_outputs_map.items(): transformed_tensor_name = self.TransformToNextGradName(name) diff --git a/paddle/fluid/eager/backward.cc b/paddle/fluid/eager/backward.cc index 7ca1b49bcb..3f56c2d01c 100644 --- a/paddle/fluid/eager/backward.cc +++ b/paddle/fluid/eager/backward.cc @@ -169,9 +169,12 @@ class GeneralGrad { input_target_nodes_inputmeta_map.count(node); // Find and append next nodes - const std::vector>& edges = node->GetEdges(); - for (const auto& edge_list : edges) { - for (const Edge& edge : edge_list) { + const paddle::small_vector, + kSlotSmallVectorSize>& metas = + node->OutputMeta(); + for (const auto& meta_list : metas) { + for (const GradSlotMeta& meta : meta_list) { + const auto& edge = meta.GetEdge(); GradNodeBase* next_node = edge.GetMutableGradNode().get(); // Next node could be nullptr if it is leaf tensor with no @@ -381,13 +384,15 @@ class GeneralGrad { "unable to find copied target for certain grad node.")); GradNodeBase* copied_node = orig_to_copied_node_mapping_[orig_node].get(); - const std::vector>& orig_edges = orig_node->GetEdges(); - std::vector>& copied_edges = - copied_node->GetMutableEdges(); - for (size_t i = 0; i < orig_edges.size(); i++) { - for (size_t j = 0; j < orig_edges[i].size(); j++) { - const Edge& orig_edge = orig_edges[i][j]; - Edge& copied_edge = copied_edges[i][j]; + const paddle::small_vector, + kSlotSmallVectorSize>& orig_meta = + 
orig_node->OutputMeta(); + paddle::small_vector, kSlotSmallVectorSize>& + copied_edges = copied_node->MutableOutputMeta(); + for (size_t i = 0; i < orig_meta.size(); i++) { + for (size_t j = 0; j < orig_meta[i].size(); j++) { + const Edge& orig_edge = orig_meta[i][j].GetEdge(); + Edge& copied_edge = copied_edges[i][j].GetMutableEdge(); std::shared_ptr orig_next_node = orig_edge.GetMutableGradNode(); @@ -468,9 +473,11 @@ std::unordered_map getInDegreeMap( "We got null node when we traverse the backward graph, and this " "should not happened please check your code and contact us.")); // Find and append next nodes - const std::vector>& edges = node->GetEdges(); - for (const auto& edge_list : edges) { - for (const Edge& edge : edge_list) { + const paddle::small_vector, kSlotSmallVectorSize>& + metas = node->OutputMeta(); + for (const auto& meta_list : metas) { + for (const GradSlotMeta& meta : meta_list) { + const auto& edge = meta.GetEdge(); GradNodeBase* next_node = edge.GetMutableGradNode().get(); // Next node could be nullptr if it is leaf tensor with no // AccumulationNode attached @@ -689,8 +696,10 @@ std::vector RunBackward( VLOG(6) << "Run Backward Kernel with GradTensorHolder."; // Run Pre Backward Node and get outputs - std::vector> grad_output_tensors = - (*node)(node_input_buffer->Buffers(), create_graph, is_general_grad); + paddle::small_vector, + kSlotSmallVectorSize> + grad_output_tensors = (*node)(node_input_buffer->Buffers(), + create_graph, is_general_grad); // retain_grad or not if (!retain_graph) { @@ -704,17 +713,18 @@ std::vector RunBackward( node_input_buffers_dict.erase(node); // Prepare GradTensorHolder for next node - const std::vector>& edges = node->GetEdges(); - PADDLE_ENFORCE(edges.size() == grad_output_tensors.size() || edges.empty(), + const paddle::small_vector, kSlotSmallVectorSize>& + metas = node->OutputMeta(); + PADDLE_ENFORCE(metas.size() == grad_output_tensors.size() || metas.empty(), paddle::platform::errors::Fatal( "Number of edges should be either empty ( for leaf node " ") or the same as number of output grad tensors, but we " "got edges size is: %d, grad_output size is: %d", - edges.size(), grad_output_tensors.size())); + metas.size(), grad_output_tensors.size())); - for (size_t i = 0; i < edges.size(); i++) { - for (size_t j = 0; j < edges[i].size(); j++) { - const Edge& edge = edges[i][j]; + for (size_t i = 0; i < metas.size(); i++) { + for (size_t j = 0; j < metas[i].size(); j++) { + const Edge& edge = metas[i][j].GetEdge(); if (!edge.IsInitialized()) { continue; } diff --git a/paddle/fluid/eager/custom_operator/custom_operator_node.cc b/paddle/fluid/eager/custom_operator/custom_operator_node.cc index a9a41c106d..2bb86a86e8 100644 --- a/paddle/fluid/eager/custom_operator/custom_operator_node.cc +++ b/paddle/fluid/eager/custom_operator/custom_operator_node.cc @@ -19,10 +19,12 @@ #include "paddle/phi/core/dense_tensor.h" namespace egr { -std::vector> RunCustomOpNode:: -operator()( - std::vector>& grads, // NOLINT - bool create_graph, bool is_new_grad) { +paddle::small_vector, + kSlotSmallVectorSize> +RunCustomOpNode::operator()( + paddle::small_vector, + kSlotSmallVectorSize>& grads, + bool create_graph, bool is_new_grad) { // NOLINT paddle::CustomOpKernelContext ctx; auto grad_inputs_name = paddle::framework::OpMetaInfoHelper::GetInputs( egr::Controller::Instance().GetOpMetaInfoMap().at(op_type_)[1]); @@ -31,8 +33,9 @@ operator()( auto map = egr::Controller::Instance().GetCustomEdgesSlotMap().at(op_type_); auto kernel_map = 
egr::Controller::Instance().GetOpMetaInfoMap(); - std::vector> tmp_ins( - grad_inputs_name.size()); + paddle::small_vector, + kSlotSmallVectorSize> + tmp_ins(grad_inputs_name.size()); VLOG(7) << " Prepare Backward inputs of grads with size: " << grads.size() << ", whose grad_inputs_name size is: " << grad_inputs_name.size(); for (size_t i = 0; i < grads.size(); i++) { @@ -58,17 +61,19 @@ operator()( } VLOG(6) << "Prepare Grad attrs"; ctx.EmplaceBackAttrs(attrs_); - std::vector> outs( - GetEdges().size()); - std::vector> tmp_outs( - grad_outputs_names.size()); + paddle::small_vector, + kSlotSmallVectorSize> + outs(OutputMeta().size()); + paddle::small_vector, + kSlotSmallVectorSize> + tmp_outs(grad_outputs_names.size()); VLOG(6) << "Prepare Grad outputs for size: " << grad_outputs_names.size(); - for (size_t i = 0; i < GetEdges().size(); i++) { + for (size_t i = 0; i < OutputMeta().size(); i++) { if (map[0].find(i) != map[0].end()) { VLOG(7) << "Insert grad outputs: " << i - << " with size: " << GetEdges()[i].size() + << " with size: " << OutputMeta()[i].size() << " to tmp_outputs: " << map[0][i]; - for (size_t j = 0; j < GetEdges()[i].size(); j++) { + for (size_t j = 0; j < OutputMeta()[i].size(); j++) { outs[i].emplace_back(/* init it incase of copy nullptr of shared_ptr */ std::make_shared( phi::DataType::UNDEFINED), diff --git a/paddle/fluid/eager/custom_operator/custom_operator_node.h b/paddle/fluid/eager/custom_operator/custom_operator_node.h index 2e7885001c..4801088e51 100644 --- a/paddle/fluid/eager/custom_operator/custom_operator_node.h +++ b/paddle/fluid/eager/custom_operator/custom_operator_node.h @@ -36,10 +36,13 @@ class RunCustomOpNode : public GradNodeBase { } // Functor: perform backward computations - virtual std::vector> - operator()( // NOLINT - std::vector>& grads, // NOLINT - bool create_graph = false, bool is_new_grad = false) // NOLINT + virtual paddle::small_vector, + kSlotSmallVectorSize> + operator()( // NOLINT + paddle::small_vector, + kSlotSmallVectorSize>& grads, // NOLINT + bool create_graph = false, + bool is_new_grad = false) // NOLINT override; std::string name() { diff --git a/paddle/fluid/eager/grad_node_info.cc b/paddle/fluid/eager/grad_node_info.cc index 5b4921320f..610b177829 100644 --- a/paddle/fluid/eager/grad_node_info.cc +++ b/paddle/fluid/eager/grad_node_info.cc @@ -40,70 +40,20 @@ GradNodeBase::GradNodeBase(size_t bwd_in_slot_num, size_t bwd_out_slot_num) { VLOG(6) << "Construct GradNodeBase"; bwd_in_meta_.resize(bwd_in_slot_num); bwd_out_meta_.resize(bwd_out_slot_num); - adj_edges_.resize(bwd_out_slot_num); } -void GradNodeBase::AddEdges(std::vector* metas, size_t slot_id) { - PADDLE_ENFORCE_LT( - slot_id, adj_edges_.size(), - paddle::platform::errors::InvalidArgument( - "Given slot id is out of range of adj_edges outter size, " - "adj_edges is designed to has the same size of grad " - "inputs's slot num.")); - - for (size_t i = 0; i < metas->size(); i++) { - const auto& meta = (*metas)[i]; - // adj_edges has as same rank as fwd inputs, and record it's output rank - // from - // its pre-ops - if (meta && !meta->StopGradient()) { - auto node = meta->GetMutableGradNode(); - if (!node || !node.get()) { - meta->SetGradNode(std::make_shared(meta)); - } - VLOG(6) << "Add Edges for slot: " << slot_id << ", the Edge is from " - << this->name() << " (addr: " << this << ") " - << " to " << meta->GetMutableGradNode()->name() - << " (addr: " << meta->GetMutableGradNode().get() << ")"; - - adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(), - 
meta->OutRankInfo()); - } else { - adj_edges_[slot_id].emplace_back(); - } - } -} - -void GradNodeBase::AddEdges(AutogradMeta* meta, size_t slot_id) { - PADDLE_ENFORCE_LT( - slot_id, adj_edges_.size(), - paddle::platform::errors::InvalidArgument( - "Given slot id is out of range of adj_edges outter size, " - "adj_edges is designed to has the same size of grad " - "inputs's slot num.")); - - if (meta && !meta->StopGradient()) { - auto node = meta->GetMutableGradNode(); - if (!node || !node.get()) { - meta->SetGradNode(std::make_shared(meta)); - } - VLOG(6) << "Add Edges for slot: " << slot_id << ", the Edge is from " - << this->name() << " (addr: " << this << ") " - << " to " << meta->GetMutableGradNode()->name() - << " (addr: " << meta->GetMutableGradNode().get() << ")"; - - adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(), - meta->OutRankInfo()); - } else { - adj_edges_[slot_id].emplace_back(); - } +const paddle::small_vector, kSlotSmallVectorSize>& +GradNodeBase::InputMeta() const { + return bwd_in_meta_; } -const std::vector>& GradNodeBase::InputMeta() const { - return bwd_in_meta_; +const paddle::small_vector, kSlotSmallVectorSize>& +GradNodeBase::OutputMeta() const { + return bwd_out_meta_; } -const std::vector>& GradNodeBase::OutputMeta() const { +paddle::small_vector, kSlotSmallVectorSize>& +GradNodeBase::MutableOutputMeta() { return bwd_out_meta_; } @@ -123,7 +73,9 @@ void GradNodeBase::SetGradInMeta(const paddle::experimental::Tensor& fwd_out, } auto& meta = metas[0]; - meta.SetStopGradient(fwd_out_meta->StopGradient()); + if (fwd_out_meta && fwd_out_meta->StopGradient()) { + meta.SetStopGradient(fwd_out_meta->StopGradient()); + } if (!fwd_out.initialized()) { VLOG(6) @@ -153,8 +105,8 @@ void GradNodeBase::SetGradInMeta(const paddle::experimental::Tensor& fwd_out, meta.SetTensorMeta(dense_tensor->meta()); meta.SetPlace(fwd_out.place()); - if (paddle::framework::IsComplexType( - paddle::framework::TransToProtoVarType(dense_tensor->type()))) { + if (dense_tensor->type() == paddle::experimental::DataType::COMPLEX64 || + dense_tensor->type() == paddle::experimental::DataType::COMPLEX128) { need_complex_to_real_ = true; } } @@ -186,7 +138,7 @@ void GradNodeBase::SetGradInMeta( "Bwd_in_meta should only be called while " "autograd_meta is not null. If you got this " "error, it indicates bugs in framework.")); - if (fwd_out_meta->StopGradient()) { + if (fwd_out_meta && fwd_out_meta->StopGradient()) { // Set Stop Gradient only when its true or non-initialized autograd_meta, // since all default value is false. 
meta.SetStopGradient(fwd_out_meta->StopGradient()); @@ -212,8 +164,8 @@ void GradNodeBase::SetGradInMeta( meta.SetTensorMeta(dense_tensor->meta()); meta.SetPlace(fwd_out_tensor.place()); - if (paddle::framework::IsComplexType( - paddle::framework::TransToProtoVarType(dense_tensor->type()))) { + if (dense_tensor->type() == paddle::experimental::DataType::COMPLEX64 || + dense_tensor->type() == paddle::experimental::DataType::COMPLEX128) { need_complex_to_real_ = true; } } else { @@ -238,12 +190,24 @@ void GradNodeBase::SetGradOutMeta(const paddle::experimental::Tensor& fwd_in, metas.resize(1); } auto& meta = metas[0]; + // Set Stop_gradient if (fwd_in_meta) { meta.SetStopGradient(fwd_in_meta->StopGradient()); - } else { - meta.SetStopGradient(true); } + // Set Adj Edges + if (fwd_in_meta && !fwd_in_meta->StopGradient()) { + auto node = fwd_in_meta->GetMutableGradNode(); + if (!node || !node.get()) { + fwd_in_meta->SetGradNode( + std::make_shared(fwd_in_meta)); + } + VLOG(6) << "Add Edges for slot: " << slot_rank << ", the Edge is from " + << this->name() << " (addr: " << this << ") " + << " to " << fwd_in_meta->GetMutableGradNode()->name() + << " (addr: " << fwd_in_meta->GetMutableGradNode().get() << ")"; + meta.SetEdge(fwd_in_meta->GetMutableGradNode(), fwd_in_meta->OutRankInfo()); + } // Record TensorMeta if (fwd_in.impl() && fwd_in.impl().get()) { if (phi::DenseTensor::classof(fwd_in.impl().get())) { @@ -282,30 +246,43 @@ void GradNodeBase::SetGradOutMeta( const auto& fwd_in_tensor = fwd_in[i]; auto& meta = metas[i]; auto* fwd_in_meta = egr::EagerUtils::nullable_autograd_meta(fwd_in_tensor); + // Set Stop_gradient if (fwd_in_meta) { - // Set Stop Gradient only when its true or non-initialized autograd_meta, - // since all default value is false. meta.SetStopGradient(fwd_in_meta->StopGradient()); } + // Set Adj Edges + if (fwd_in_meta && !fwd_in_meta->StopGradient()) { + auto node = fwd_in_meta->GetMutableGradNode(); + if (!node || !node.get()) { + fwd_in_meta->SetGradNode( + std::make_shared(fwd_in_meta)); + } + VLOG(6) << "Add Edges for slot: " << slot_rank << ", the Edge is from " + << this->name() << " (addr: " << this << ") " + << " to " << fwd_in_meta->GetMutableGradNode()->name() + << " (addr: " << fwd_in_meta->GetMutableGradNode().get() << ")"; + meta.SetEdge(fwd_in_meta->GetMutableGradNode(), + fwd_in_meta->OutRankInfo()); + } // Record TensorMeta if (fwd_in_tensor.impl() && fwd_in_tensor.impl().get()) { if (phi::DenseTensor::classof(fwd_in_tensor.impl().get())) { // Only Copy Meta phi::DenseTensor* dense_tensor = static_cast(fwd_in_tensor.impl().get()); - PADDLE_ENFORCE_NE(dense_tensor->meta().dtype, phi::DataType::UNDEFINED, paddle::platform::errors::Fatal( - "Attempting to copy DenseTensorMeta with " - "phi::DataType::UNDEFINED," + "Attempting to copy DenseTensorMeta " + "with phi::DataType::UNDEFINED," "which is illegal.")); meta.SetTensorMeta(dense_tensor->meta()); meta.SetPlace(fwd_in_tensor.place()); } } else { - VLOG(6) << "Unable to initialize the DenseTensorMeta of GradSlotMeta " - "with non-DenseTensor argument."; + VLOG(6) + << "Unable to initialize the DenseTensorMeta of GradSlotMeta with " + "non-DenseTensor argument."; } } } @@ -328,18 +305,14 @@ int64_t GradNodeBase::RegisterGradientHook( return next_hook_id_++; } -const std::vector>& GradNodeBase::GetEdges() const { - return adj_edges_; -} - -std::vector>& GradNodeBase::GetMutableEdges() { - return adj_edges_; -} - -std::vector> +paddle::small_vector, + kSlotSmallVectorSize> GradNodeBase::ApplyGradientHooks( - 
const std::vector>& tensors) { - std::vector> outs(tensors.size()); + const paddle::small_vector, + kSlotSmallVectorSize>& tensors) { + paddle::small_vector, + kSlotSmallVectorSize> + outs(tensors.size()); for (auto& hook_pair : gradient_hooks_) { size_t slot_id = std::get<0>(hook_pair.second); size_t rank = std::get<1>(hook_pair.second); @@ -386,7 +359,8 @@ GradNodeBase::ApplyGradientHooks( } void GradNodeBase::HandleComplexGradToRealGrad( - std::vector>* out_grads) { + paddle::small_vector, + kSlotSmallVectorSize>* out_grads) { for (size_t slot_id = 0; slot_id < out_grads->size(); slot_id++) { const std::vector& slot_out_grads = (*out_grads)[slot_id]; diff --git a/paddle/fluid/eager/grad_node_info.h b/paddle/fluid/eager/grad_node_info.h index 07b62082f5..6fdee203c1 100644 --- a/paddle/fluid/eager/grad_node_info.h +++ b/paddle/fluid/eager/grad_node_info.h @@ -16,6 +16,7 @@ #include +#include "paddle/fluid/eager/api/utils/global_utils.h" #include "paddle/fluid/eager/eager_tensor.h" #include "paddle/fluid/eager/hooks.h" #include "paddle/phi/api/all.h" @@ -46,9 +47,8 @@ namespace egr { * indicate which * input of grad this edge belong). * */ -class Edge; class AutogradMeta; - +class GradNodeBase; /** * GradSlotMeta is used to Record Forward Tensor info to backward, since paddle * has lots of operators @@ -56,6 +56,70 @@ class AutogradMeta; * So, we need a meta info * to record it's needs. * **/ +class Edge { + public: + // Default constructor for Edges in order to construct it for AutogradMeta + Edge() : in_slot_id_(0), in_rank_(0), grad_node_(nullptr) {} + + // In real use cases we should create Edge from grad node and input rank which + // indicate which edge it is. + // Since we have slot design in operators we will have to locate an edge with + // slot + // and rank. + Edge(const std::shared_ptr& grad_node, size_t in_slot_id, + size_t in_rank) + : in_slot_id_(in_slot_id), in_rank_(in_rank), grad_node_(grad_node) {} + + Edge(const std::shared_ptr& grad_node, + const std::pair& rank_info) + : in_slot_id_(rank_info.first), + in_rank_(rank_info.second), + grad_node_(grad_node) {} + + GradNodeBase* GetGradNode() const { return grad_node_.get(); } + + std::shared_ptr GetMutableGradNode() const { + return grad_node_; + } + + void SetGradNode(const std::shared_ptr& node) { + VLOG(6) << "Reseting Edge's Grad Node"; + grad_node_ = node; + } + + std::pair GetEdgeRankInfo() const { + return std::make_pair(in_slot_id_, in_rank_); + } + + void SetEdgeRankInfo(size_t slot_id, size_t in_rank) { + in_slot_id_ = slot_id; + in_rank_ = in_rank; + } + + void SetEdgeRankInfo( + const std::pair& edge_rank) { + in_slot_id_ = edge_rank.first; + in_rank_ = edge_rank.second; + } + + // Currently we use grad_node_ to identify if a edge is initialized. 
+ bool IsInitialized() const { + if (!grad_node_) { + return false; + } else { + if (!(grad_node_.get())) { + return false; + } else { + return true; + } + } + } + + private: + size_t in_slot_id_; + size_t in_rank_; + std::shared_ptr grad_node_{nullptr}; +}; class GradSlotMeta { public: GradSlotMeta() = default; @@ -81,10 +145,21 @@ class GradSlotMeta { void SetPlace(const phi::Place& place) { place_ = place; } const phi::Place& GetPlace() const { return place_; } + void SetEdge(const Edge& edge) { adj_edge_ = edge; } + void SetEdge( + const std::shared_ptr& grad_node, + const std::pair& rank_info) { + adj_edge_.SetGradNode(grad_node); + adj_edge_.SetEdgeRankInfo(rank_info); + } + Edge& GetMutableEdge() { return adj_edge_; } + const Edge& GetEdge() const { return adj_edge_; } + private: bool stop_gradient_{false}; phi::Place place_; std::shared_ptr meta_ = nullptr; + Edge adj_edge_; }; class GradNodeBase { @@ -107,9 +182,12 @@ class GradNodeBase { * so, vector of vector * is better choice to fit this format. * **/ - virtual std::vector> operator()( - std::vector>& grads, // NOLINT - bool create_graph = false, bool is_new_grad = false) = 0; + virtual paddle::small_vector, + kSlotSmallVectorSize> + operator()(paddle::small_vector, + kSlotSmallVectorSize>& grads, // NOLINT + bool create_graph = false, + bool is_new_grad = false) = 0; virtual void ClearTensorWrappers() = 0; @@ -118,17 +196,6 @@ class GradNodeBase { * **/ virtual std::shared_ptr Copy() const = 0; - /** - * AddEdges is designed to set input tensors' backward Node as current - * node's Edges. - * This method should be call in forward code and for double backward depends - * computation. - * - * This one is called slot by slot - * **/ - void AddEdges(std::vector* metas, size_t slot_id); - void AddEdges(AutogradMeta* meta, size_t slot_id); - // adj_edges were moved inside OutputMeta(), so no available direct access // from GradNodeBase. // To access Edges, get GradSlotMeta by calling OutputMeta(), then use @@ -136,10 +203,15 @@ class GradNodeBase { /** * Get Input Meta of current Grad node**/ - const std::vector>& InputMeta() const; + const paddle::small_vector, kSlotSmallVectorSize>& + InputMeta() const; /** * Get Output Meta of current Grad node**/ - const std::vector>& OutputMeta() const; + const paddle::small_vector, kSlotSmallVectorSize>& + OutputMeta() const; + + paddle::small_vector, kSlotSmallVectorSize>& + MutableOutputMeta(); /** * Set bwd ins and outs info with forward vars * **/ @@ -180,23 +252,22 @@ class GradNodeBase { * **/ inline bool GradientHooksRegistered() { return !gradient_hooks_.empty(); } - std::vector> ApplyGradientHooks( - const std::vector>& tensors); + paddle::small_vector, + kSlotSmallVectorSize> + ApplyGradientHooks( + const paddle::small_vector, + kSlotSmallVectorSize>& tensors); /** * Handle Complex - Real Type Promotion * **/ void HandleComplexGradToRealGrad( - std::vector>* out_grads); + paddle::small_vector, + kSlotSmallVectorSize>* out_grads); bool NeedComplexToRealConversion() { return need_complex_to_real_; } virtual std::string name() { return "GradNodeBase"; } - /** - * GetEdges is designed to get all edges of current node**/ - const std::vector>& GetEdges() const; - std::vector>& GetMutableEdges(); - /** * The following interfaces are designed for no_need_buffer * **/ @@ -207,18 +278,13 @@ class GradNodeBase { } private: - // TODO(zhanlve): Merge adj_edges_ into GradOutMeta - // Edges recorded the backward related node info, which indicate all edges - // linked - // by this Grad Node. 
- // Why we need vector>: Edges is as same rank as bwd output. - std::vector> adj_edges_; - // bwd_out_meta_ is used to record Grad output info for backward - std::vector> bwd_out_meta_; + paddle::small_vector, kSlotSmallVectorSize> + bwd_out_meta_; // bwd_in_meta_ used to record Grad input info for backward - std::vector> bwd_in_meta_; + paddle::small_vector, kSlotSmallVectorSize> + bwd_in_meta_; // Gradient Hooks // Customer may register a list of hooks which will be called in order during // backward @@ -235,71 +301,6 @@ class GradNodeBase { bool is_tensor_wrappers_cleared_ = false; }; -class Edge { - public: - // Default constructor for Edges in order to construct it for AutogradMeta - Edge() : in_slot_id_(0), in_rank_(0), grad_node_(nullptr) {} - - // In real use cases we should create Edge from grad node and input rank which - // indicate which edge it is. - // Since we have slot design in operators we will have to locate an edge with - // slot - // and rank. - Edge(const std::shared_ptr& grad_node, size_t in_slot_id, - size_t in_rank) - : in_slot_id_(in_slot_id), in_rank_(in_rank), grad_node_(grad_node) {} - - Edge(const std::shared_ptr& grad_node, - const std::pair& rank_info) - : in_slot_id_(rank_info.first), - in_rank_(rank_info.second), - grad_node_(grad_node) {} - - GradNodeBase* GetGradNode() const { return grad_node_.get(); } - - std::shared_ptr GetMutableGradNode() const { - return grad_node_; - } - - void SetGradNode(const std::shared_ptr& node) { - VLOG(6) << "Reseting Edge's Grad Node"; - grad_node_ = node; - } - - std::pair GetEdgeRankInfo() const { - return std::make_pair(in_slot_id_, in_rank_); - } - - void SetEdgeRankInfo(size_t slot_id, size_t in_rank) { - in_slot_id_ = slot_id; - in_rank_ = in_rank; - } - - void SetEdgeRankInfo( - const std::pair& edge_rank) { - in_slot_id_ = edge_rank.first; - in_rank_ = edge_rank.second; - } - - // Currently we use grad_node_ to identify if a edge is initialized. 
- bool IsInitialized() const { - if (!grad_node_) { - return false; - } else { - if (!(grad_node_.get())) { - return false; - } else { - return true; - } - } - } - - private: - size_t in_slot_id_; - size_t in_rank_; - std::shared_ptr grad_node_{nullptr}; -}; - inline void CheckTensor(const paddle::experimental::Tensor& pre, const paddle::experimental::Tensor& post) { if (!pre.initialized() && post.initialized()) { diff --git a/paddle/fluid/eager/grad_tensor_holder.h b/paddle/fluid/eager/grad_tensor_holder.h index 80b7c59df8..a9800afc62 100644 --- a/paddle/fluid/eager/grad_tensor_holder.h +++ b/paddle/fluid/eager/grad_tensor_holder.h @@ -27,7 +27,8 @@ namespace egr { class GradTensorHolder { public: explicit GradTensorHolder( - const std::vector>& metas) { + const paddle::small_vector, + kSlotSmallVectorSize>& metas) { VLOG(7) << "Init GradTensorHolder with meta size: " << metas.size(); buffer_.resize(metas.size()); for (size_t i = 0; i < buffer_.size(); i++) { @@ -39,7 +40,8 @@ class GradTensorHolder { GradTensorHolder(const GradTensorHolder& other) = default; explicit GradTensorHolder( - std::vector>&& inputs) + paddle::small_vector, + kSlotSmallVectorSize>&& inputs) : buffer_(std::move(inputs)) {} GradTensorHolder& operator=(const GradTensorHolder& other) = default; @@ -56,14 +58,18 @@ class GradTensorHolder { return buffer_[pos]; } - std::vector>& Buffers() { + paddle::small_vector, + kSlotSmallVectorSize>& + Buffers() { return buffer_; } void SetBufferSlotRankZeros(size_t slot_id, size_t rank); private: - std::vector> buffer_; + paddle::small_vector, + kSlotSmallVectorSize> + buffer_; }; } // namespace egr diff --git a/paddle/fluid/eager/pylayer/py_layer_node.cc b/paddle/fluid/eager/pylayer/py_layer_node.cc index 29e98483ed..fad4fd50a5 100644 --- a/paddle/fluid/eager/pylayer/py_layer_node.cc +++ b/paddle/fluid/eager/pylayer/py_layer_node.cc @@ -29,14 +29,18 @@ #include "pybind11/pytypes.h" namespace egr { -std::vector> GradNodePyLayer:: -operator()( - std::vector>& grads, // NOLINT - bool create_graph, bool is_new_grad) { +paddle::small_vector, + kSlotSmallVectorSize> +GradNodePyLayer::operator()( + paddle::small_vector, + kSlotSmallVectorSize>& grads, // NOLINT + bool create_graph, + bool is_new_grad) { VLOG(3) << "Running Eager Backward Node: " << name(); - std::vector> hooked_grads = - GradNodePyLayer::ApplyGradientHooks(grads); + paddle::small_vector, + kSlotSmallVectorSize> + hooked_grads = GradNodePyLayer::ApplyGradientHooks(grads); paddle::pybind::PyLayerObject* ctx = reinterpret_cast(ctx_); @@ -124,7 +128,9 @@ operator()( ctx->forward_input_tensor_is_duplicable.size(), outputs_size)); } - std::vector> grad_out; + paddle::small_vector, + kSlotSmallVectorSize> + grad_out; grad_out.reserve(ctx->forward_input_tensor_is_duplicable.size()); for (size_t i = 0; i < ctx->forward_input_tensor_is_duplicable.size(); i++) { if (i < outputs_size) { diff --git a/paddle/fluid/eager/pylayer/py_layer_node.h b/paddle/fluid/eager/pylayer/py_layer_node.h index 40291afaba..affed77019 100644 --- a/paddle/fluid/eager/pylayer/py_layer_node.h +++ b/paddle/fluid/eager/pylayer/py_layer_node.h @@ -34,9 +34,12 @@ class GradNodePyLayer : public GradNodeBase { ~GradNodePyLayer() override { Py_DECREF(ctx_); }; - virtual std::vector> operator()( - std::vector>& grads, // NOLINT - bool create_graph = false, bool is_new_grad = false) override; + virtual paddle::small_vector, + kSlotSmallVectorSize> + operator()(paddle::small_vector, + kSlotSmallVectorSize>& grads, // NOLINT + bool create_graph = false, + bool 
is_new_grad = false) override; void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; } diff --git a/paddle/fluid/eager/tensor_wrapper.h b/paddle/fluid/eager/tensor_wrapper.h index 3ee1603a53..f13fcfa990 100644 --- a/paddle/fluid/eager/tensor_wrapper.h +++ b/paddle/fluid/eager/tensor_wrapper.h @@ -88,7 +88,7 @@ class TensorWrapper { } else { intermidiate_tensor_.set_impl(tensor.impl()); } - + // TODO(jiabin): This may has server performance issue intermidiate_tensor_.set_name(tensor.name() + "@Saved"); auto* tensor_autograd_meta = EagerUtils::nullable_autograd_meta(tensor); diff --git a/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc b/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc index 6c6c7fd25e..f9f00749dc 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc @@ -80,14 +80,18 @@ TEST(AccumulationNode, Tensor) { grad_meta->SetStopGradient(false); // operator() - std::vector> et0_vec = {{et0}}; + paddle::small_vector, + kSlotSmallVectorSize> + et0_vec = {{et0}}; paddle::experimental::Tensor ret_et0 = node->operator()(et0_vec)[0][0]; auto* ret_et0_ptr = std::dynamic_pointer_cast(ret_et0.impl()) ->data(); CHECK_EQ(ret_et0_ptr[0], paddle::platform::float16(10.0f)); - std::vector> et1_vec = {{et1}}; + paddle::small_vector, + kSlotSmallVectorSize> + et1_vec = {{et1}}; paddle::experimental::Tensor ret_et1 = node->operator()(et1_vec)[0][0]; auto* ret_et1_ptr = diff --git a/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc b/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc index d592b5ccf6..6687b6621a 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc @@ -34,7 +34,9 @@ void TestGradNodeBase(bool is_remove_gradient_hook) { auto grad_test_node0 = std::make_shared( /* val */ 5.0, /* in_num */ 2, /* out_num */ 2); auto grad_test_node1 = std::make_shared(); - std::vector> grads; + paddle::small_vector, + egr::kSlotSmallVectorSize> + grads; phi::DenseTensorMeta meta = phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 1})); std::shared_ptr dt = std::make_shared( @@ -51,28 +53,9 @@ void TestGradNodeBase(bool is_remove_gradient_hook) { CHECK_EQ(std::dynamic_pointer_cast(res[0][0].impl()) ->data()[0], 6.0f); - VLOG(6) << "Test Add Edges"; - egr::Edge tmp_edge0(grad_test_node1, 1, 2); - auto auto_grad0 = std::make_shared(tmp_edge0); - auto_grad0->SetStopGradient(false); - egr::Edge tmp_edge1(grad_test_node1, 3, 4); auto auto_grad1 = std::make_shared(tmp_edge1); et1.set_autograd_meta(auto_grad1); - auto_grad1->SetStopGradient(false); - grad_test_node0->AddEdges(auto_grad0.get(), 0); - - CHECK_EQ(grad_test_node0->GetEdges()[0][0].GetEdgeRankInfo().first, - size_t(1)); - CHECK_EQ(grad_test_node0->GetEdges()[0][0].GetEdgeRankInfo().second, - size_t(2)); - std::vector metas = {auto_grad1.get()}; - - grad_test_node0->AddEdges(&metas, 1); - CHECK_EQ(grad_test_node0->GetEdges()[1][0].GetEdgeRankInfo().first, - size_t(3)); - CHECK_EQ(grad_test_node0->GetEdges()[1][0].GetEdgeRankInfo().second, - size_t(4)); VLOG(6) << "Test Set Meta and Get Meta"; auto_grad1->SetStopGradient(true); diff --git a/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h b/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h index 6237944aa4..a00e629d10 100644 --- 
a/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h +++ b/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h @@ -31,9 +31,12 @@ class GradTestNode : public egr::GradNodeBase { : GradNodeBase(in_num, out_num), val_(val) {} GradTestNode() : GradNodeBase() { val_ = 1.0; } std::string name() override { return "GradTestNode"; } - std::vector> operator()( - std::vector>& grads, // NOLINT - bool create_graph = false, bool is_new_grad = false) override { + paddle::small_vector, + egr::kSlotSmallVectorSize> + operator()(paddle::small_vector, + egr::kSlotSmallVectorSize>& grads, // NOLINT + bool create_graph = false, + bool is_new_grad = false) override { val_ = std::dynamic_pointer_cast(grads[0][0].impl()) ->data()[0]; phi::DenseTensorMeta meta = @@ -46,7 +49,9 @@ class GradTestNode : public egr::GradNodeBase { auto* dt_ptr = dt->mutable_data(paddle::platform::CPUPlace()); dt_ptr[0] = 6.0f; paddle::experimental::Tensor et1(dt); - std::vector> res = {{et1}}; + paddle::small_vector, + egr::kSlotSmallVectorSize> + res = {{et1}}; return res; } void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; } diff --git a/paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc b/paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc index 7d2aafc636..0fe349294b 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc @@ -45,7 +45,9 @@ TEST(GradTensorHolder, Constructor) { meta); paddle::experimental::Tensor et = paddle::experimental::Tensor(dt); - std::vector> inputs; + paddle::small_vector, + kSlotSmallVectorSize> + inputs; inputs.push_back({et}); GradTensorHolder grad_tensor_holder4 = GradTensorHolder(std::move(inputs)); diff --git a/paddle/fluid/eager/tests/task_tests/backward_test.cc b/paddle/fluid/eager/tests/task_tests/backward_test.cc index 8c127efa4f..7552ad83fa 100644 --- a/paddle/fluid/eager/tests/task_tests/backward_test.cc +++ b/paddle/fluid/eager/tests/task_tests/backward_test.cc @@ -76,8 +76,7 @@ TEST(Backward, SingleNodeEmptyGrad) { auto_grad_meta1->SetSingleOutRankWithSlot(0, 0); auto_grad_meta1->SetStopGradient(false); - std::vector res = {auto_grad_meta1}; - node0_ptr->AddEdges(&res, 0); + node0_ptr->SetGradOutMeta({leaf_tensor}, 0); } std::vector outs = {target_tensor}; // Run Backward @@ -135,8 +134,7 @@ TEST(Backward, SingleNodeCustomGrad) { std::dynamic_pointer_cast(acc_node_ptr)); auto_grad_meta1->SetSingleOutRankWithSlot(0, 0); auto_grad_meta1->SetStopGradient(false); - std::vector res = {auto_grad_meta1}; - node0_ptr->AddEdges(&res, 0); + node0_ptr->SetGradOutMeta({leaf_tensor}, 0); } // Run Backward @@ -191,12 +189,12 @@ TEST(Backward, LinearNodes) { auto_grad_meta->SetSingleOutRankWithSlot(0, 0); auto_grad_meta->SetStopGradient(false); // Connect Node0 -> Node1 via Edge - auto meta0 = egr::AutogradMeta(); - meta0.SetStopGradient(false); - meta0.SetSingleOutRankWithSlot(0, 0); - meta0.SetGradNode(node1_ptr); - std::vector res0 = {&meta0}; - node0_ptr->AddEdges(&res0, 0); + auto tmp_tensor = paddle::experimental::Tensor(); + auto* meta0 = EagerUtils::autograd_meta(&tmp_tensor); + meta0->SetStopGradient(false); + meta0->SetSingleOutRankWithSlot(0, 0); + meta0->SetGradNode(node1_ptr); + node0_ptr->SetGradOutMeta(tmp_tensor, 0); AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&leaf_tensor); // Connect Tensor and AccumulationNode via AutoGradMeta @@ -208,8 +206,7 @@ TEST(Backward, LinearNodes) { 
auto_grad_meta1->SetSingleOutRankWithSlot(0, 0); auto_grad_meta1->SetStopGradient(false); - std::vector res1 = {auto_grad_meta1}; - node1_ptr->AddEdges(&res1, 0); + node1_ptr->SetGradOutMeta(leaf_tensor, 0); } // Use Empty Grad Tensor @@ -288,20 +285,20 @@ TEST(Backward, WithAccumulation) { auto_grad_meta1->SetStopGradient(false); // Connect Node0 -> Node2 via Edge - auto meta0 = egr::AutogradMeta(); - meta0.SetStopGradient(false); - meta0.SetSingleOutRankWithSlot(0, 0); - meta0.SetGradNode(node2_ptr); - std::vector res0 = {&meta0}; - node0_ptr->AddEdges(&res0, 0); + auto tmp_tensor0 = paddle::experimental::Tensor(); + auto* meta0 = EagerUtils::autograd_meta(&tmp_tensor0); + meta0->SetStopGradient(false); + meta0->SetSingleOutRankWithSlot(0, 0); + meta0->SetGradNode(node2_ptr); + node0_ptr->SetGradOutMeta(tmp_tensor0, 0); // Connect Node1 -> Node2 via Edge - auto meta1 = egr::AutogradMeta(); - meta1.SetStopGradient(false); - meta1.SetSingleOutRankWithSlot(0, 0); - meta1.SetGradNode(node2_ptr); - std::vector res1 = {&meta1}; - node1_ptr->AddEdges(&res1, 0); + auto tmp_tensor1 = paddle::experimental::Tensor(); + auto* meta1 = EagerUtils::autograd_meta(&tmp_tensor1); + meta1->SetStopGradient(false); + meta1->SetSingleOutRankWithSlot(0, 0); + meta1->SetGradNode(node2_ptr); + node1_ptr->SetGradOutMeta(tmp_tensor1, 0); AutogradMeta* auto_grad_meta2 = EagerUtils::autograd_meta(&leaf_tensor); // Connect Tensor and AccumulationNode via AutoGradMeta @@ -314,7 +311,7 @@ TEST(Backward, WithAccumulation) { auto_grad_meta2->SetStopGradient(false); std::vector res2 = {auto_grad_meta2}; - node2_ptr->AddEdges(&res2, 0); + node2_ptr->SetGradOutMeta(leaf_tensor, 0); } Backward(target_tensors, grad_tensors); diff --git a/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc b/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc index 8b0759c17e..4337c0d092 100644 --- a/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc +++ b/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc @@ -69,7 +69,7 @@ TEST(CrossBatchAccumulation, SingleScaleNode) { meta->SetSingleOutRankWithSlot(0, 0); meta->SetGradNode(acc_node_ptr); std::vector res = {meta}; - scale_node_ptr->AddEdges(&res, 0); + scale_node_ptr->SetGradOutMeta(leaf_tensor, 0); Backward(target_tensors, {}); diff --git a/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc b/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc index 0bd1f3bdb3..bcb9820419 100644 --- a/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc +++ b/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc @@ -251,10 +251,11 @@ TEST(EagerUtils, GetGradAccumulationNode) { } TEST(EagerUtils, FillZeroForEmptyGradInputs) { - std::vector> grads = { - std::vector(1)}; - std::vector> slot_metas = { - std::vector(1)}; + paddle::small_vector, + egr::kSlotSmallVectorSize> + grads = {std::vector(1)}; + paddle::small_vector, egr::kSlotSmallVectorSize> + slot_metas = {std::vector(1)}; phi::DenseTensorMeta tensor_meta; tensor_meta.dtype = paddle::experimental::DataType::FLOAT32; diff --git a/paddle/fluid/eager/tests/task_tests/forward_autograd_test.cc b/paddle/fluid/eager/tests/task_tests/forward_autograd_test.cc index dc44d95daa..4cb316380a 100644 --- a/paddle/fluid/eager/tests/task_tests/forward_autograd_test.cc +++ b/paddle/fluid/eager/tests/task_tests/forward_autograd_test.cc @@ -137,12 +137,16 @@ TEST(Forward, LinearNodes) { // 2. TensorWrapper: No TensorWrapper for ScaleNode // 3. 
NextEdges: Node 1 -> Node 0
-  const std::vector<std::vector<Edge>>& node1_edges = grad_node1->GetEdges();
-  const auto& node1_edge = node1_edges[0];
-
-  CHECK_EQ(static_cast<int>(node1_edge[0].GetEdgeRankInfo().first), 0);
-  CHECK_EQ(static_cast<int>(node1_edge[0].GetEdgeRankInfo().second), 0);
-  CHECK_EQ(node1_edge[0].GetGradNode(), grad_node0);
+  const paddle::small_vector<std::vector<GradSlotMeta>,
+                             egr::kSlotSmallVectorSize>& node1_metas =
+      grad_node1->OutputMeta();
+  const auto& node1_meta = node1_metas[0];
+
+  CHECK_EQ(static_cast<int>(node1_meta[0].GetEdge().GetEdgeRankInfo().first),
+           0);
+  CHECK_EQ(static_cast<int>(node1_meta[0].GetEdge().GetEdgeRankInfo().second),
+           0);
+  CHECK_EQ(node1_meta[0].GetEdge().GetGradNode(), grad_node0);
 }
 }
 
@@ -232,16 +236,19 @@ TEST(Forward, BranchedNodes) {
   // 2. TensorWrapper: No TensorWrapper for ScaleNode
   // 3. NextEdges
   // Node 1 -> Node 0
-  const std::vector<std::vector<Edge>>& node1_edges = grad_node1->GetEdges();
-  const Edge& node1_edge = node1_edges[0][0];
+  const paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>&
+      node1_metas = grad_node1->OutputMeta();
+  const Edge& node1_edge = node1_metas[0][0].GetEdge();
 
   CHECK_EQ(static_cast<int>(node1_edge.GetEdgeRankInfo().first), 0);
   CHECK_EQ(static_cast<int>(node1_edge.GetEdgeRankInfo().second), 0);
   CHECK_EQ(node1_edge.GetGradNode(), grad_node0);
 
   // Node 2 -> Node 0
-  const std::vector<std::vector<Edge>>& node2_edges = grad_node2->GetEdges();
-  const Edge& node2_edge = node2_edges[0][0];
+  const paddle::small_vector<std::vector<GradSlotMeta>,
+                             egr::kSlotSmallVectorSize>& node2_metas =
+      grad_node2->OutputMeta();
+  const Edge& node2_edge = node2_metas[0][0].GetEdge();
 
   CHECK_EQ(static_cast<int>(node2_edge.GetEdgeRankInfo().first), 0);
   CHECK_EQ(static_cast<int>(node2_edge.GetEdgeRankInfo().second), 0);
diff --git a/paddle/fluid/eager/tests/task_tests/grad_test.cc b/paddle/fluid/eager/tests/task_tests/grad_test.cc
index 7e64c65d82..72a94b40ed 100644
--- a/paddle/fluid/eager/tests/task_tests/grad_test.cc
+++ b/paddle/fluid/eager/tests/task_tests/grad_test.cc
@@ -87,7 +87,7 @@ TEST(Grad, SingleNodeEmptyGrad) {
 
     // grad_node Add Edges
     std::vector<egr::AutogradMeta*> res = {auto_grad_meta1};
-    node0_ptr->AddEdges(&res, 0);
+    node0_ptr->SetGradOutMeta(leaf_tensor, 0);
   }
   std::vector<paddle::experimental::Tensor> outs = {output_tensor};
 
@@ -150,7 +150,7 @@ TEST(Grad, SingleNodeCustomGrad) {
     auto_grad_meta1->SetSingleOutRankWithSlot(0, 0);
     auto_grad_meta1->SetStopGradient(false);
     std::vector<egr::AutogradMeta*> res = {auto_grad_meta1};
-    node0_ptr->AddEdges(&res, 0);
+    node0_ptr->SetGradOutMeta(leaf_tensor, 0);
   }
 
   auto result = Grad(target_tensors, {leaf_tensor}, grad_tensors);
@@ -207,12 +207,12 @@ TEST(Grad, LinearNodes) {
     auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
     auto_grad_meta->SetStopGradient(false);
     // Connect Node0 -> Node1 via Edge
-    auto meta0 = egr::AutogradMeta();
-    meta0.SetStopGradient(false);
-    meta0.SetSingleOutRankWithSlot(0, 0);
-    meta0.SetGradNode(node1_ptr);
-    std::vector<egr::AutogradMeta*> res0 = {&meta0};
-    node0_ptr->AddEdges(&res0, 0);
+    auto tmp_tensor = paddle::experimental::Tensor();
+    auto* meta0 = EagerUtils::autograd_meta(&tmp_tensor);
+    meta0->SetStopGradient(false);
+    meta0->SetSingleOutRankWithSlot(0, 0);
+    meta0->SetGradNode(node1_ptr);
+    node0_ptr->SetGradOutMeta(tmp_tensor, 0);
 
     AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&leaf_tensor);
     // Connect Tensor and AccumulationNode via AutoGradMeta
+
     auto_grad_meta1->SetSingleOutRankWithSlot(0, 0);
     auto_grad_meta1->SetStopGradient(false);
 
-    std::vector<egr::AutogradMeta*> res1 = {auto_grad_meta1};
-    node1_ptr->AddEdges(&res1, 0);
+    node1_ptr->SetGradOutMeta(leaf_tensor, 0);
   }
 
   // Use Empty Grad Tensor
@@ -304,20 +303,20 @@ TEST(Grad, WithAccumulation) {
     auto_grad_meta1->SetStopGradient(false);
     // Connect Node0 -> Node2 via Edge
-    auto meta0 = egr::AutogradMeta();
-    meta0.SetStopGradient(false);
-    meta0.SetSingleOutRankWithSlot(0, 0);
-    meta0.SetGradNode(node2_ptr);
-    std::vector<egr::AutogradMeta*> res0 = {&meta0};
-    node0_ptr->AddEdges(&res0, 0);
+    auto tmp_tensor0 = paddle::experimental::Tensor();
+    auto* meta0 = EagerUtils::autograd_meta(&tmp_tensor0);
+    meta0->SetStopGradient(false);
+    meta0->SetSingleOutRankWithSlot(0, 0);
+    meta0->SetGradNode(node2_ptr);
+    node0_ptr->SetGradOutMeta(tmp_tensor0, 0);
     // Connect Node1 -> Node2 via Edge
-    auto meta1 = egr::AutogradMeta();
-    meta1.SetStopGradient(false);
-    meta1.SetSingleOutRankWithSlot(0, 0);
-    meta1.SetGradNode(node2_ptr);
-    std::vector<egr::AutogradMeta*> res1 = {&meta1};
-    node1_ptr->AddEdges(&res1, 0);
+    auto tmp_tensor1 = paddle::experimental::Tensor();
+    auto meta1 = EagerUtils::autograd_meta(&tmp_tensor1);
+    meta1->SetStopGradient(false);
+    meta1->SetSingleOutRankWithSlot(0, 0);
+    meta1->SetGradNode(node2_ptr);
+    node1_ptr->SetGradOutMeta(tmp_tensor1, 0);
 
     AutogradMeta* auto_grad_meta2 = EagerUtils::autograd_meta(&leaf_tensor);
     // Connect Tensor and AccumulationNode via AutoGradMeta
@@ -329,8 +328,7 @@ TEST(Grad, WithAccumulation) {
     auto_grad_meta2->SetSingleOutRankWithSlot(0, 0);
     auto_grad_meta2->SetStopGradient(false);
 
-    std::vector<egr::AutogradMeta*> res2 = {auto_grad_meta2};
-    node2_ptr->AddEdges(&res2, 0);
+    node2_ptr->SetGradOutMeta(leaf_tensor, 0);
   }
 
   auto result = Grad(target_tensors, {leaf_tensor}, grad_tensors);
diff --git a/paddle/fluid/eager/tests/task_tests/hook_test.cc b/paddle/fluid/eager/tests/task_tests/hook_test.cc
index 2c53fc89f6..855fe526c1 100644
--- a/paddle/fluid/eager/tests/task_tests/hook_test.cc
+++ b/paddle/fluid/eager/tests/task_tests/hook_test.cc
@@ -110,21 +110,20 @@ TEST(RetainGrad, HookBeforeRetainGrad) {
   paddle::experimental::Tensor leaf_tensor = paddle::experimental::Tensor();
   {
     // AccumulationNode Hook: +3
+    auto tmp_tensor0 = paddle::experimental::Tensor();
+    auto auto_grad_meta = EagerUtils::autograd_meta(&tmp_tensor0);
-    auto auto_grad_meta = std::make_shared<AutogradMeta>();
-
-    auto acc_node_ptr =
-        std::make_shared<GradNodeAccumulation>(auto_grad_meta.get());
+    auto acc_node_ptr = std::make_shared<GradNodeAccumulation>(auto_grad_meta);
 
     auto_grad_meta->SetStopGradient(false);
     auto_grad_meta->SetGradNode(acc_node_ptr);
 
     auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
-    std::vector<AutogradMeta*> res = {auto_grad_meta.get()};
-    scale_node_ptr->AddEdges(&res, 0);
+    std::vector<AutogradMeta*> res = {auto_grad_meta};
+    scale_node_ptr->SetGradOutMeta(tmp_tensor0, 0);
 
     leaf_tensor.set_autograd_meta(
         std::dynamic_pointer_cast<paddle::experimental::AbstractAutogradMeta>(
-            auto_grad_meta));
+            tmp_tensor0.mutable_autograd_meta()));
 
     egr_utils_api::RegisterGradientHookForTensor(
         leaf_tensor, std::make_shared<egr::CppTensorHook>(hook_function));
@@ -181,19 +180,17 @@ TEST(RetainGrad, HookAfterRetainGrad) {
   paddle::experimental::Tensor leaf_tensor = paddle::experimental::Tensor();
   {
     // AccumulationNode Hook: +3
-
-    auto auto_grad_meta = std::make_shared<AutogradMeta>();
-    auto acc_node_ptr =
-        std::make_shared<GradNodeAccumulation>(auto_grad_meta.get());
+    auto tmp_tensor0 = paddle::experimental::Tensor();
+    auto auto_grad_meta = EagerUtils::autograd_meta(&tmp_tensor0);
+    auto acc_node_ptr = std::make_shared<GradNodeAccumulation>(auto_grad_meta);
     auto_grad_meta->SetGradNode(acc_node_ptr);
     auto_grad_meta->SetStopGradient(false);
-    std::vector<AutogradMeta*> res = {auto_grad_meta.get()};
-    scale_node_ptr->AddEdges(&res, 0);
+    scale_node_ptr->SetGradOutMeta(tmp_tensor0, 0);
 
     auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
     leaf_tensor.set_autograd_meta(
         std::dynamic_pointer_cast<paddle::experimental::AbstractAutogradMeta>(
-            auto_grad_meta));
+            tmp_tensor0.mutable_autograd_meta()));
 
     egr_utils_api::RegisterGradientHookForTensor(
         leaf_tensor, std::make_shared<egr::CppTensorHook>(hook_function));
diff --git a/paddle/fluid/eager/to_static/run_program_op_func.h b/paddle/fluid/eager/to_static/run_program_op_func.h
index 416739bbbb..6b0a848350 100644
--- a/paddle/fluid/eager/to_static/run_program_op_func.h
+++ b/paddle/fluid/eager/to_static/run_program_op_func.h
@@ -69,9 +69,6 @@ inline void run_program_dygraph_function(
     grad_node->SetGradOutMeta(params, /*slot id*/ 1);
     grad_node->SetGradInMeta(deref_out, 0);
 
-    // Set Next Edges
-    grad_node->AddEdges(&p_autograd_x, /*slot id*/ 0);
-    grad_node->AddEdges(&p_autograd_params, /*slot id*/ 1);
 
     egr::EagerUtils::SetOutRankWithSlot(&p_autograd_outs, 0);
 
diff --git a/paddle/fluid/eager/to_static/run_program_op_node.h b/paddle/fluid/eager/to_static/run_program_op_node.h
index 180e18f22e..fe1cdefb7d 100644
--- a/paddle/fluid/eager/to_static/run_program_op_node.h
+++ b/paddle/fluid/eager/to_static/run_program_op_node.h
@@ -364,12 +364,16 @@ class GradNodeRunProgram : public egr::GradNodeBase {
   ~GradNodeRunProgram() override = default;
 
   // Functor: perform backward computations
-  virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
-      std::vector<std::vector<paddle::experimental::Tensor>> &grads,  // NOLINT
-      bool create_graph, bool is_new_grad) override {
+  virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                               egr::kSlotSmallVectorSize>
+  operator()(paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                                  egr::kSlotSmallVectorSize> &grads,  // NOLINT
+             bool create_graph,
+             bool is_new_grad) override {
     VLOG(3) << "Running Eager Backward Node: GradNodeRunProgram";
-    std::vector<std::vector<paddle::experimental::Tensor>> hooked_grads =
-        GradNodeRunProgram::ApplyGradientHooks(grads);
+    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                         egr::kSlotSmallVectorSize>
+        hooked_grads = GradNodeRunProgram::ApplyGradientHooks(grads);
     PADDLE_ENFORCE_EQ(hooked_grads.size(), 1,
                       paddle::platform::errors::InvalidArgument(
                           "The hooked_grads.size() of RunProgramGradOp should "
diff --git a/paddle/fluid/eager/utils.cc b/paddle/fluid/eager/utils.cc
index 66d877f06e..033af5c496 100644
--- a/paddle/fluid/eager/utils.cc
+++ b/paddle/fluid/eager/utils.cc
@@ -441,8 +441,10 @@ std::shared_ptr<GradNodeBase> EagerUtils::GetGradAccumulationNode(
 }
 
 void EagerUtils::FillZeroForEmptyGradInputs(
-    std::vector<std::vector<paddle::experimental::Tensor>>* in_grads,
-    const std::vector<std::vector<GradSlotMeta>>& grad_in_metas) {
+    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                         kSlotSmallVectorSize>* in_grads,
+    const paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>&
+        grad_in_metas) {
   for (size_t i = 0; i < in_grads->size(); i++) {
     for (size_t j = 0; j < (*in_grads)[i].size(); j++) {
       paddle::experimental::Tensor& grad = (*in_grads)[i][j];
diff --git a/paddle/fluid/eager/utils.h b/paddle/fluid/eager/utils.h
index 51a322c852..ef2b1baac6 100644
--- a/paddle/fluid/eager/utils.h
+++ b/paddle/fluid/eager/utils.h
@@ -234,8 +234,10 @@ class EagerUtils {
    * Fill Zero
    * **/
   static void FillZeroForEmptyGradInputs(
-      std::vector<std::vector<paddle::experimental::Tensor>>* out_grads,
-      const std::vector<std::vector<GradSlotMeta>>& grad_out_metas);
+      paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                           kSlotSmallVectorSize>* out_grads,
+      const paddle::small_vector<std::vector<GradSlotMeta>,
+                                 kSlotSmallVectorSize>& grad_out_metas);
 };
 
 }  // namespace egr
diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc
index 4d7b50943d..2a8bedfe32 100644
--- a/paddle/fluid/pybind/eager_functions.cc
+++ b/paddle/fluid/pybind/eager_functions.cc
@@ -406,12 +406,9 @@ static PyObject* eager_api_run_costum_op(PyObject* self, PyObject* args,
 
       if (slot_map[0].find(i) != slot_map[0].end()) {
         grad_node->SetGradOutMeta(in_tensors, slot_map[0][i]);
-        grad_node->AddEdges(&ins_auto_grad_metas[i], slot_map[0][i]);
       } else {
         grad_node->SetGradOutMeta(in_tensors,
                                   ins_auto_grad_metas.size() - 1 - no_grad_cnt);
-        grad_node->AddEdges(&ins_auto_grad_metas[i],
-                            ins_auto_grad_metas.size() - 1 - no_grad_cnt);
         no_grad_cnt++;
       }
     }
diff --git a/paddle/fluid/pybind/eager_py_layer.cc b/paddle/fluid/pybind/eager_py_layer.cc
index 605056e7af..46381a9e9e 100644
--- a/paddle/fluid/pybind/eager_py_layer.cc
+++ b/paddle/fluid/pybind/eager_py_layer.cc
@@ -346,10 +346,8 @@ PyObject* pylayer_method_apply(PyObject* cls, PyObject* args,
         for (auto t : inputs_tensor[i]) {
          grad_node->SetGradOutMeta(*t, i);
         }
-        grad_node->AddEdges(&inputs_autograd_meta[i], i);
       } else {
         grad_node->SetGradOutMeta(*inputs_tensor[i][0], i);
-        grad_node->AddEdges(inputs_autograd_meta[i][0], i);
       }
     }
-- 
GitLab