diff --git a/paddle/fluid/eager/auto_code_generator/eager_generator.cc b/paddle/fluid/eager/auto_code_generator/eager_generator.cc
index 3a9bac833d5889fc00844e8ac790a2f9ab32efef..817a0de6e0ca9594d6e9e09d41538071def1b47f 100644
--- a/paddle/fluid/eager/auto_code_generator/eager_generator.cc
+++ b/paddle/fluid/eager/auto_code_generator/eager_generator.cc
@@ -2043,6 +2043,32 @@ static std::string GenerateSingleOpBase(
   const std::string& ins_name = "ins" + std::to_string(*outs_size);
   const std::string& outs_name = "outs" + std::to_string(*outs_size);
   const std::string& attrs_name = "attrs_map" + std::to_string(*outs_size);
+  const std::string& hooked_grads = "hooked_grads" + std::to_string(*outs_size);
+
+  // [Generation] Get Full Zero
+  std::string fill_zero_str = "";
+  if (ops_to_fill_zero_for_empty_grads.count(fwd_op_type)) {
+    for (auto iter : grad_ins) {
+      const std::string& grad_input_name = iter.first;
+      if (grad_ins_grad_slotname_map.count(grad_input_name)) {
+        size_t fwd_output_position = fwd_outputs_name_pos_map.at(
+            grad_ins_grad_slotname_map.at(grad_input_name));
+        const char* FILL_ZERO_TEMPLATE =
+            "egr::EagerUtils::FillZeroForEmptyOptionalGradInput(&grads[%d], "
+            "this->InputMeta()[%d]);\n";
+        fill_zero_str += paddle::string::Sprintf(
+            FILL_ZERO_TEMPLATE, fwd_output_position, fwd_output_position);
+      }
+    }
+  }
+  generated_grad_function_body += fill_zero_str;
+  generated_grad_function_body +=
+      "  paddle::small_vector<std::vector<paddle::experimental::Tensor>, "
+      "egr::kSlotSmallVectorSize> " +
+      hooked_grads +
+      " = "
+      "GradNode" +
+      fwd_op_type + "::ApplyGradientHooks(grads);\n";
 
   // [Generation] Get Ins Map
   std::unordered_set<std::string> dispensable_input_name_set;
@@ -2117,16 +2143,16 @@
       size_t fwd_output_position = fwd_outputs_name_pos_map.at(
           grad_ins_grad_slotname_map.at(grad_input_name));
       const char* GRAD_INS_GRAD_CONTENT_TEMPLATE =
-          "{ \"%s\", egr::EagerUtils::TrySyncToVars(hooked_grads[%d]) },";
+          "{ \"%s\", egr::EagerUtils::TrySyncToVars(%s[%d]) },";
       ins_contents_str += paddle::string::Sprintf(
-          GRAD_INS_GRAD_CONTENT_TEMPLATE, grad_input_name, fwd_output_position);
+          GRAD_INS_GRAD_CONTENT_TEMPLATE, grad_input_name, hooked_grads,
+          fwd_output_position);
       if (!backward_inplace_map.empty() &&
           backward_inplace_map.count(grad_input_name)) {
         process_backward_inplace = true;
-        const char* GRAD_INS_HOOKED_GRAD_TEMPLATE =
-            "auto& %s = hooked_grads[%d][0];";
+        const char* GRAD_INS_HOOKED_GRAD_TEMPLATE = "auto& %s = %s[%d][0];";
         std::string hooked_grads_tensor_str = paddle::string::Sprintf(
-            GRAD_INS_HOOKED_GRAD_TEMPLATE, bwd_inplace_input_name,
+            GRAD_INS_HOOKED_GRAD_TEMPLATE, bwd_inplace_input_name, hooked_grads,
             fwd_output_position);
         const char* GRAD_INS_GRAD_TENSOR_TEMPLATE = "grads[%d][0]";
         std::string grads_tensor_str = paddle::string::Sprintf(
@@ -2239,10 +2265,10 @@ static std::string GenerateSingleOpBase(
         const char* GRAD_OUTS_CONTENT_TEMPLATE =
             "  if((!out_metas[%d].empty()) && "
             "(!(out_metas[%d][0].IsStopGradient()))){ \n %s.insert({ \"%s\", "
-            "egr::EagerUtils::TrySyncToVars(hooked_grads[%d])});} \n ";
+            "egr::EagerUtils::TrySyncToVars(%s[%d])});} \n ";
         outs_contents_str += paddle::string::Sprintf(
             GRAD_OUTS_CONTENT_TEMPLATE, grads_position, grads_position,
-            outs_name, grad_output_name, grads_position);
+            outs_name, grad_output_name, hooked_grads, grads_position);
 
       } else {
         if (dispensable_input_name_set.count(fwd_name) &&
@@ -2561,9 +2587,6 @@ static std::string GenerateGradNodeCCContents(
   }
 
   const char* BWD_RETURN_TEMPLATE =
-      "  paddle::small_vector<std::vector<paddle::experimental::Tensor>, "
-      "egr::kSlotSmallVectorSize> hooked_grads = "
-      "GradNode%s::ApplyGradientHooks(grads);\n"
       "  const auto& out_metas = OutputMeta();\n"
       "  paddle::small_vector<std::vector<paddle::experimental::Tensor>, "
       "egr::kSlotSmallVectorSize> outputs(%d);\n"
@@ -2571,9 +2594,8 @@ static std::string GenerateGradNodeCCContents(
       "  if(NeedComplexToRealConversion()) "
       "HandleComplexGradToRealGrad(&outputs);\n"
       "  return outputs;\n";
-  generated_grad_function_body =
-      paddle::string::Sprintf(BWD_RETURN_TEMPLATE, fwd_op_type, in_vars.size(),
-                              generated_grad_function_body);
+  generated_grad_function_body = paddle::string::Sprintf(
+      BWD_RETURN_TEMPLATE, in_vars.size(), generated_grad_function_body);
 
   // [Generation] Get Full Grad Function
   const char* GRAD_FUNCTION_TEMPLATE =
@@ -2584,17 +2606,9 @@ static std::string GenerateGradNodeCCContents(
       "egr::kSlotSmallVectorSize>& grads, bool "
       "create_graph, bool is_new_grad) {\n"
       "%s"
-      "%s"
       "\n}";
 
-  std::string fill_zero_str = "";
-  if (ops_to_fill_zero_for_empty_grads.count(fwd_op_type)) {
-    fill_zero_str =
-        "egr::EagerUtils::FillZeroForEmptyGradInputs(&grads, "
-        "this->InputMeta());\n";
-  }
-  std::string grad_function_str =
-      paddle::string::Sprintf(GRAD_FUNCTION_TEMPLATE, fwd_op_type,
-                              fill_zero_str, generated_grad_function_body);
+  std::string grad_function_str = paddle::string::Sprintf(
+      GRAD_FUNCTION_TEMPLATE, fwd_op_type, generated_grad_function_body);
 
   VLOG(6) << "Generated returns";
diff --git a/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc b/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc
index bcb9820419d0f0dee8c37ee44b39f044a8331eb2..551262d259e08f6eba54d5fd3620753ca8113b2e 100644
--- a/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc
+++ b/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc
@@ -250,7 +250,7 @@ TEST(EagerUtils, GetGradAccumulationNode) {
   ASSERT_ANY_THROW(egr::EagerUtils::GetGradAccumulationNode(t0));
 }
 
-TEST(EagerUtils, FillZeroForEmptyGradInputs) {
+TEST(EagerUtils, FillZeroForEmptyOptionalGradInput) {
   paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                        egr::kSlotSmallVectorSize>
       grads = {std::vector<paddle::experimental::Tensor>(1)};
@@ -263,7 +263,7 @@
   slot_metas[0][0].SetTensorMeta(tensor_meta);
   slot_metas[0][0].SetPlace(phi::CPUPlace());
 
-  EagerUtils::FillZeroForEmptyGradInputs(&grads, slot_metas);
+  EagerUtils::FillZeroForEmptyOptionalGradInput(&grads[0], slot_metas[0]);
   eager_test::CompareTensorWithValue<float>(grads[0][0], 0.0);
 }
 
diff --git a/paddle/fluid/eager/to_static/run_program_op_node.h b/paddle/fluid/eager/to_static/run_program_op_node.h
index fe1cdefb7d572aa2fcccccf3980b9f9c973a8ea4..5a730e4dbf164d98f20936bcd5cd0f80d9ab4c56 100644
--- a/paddle/fluid/eager/to_static/run_program_op_node.h
+++ b/paddle/fluid/eager/to_static/run_program_op_node.h
@@ -379,8 +379,8 @@ class GradNodeRunProgram : public egr::GradNodeBase {
                           "The hooked_grads.size() of RunProgramGradOp should "
                           "be equal to 1."));
 
-    egr::EagerUtils::FillZeroForEmptyGradInputs(&hooked_grads,
-                                                this->InputMeta());
+    egr::EagerUtils::FillZeroForEmptyOptionalGradInput(&hooked_grads[0],
+                                                       this->InputMeta()[0]);
     VLOG(3) << "hooked_grads[0].size() : " << hooked_grads[0].size();
     std::vector<paddle::experimental::Tensor> x_grad;
     std::vector<paddle::experimental::Tensor> params_grad;
diff --git a/paddle/fluid/eager/utils.cc b/paddle/fluid/eager/utils.cc
index 9ccd91ca6573389e61305b973faae83f55151a2d..7d9554c52eb6c94f31d77afddb93a99cf37c4682 100644
--- a/paddle/fluid/eager/utils.cc
+++ b/paddle/fluid/eager/utils.cc
@@ -467,26 +467,16 @@ std::shared_ptr<GradNodeBase> EagerUtils::GetGradAccumulationNode(
   }
 }
 
-void EagerUtils::FillZeroForEmptyGradInputs(
-    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
-                         kSlotSmallVectorSize>* in_grads,
-    const paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>&
-        grad_in_metas) {
+void EagerUtils::FillZeroForEmptyOptionalGradInput(
+    std::vector<paddle::experimental::Tensor>* in_grads,
+    const std::vector<GradSlotMeta>& grad_in_metas) {
   for (size_t i = 0; i < in_grads->size(); i++) {
-    for (size_t j = 0; j < (*in_grads)[i].size(); j++) {
-      paddle::experimental::Tensor& grad = (*in_grads)[i][j];
-      if (!grad.initialized()) {
-        const GradSlotMeta& grad_in_meta = grad_in_metas[i][j];
-        PADDLE_ENFORCE(
-            grad_in_meta.HasTensorMeta(),
-            paddle::platform::errors::Fatal(
-                "Unable to fill empty grad inputs due to empty GradSlotMeta"));
-        const auto& tensor_meta = grad_in_meta.GetTensorMeta();
-        auto tensor_with_zero = paddle::experimental::full(
-            phi::vectorize(tensor_meta.dims), 0.0, tensor_meta.dtype,
-            grad_in_meta.GetPlace());
-        grad.set_impl(tensor_with_zero.impl());
-      }
+    paddle::experimental::Tensor& grad = (*in_grads)[i];
+    if (!grad.initialized() && grad_in_metas[i].HasTensorMeta()) {
+      auto tensor_with_zero = paddle::experimental::full(
+          phi::vectorize(grad_in_metas[i].GetTensorMeta().dims), 0.0,
+          grad_in_metas[i].GetTensorMeta().dtype, grad_in_metas[i].GetPlace());
+      grad.set_impl(tensor_with_zero.impl());
     }
   }
 }
diff --git a/paddle/fluid/eager/utils.h b/paddle/fluid/eager/utils.h
index 63baebca53c3766435f7858d6a0532b8804a6eb2..c6389e998315c8a58e07cf6785d72ce3000d9880 100644
--- a/paddle/fluid/eager/utils.h
+++ b/paddle/fluid/eager/utils.h
@@ -236,11 +236,9 @@ class EagerUtils {
   /**
    * Fill Zero
    * **/
-  static void FillZeroForEmptyGradInputs(
-      paddle::small_vector<std::vector<paddle::experimental::Tensor>,
-                           kSlotSmallVectorSize>* out_grads,
-      const paddle::small_vector<std::vector<GradSlotMeta>,
-                                 kSlotSmallVectorSize>& grad_out_metas);
+  static void FillZeroForEmptyOptionalGradInput(
+      std::vector<paddle::experimental::Tensor>* in_grads,
+      const std::vector<GradSlotMeta>& grad_in_metas);
   static void FillZeroForEmptyGradInput(paddle::experimental::Tensor* in_grad,
                                         const GradSlotMeta& grad_in_meta);
   static void FillZeroForEmptyOptionalGradInput(
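
Note on the semantics of the patch: the old FillZeroForEmptyGradInputs walked every entry of every grad slot and hard-failed (PADDLE_ENFORCE) whenever a GradSlotMeta carried no TensorMeta, while the replacement FillZeroForEmptyOptionalGradInput operates on a single slot and silently skips entries without a TensorMeta, which is what makes truly optional grad inputs legal; correspondingly, the code generator now emits one per-slot fill-zero call per op (and the ApplyGradientHooks call) into the grad function body instead of a single blanket call in GRAD_FUNCTION_TEMPLATE. Below is a minimal, self-contained sketch of that per-slot control flow. FakeTensor and FakeSlotMeta are hypothetical stand-ins for paddle::experimental::Tensor and egr::GradSlotMeta, and the dims assignment stands in for paddle::experimental::full; this illustrates the behavior, it is not the actual implementation.

#include <cstdint>
#include <iostream>
#include <vector>

// Hypothetical stand-ins, reduced to the fields the fill-zero logic inspects.
struct FakeTensor {
  bool initialized = false;
  std::vector<int64_t> dims;
};
struct FakeSlotMeta {
  bool has_tensor_meta = false;
  std::vector<int64_t> dims;
};

// Mirrors the new per-slot FillZeroForEmptyOptionalGradInput: zero-fill only
// entries that are BOTH uninitialized and carry a usable TensorMeta.
void FillZeroForEmptyOptionalGradInput(std::vector<FakeTensor>* in_grads,
                                       const std::vector<FakeSlotMeta>& metas) {
  for (size_t i = 0; i < in_grads->size(); i++) {
    FakeTensor& grad = (*in_grads)[i];
    if (!grad.initialized && metas[i].has_tensor_meta) {
      grad.dims = metas[i].dims;  // stand-in for paddle::experimental::full(dims, 0.0, ...)
      grad.initialized = true;
    }
  }
}

int main() {
  std::vector<FakeTensor> slot(2);  // both grad entries start empty
  std::vector<FakeSlotMeta> metas = {{true, {2, 3}}, {false, {}}};
  FillZeroForEmptyOptionalGradInput(&slot, metas);
  // Only slot[0] had a TensorMeta, so only it is zero-filled; slot[1] is left
  // empty instead of triggering the fatal error the old API raised.
  std::cout << slot[0].initialized << " " << slot[1].initialized << "\n";  // prints: 1 0
}

The same skip-if-no-meta decision appears verbatim in the new utils.cc body above (the `!grad.initialized() && grad_in_metas[i].HasTensorMeta()` condition), and the per-slot signature is why the call sites in run_program_op_node.h and the generated code now index a single slot (`&grads[%d]`, `this->InputMeta()[%d]`) rather than passing the whole small_vector.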