From 4427f1b1726bbe148d2c9663b841f939fd0eeda8 Mon Sep 17 00:00:00 2001
From: zyfncg
Date: Thu, 19 May 2022 14:52:21 +0800
Subject: [PATCH] [Phi] Change the output format of C++ backward api (Part2)
 (#42545)

* change the output format of C++ backward api
* fix merge conflict
* fix sparse api code auto-gen
* fix eager_gen bug
* fix bug of output is null
* fix bug of conv2d_grad_impl
* fix optional grad
* fix bug of eager-gen double_grad
* fix bug
* fix multiply_double_grad bug
* fix bug of higher order derivative
* fix bug of FillZeroForEmptyGradInput
* remove redundant vector in grad_node
* fix bug of test_deformable_conv_v1_op
* fix bug of test_deformable_conv_v1_op
* some refactor
---
 .../final_state_generator/codegen_utils.py    |  9 +-
 .../final_state_generator/eager_gen.py        | 83 ++++++++++++-------
 paddle/fluid/eager/grad_node_info.cc          |  2 +
 paddle/fluid/eager/utils.cc                   | 40 ++++++++-
 paddle/fluid/eager/utils.h                    |  7 ++
 paddle/phi/api/lib/kernel_dispatch.h          |  7 +-
 paddle/phi/kernels/activation_grad_kernel.h   |  3 +-
 paddle/phi/kernels/funcs/activation_functor.h | 17 ++--
 .../phi/kernels/impl/activation_grad_impl.h   |  8 +-
 python/paddle/utils/code_gen/backward.yaml    |  1 +
 10 files changed, 127 insertions(+), 50 deletions(-)

diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py
index ca4b1ff686e..3f4fcc4608e 100644
--- a/paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py
+++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py
@@ -24,10 +24,11 @@ import os
 ops_to_fill_zero_for_empty_grads = set([
     "split_grad", "rnn_grad", "matmul_double_grad", "matmul_triple_grad",
     "sigmoid_double_grad", "sigmoid_triple_grad", "add_double_grad",
-    "add_triple_grad", "multiply_double_grad", "multiply_triple_grad",
-    "conv2d_grad_grad", "batch_norm_double_grad", "tanh_double_grad",
-    "tanh_triple_grad", "subtract_double_grad", "divide_double_grad",
-    "log_double_grad", "elu_double_grad", "leaky_relu_double_grad"
+    "add_triple_grad", "multiply_grad", "multiply_double_grad",
+    "multiply_triple_grad", "conv2d_grad_grad", "batch_norm_double_grad",
+    "tanh_double_grad", "tanh_triple_grad", "subtract_double_grad",
+    "divide_double_grad", "log_double_grad", "elu_double_grad",
+    "leaky_relu_double_grad"
 ])
 
 # For API dispatch used at python-level
diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py
index de1fbce6dd9..1ce5216ddce 100644
--- a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py
+++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py
@@ -337,14 +337,14 @@ AMP_LOGIC_TEMPLATE = \
 
 CREATE_PLAIN_OPTIONAL_TENSOR_TEMPLATE = \
 """
-  paddle::optional<paddle::experimental::Tensor> {}_optional = paddle::none;
-  if({}.initialized()) {}_optional = paddle::make_optional<paddle::experimental::Tensor>({});
+  paddle::optional<paddle::experimental::Tensor> {}_optional = paddle::none;
+  if({}.initialized()) {}_optional = paddle::make_optional<paddle::experimental::Tensor>({});
 """
 
 CREATE_RECOVER_OPTIONAL_TENSOR_TEMPLATE = \
 """
-  paddle::optional<paddle::experimental::Tensor> {}_optional = paddle::none;
-  if( {}.impl() ) {}_optional = paddle::make_optional<paddle::experimental::Tensor>({});
+  paddle::optional<paddle::experimental::Tensor> {}_optional = paddle::none;
+  if( {}.impl() ) {}_optional = paddle::make_optional<paddle::experimental::Tensor>({});
 """
 
 CHECK_NAN_AND_INF_TEMPLATE = \
@@ -738,9 +738,14 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase):
         num_outputs = len(forward_outputs_position_map.keys())
         for name, (_, pos) in forward_outputs_position_map.items():
             output_autograd_meta_name = GetAutoGradMetaName(name)
+            set_out_rank = f"""{indent}if ({output_autograd_meta_name}) {{
+{indent}  egr::EagerUtils::SetOutRankWithSlot({output_autograd_meta_name}, {pos});
+{indent}}}"""
+
+            set_history = f"""{indent}if ({output_autograd_meta_name}) {{
+{indent}  egr::EagerUtils::SetHistory({output_autograd_meta_name}, grad_node);
+{indent}}}"""
 
-            set_out_rank = f"{indent}egr::EagerUtils::SetOutRankWithSlot({output_autograd_meta_name}, {pos});"
-            set_history = f"{indent}egr::EagerUtils::SetHistory({output_autograd_meta_name}, grad_node);"
             set_grad_in_meta = f"{indent}grad_node->SetGradInMeta({name}, {pos});"
             set_retain_grad = f"{indent}egr::EagerUtils::CheckAndRetainGrad({name});"
 
@@ -1265,7 +1270,17 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
         # Fill Grad Ins with Zero
         fill_zero_str = ""
         if backward_api_name in ops_to_fill_zero_for_empty_grads:
-            fill_zero_str = f"{indent}egr::EagerUtils::FillZeroForEmptyGradInputs(&grads, this->InputMeta());\n"
+            fill_zero_str = f"{indent}const auto& input_metas = this->InputMeta();\n"
+            for name, (ttype, fwd_position,
+                       grad_api_position) in backward_grad_inputs_map.items():
+                if name in self.optional_inputs:
+                    if IsPlainTensorType(ttype):
+                        fill_zero_str += f"{indent}egr::EagerUtils::FillZeroForEmptyOptionalGradInput(&grads[{fwd_position}][0], input_metas[{fwd_position}][0]);\n"
+                else:
+                    if IsPlainTensorType(ttype):
+                        fill_zero_str += f"{indent}egr::EagerUtils::FillZeroForEmptyGradInput(&grads[{fwd_position}][0], input_metas[{fwd_position}][0]);\n"
+                    else:
+                        fill_zero_str += f"{indent}egr::EagerUtils::FillZeroForEmptyGradInput(&grads[{fwd_position}], input_metas[{fwd_position}]);\n"
 
         # Grad Ins from TensorWrappers
         for name, (_, is_fwd_input,
@@ -1322,40 +1337,46 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
 
         get_grad_in_args_str = "\n".join(get_grad_in_args_list)
 
-        # Grad Outputs
-        for name, (ttype, fwd_position,
-                   grad_api_position) in backward_grad_outputs_map.items():
-            transformed_tensor_name = self.TransformToNextGradName(name)
-            if IsPlainTensorType(ttype):
-                grad_api_args.append(f"api_output[{fwd_position}][0]")
-            else:
-                assert IsVectorTensorType(ttype)
-                grad_api_args.append(f"api_output[{fwd_position}]")
-
-        grad_api_args_str = ", ".join(grad_api_args)
-
         # Grad Function Call String
         slot_num_bwd_outputs = len(self.forward_inputs_position_map.keys())
         grad_api_namespace = f"paddle::experimental::{namespace}"
         grad_function_call_str = f"""
  const auto& out_metas = OutputMeta();
  paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> returns({slot_num_bwd_outputs});
- paddle::small_vector<std::vector<paddle::experimental::Tensor*>, egr::kSlotSmallVectorSize> api_output({slot_num_bwd_outputs});
  for (int i = 0; i < {slot_num_bwd_outputs}; ++i) {{
    returns[i].resize(out_metas[i].size());
-   if(returns[i].size() == 0) {{
-     api_output[i].reserve(1);
-     api_output[i].push_back(nullptr);
-     continue;
-   }}
-   api_output[i].reserve(returns[i].size());
-   for (size_t j = 0; j < returns[i].size(); ++j) {{
-     api_output[i].push_back(&returns[i][j]);
-   }}
  }}
 """
-        grad_function_call_str = grad_function_call_str + f"{indent}{grad_api_namespace}{backward_api_name}({grad_api_args_str});"
+
+        # Grad Outputs
+        out_index = -1
+        for name, (ttype, fwd_position,
+                   grad_api_position) in backward_grad_outputs_map.items():
+            transformed_tensor_name = self.TransformToNextGradName(name)
+            out_index = out_index + 1
+            grad_api_args.append(f"api_output_{out_index}")
+
+            if IsPlainTensorType(ttype):
+                grad_function_call_str += f"""
+  auto* api_output_{out_index} = (out_metas[{fwd_position}].empty() || out_metas[{fwd_position}][0].IsStopGradient()) ? nullptr : &returns[{fwd_position}][0];"""
+
+            else:
+                assert IsVectorTensorType(ttype)
+                grad_function_call_str += f"""
+  std::vector<paddle::experimental::Tensor*> api_output_{out_index};
+  api_output_{out_index}.reserve(returns[{fwd_position}].size());
+  for (size_t i = 0; i < returns[{fwd_position}].size(); ++i) {{
+    if (out_metas[{fwd_position}].empty() || out_metas[{fwd_position}][i].IsStopGradient()) {{
+      api_output_{out_index}.push_back(nullptr);
+    }} else {{
+      api_output_{out_index}.push_back(&returns[{fwd_position}][i]);
+    }}
+  }}"""
+
+        grad_api_args_str = ", ".join(grad_api_args)
+
+        grad_function_call_str = grad_function_call_str + f"""
+{indent}{grad_api_namespace}{backward_api_name}({grad_api_args_str});"""
 
         # Check Nan and Inf
         check_nan_inf_str = CHECK_NAN_AND_INF_TEMPLATE.format(backward_api_name,
@@ -1425,7 +1446,7 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
             if IsPlainTensorType(rtype):
                 output_autograd_meta = f"""
  auto& {transformed_tensor_name} = returns[{pos}][0];
-  egr::AutogradMeta* {output_autograd_meta_name} = egr::EagerUtils::autograd_meta(&{transformed_tensor_name});"""
+  egr::AutogradMeta* {output_autograd_meta_name} = returns[{pos}][0].initialized() ? egr::EagerUtils::autograd_meta(&{transformed_tensor_name}) : nullptr;"""
 
             else:
                 assert IsVectorTensorType(rtype)
diff --git a/paddle/fluid/eager/grad_node_info.cc b/paddle/fluid/eager/grad_node_info.cc
index 2d4db8cb529..af387bb3238 100644
--- a/paddle/fluid/eager/grad_node_info.cc
+++ b/paddle/fluid/eager/grad_node_info.cc
@@ -218,6 +218,8 @@ void GradNodeBase::SetGradOutMeta(const paddle::experimental::Tensor& fwd_in,
   // Set Stop_gradient
   if (fwd_in_meta) {
     meta.SetStopGradient(fwd_in_meta->StopGradient());
+  } else {
+    meta.SetStopGradient(true);
   }
   // Set Adj Edges
   if (fwd_in_meta && !fwd_in_meta->StopGradient()) {
diff --git a/paddle/fluid/eager/utils.cc b/paddle/fluid/eager/utils.cc
index 65294a8eb7a..f253c4cb513 100644
--- a/paddle/fluid/eager/utils.cc
+++ b/paddle/fluid/eager/utils.cc
@@ -454,16 +454,48 @@ void EagerUtils::FillZeroForEmptyGradInputs(
           grad_in_meta.HasTensorMeta(),
           paddle::platform::errors::Fatal(
               "Unable to fill empty grad inputs due to empty GradSlotMeta"));
       const auto& tensor_meta = grad_in_meta.GetTensorMeta();
-      phi::Place place = grad_in_meta.GetPlace();
-
       auto tensor_with_zero = paddle::experimental::full(
-          phi::vectorize(tensor_meta.dims), 0.0, tensor_meta.dtype, place);
+          phi::vectorize(tensor_meta.dims), 0.0, tensor_meta.dtype,
+          grad_in_meta.GetPlace());
       grad.set_impl(tensor_with_zero.impl());
     }
    }
  }
 }
 
+void EagerUtils::FillZeroForEmptyGradInput(
+    paddle::experimental::Tensor* in_grad, const GradSlotMeta& grad_in_meta) {
+  if (!in_grad->initialized()) {
+    PADDLE_ENFORCE(
+        grad_in_meta.HasTensorMeta(),
+        paddle::platform::errors::Fatal(
+            "Unable to fill empty grad inputs due to empty GradSlotMeta"));
+    const auto& tensor_meta = grad_in_meta.GetTensorMeta();
+    auto tensor_with_zero =
+        paddle::experimental::full(phi::vectorize(tensor_meta.dims), 0.0,
+                                   tensor_meta.dtype, grad_in_meta.GetPlace());
+    in_grad->set_impl(tensor_with_zero.impl());
+  }
+}
+
+void EagerUtils::FillZeroForEmptyOptionalGradInput(
+    paddle::experimental::Tensor* in_grad, const GradSlotMeta& grad_in_meta) {
+  if (!in_grad->initialized() && grad_in_meta.HasTensorMeta()) {
+    const auto& tensor_meta = grad_in_meta.GetTensorMeta();
+    auto tensor_with_zero =
+        paddle::experimental::full(phi::vectorize(tensor_meta.dims), 0.0,
+                                   tensor_meta.dtype, grad_in_meta.GetPlace());
+    in_grad->set_impl(tensor_with_zero.impl());
+  }
+}
+
+void EagerUtils::FillZeroForEmptyGradInput(
+    std::vector<paddle::experimental::Tensor>* in_grads,
+    const std::vector<GradSlotMeta>& grad_in_metas) {
+  for (size_t i = 0; i < in_grads->size(); i++) {
+    FillZeroForEmptyGradInput(&in_grads->at(i), grad_in_metas[i]);
+  }
+}
+
 }  // namespace egr
diff --git a/paddle/fluid/eager/utils.h b/paddle/fluid/eager/utils.h
index ef2b1baac66..b96244f0d13 100644
--- a/paddle/fluid/eager/utils.h
+++ b/paddle/fluid/eager/utils.h
@@ -238,6 +238,13 @@ class EagerUtils {
                            kSlotSmallVectorSize>* out_grads,
       const paddle::small_vector<std::vector<GradSlotMeta>,
                                  kSlotSmallVectorSize>& grad_out_metas);
+  static void FillZeroForEmptyGradInput(paddle::experimental::Tensor* in_grad,
+                                        const GradSlotMeta& grad_in_meta);
+  static void FillZeroForEmptyOptionalGradInput(
+      paddle::experimental::Tensor* in_grad, const GradSlotMeta& grad_in_meta);
+  static void FillZeroForEmptyGradInput(
+      std::vector<paddle::experimental::Tensor>* in_grads,
+      const std::vector<GradSlotMeta>& grad_in_metas);
 };
 
 }  // namespace egr
diff --git a/paddle/phi/api/lib/kernel_dispatch.h b/paddle/phi/api/lib/kernel_dispatch.h
index 9f2ad6c62c7..29254a0486d 100644
--- a/paddle/phi/api/lib/kernel_dispatch.h
+++ b/paddle/phi/api/lib/kernel_dispatch.h
@@ -109,7 +109,12 @@ struct KernelKeyParser : ArgsIterator<KernelKeyParser> {
     }
   }
 
-  void operator()(const Tensor& x) { AssignKernelKeySet(*x.impl()); }
+  void operator()(const Tensor& x) {
+    const auto* tensor = x.impl().get();
+    if (tensor) {
+      AssignKernelKeySet(*tensor);
+    }
+  }
 
   void operator()(const std::vector<Tensor>& x) {
     const phi::TensorBase& tensor = *x.at(0).impl();
diff --git a/paddle/phi/kernels/activation_grad_kernel.h b/paddle/phi/kernels/activation_grad_kernel.h
index 084843c31cf..0eb54e8d638 100644
--- a/paddle/phi/kernels/activation_grad_kernel.h
+++ b/paddle/phi/kernels/activation_grad_kernel.h
@@ -17,6 +17,7 @@ limitations under the License. */
 #include "paddle/phi/common/scalar.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/infermeta/unary.h"
+#include "paddle/utils/optional.h"
 
 namespace phi {
 
@@ -136,7 +137,7 @@ void SigmoidTripleGradKernel(const Context& dev_ctx,
                              const DenseTensor& dout,
                              const DenseTensor& ddx,
                              const DenseTensor& d_dout_new,
-                             const DenseTensor& d_ddout,
+                             paddle::optional<const DenseTensor&> d_ddout,
                              DenseTensor* d_out_new,
                              DenseTensor* d_dout,
                              DenseTensor* d_ddx);
diff --git a/paddle/phi/kernels/funcs/activation_functor.h b/paddle/phi/kernels/funcs/activation_functor.h
index b75477a1af9..315d540541f 100644
--- a/paddle/phi/kernels/funcs/activation_functor.h
+++ b/paddle/phi/kernels/funcs/activation_functor.h
@@ -1428,16 +1428,19 @@ struct SigmoidTripleGradFunctor : public BaseActivationFunctor<T> {
         GET_DATA_SAFELY(Out, "Input", "Out", "SigmoidTripleGrad"));
     auto dout = EigenVector<T>::Flatten(
         GET_DATA_SAFELY(dOut, "Input", "DOut", "SigmoidTripleGrad"));
-    auto d_ddOut = EigenVector<T>::Flatten(
-        GET_DATA_SAFELY(d_DDOut, "Input", "D_DDOut", "SigmoidTripleGrad"));
     auto d_dOutNew = EigenVector<T>::Flatten(GET_DATA_SAFELY(
         d_dOut_New, "Input", "D_DOut_New", "SigmoidTripleGrad"));
 
     if (d_Out_New) {
       auto d_OutNew = EigenVector<T>::Flatten(GET_DATA_SAFELY(
           d_Out_New, "Output", "D_OutNew", "SigmoidTripleGrad"));
-      d_OutNew.device(*d) = (ddx - static_cast<T>(2) * out * ddx) * d_ddOut -
-                            static_cast<T>(2) * dout * ddx * d_dOutNew;
+      d_OutNew.device(*d) = -static_cast<T>(2) * dout * ddx * d_dOutNew;
+      if (d_DDOut) {
+        auto d_ddOut = EigenVector<T>::Flatten(
+            GET_DATA_SAFELY(d_DDOut, "Input", "D_DDOut", "SigmoidTripleGrad"));
+        d_OutNew.device(*d) =
+            (ddx - static_cast<T>(2) * out * ddx) * d_ddOut + d_OutNew;
+      }
     }
     if (d_d_Out) {
       auto d_dOut = EigenVector<T>::Flatten(
@@ -1449,8 +1452,12 @@ struct SigmoidTripleGradFunctor : public BaseActivationFunctor<T> {
       auto d_ddx = EigenVector<T>::Flatten(
           GET_DATA_SAFELY(d_DDx, "Output", "D_DDx", "SigmoidTripleGrad"));
       d_ddx.device(*d) =
-          (static_cast<T>(1) - out) * out * d_ddOut +
           (static_cast<T>(1) - static_cast<T>(2) * out) * dout * d_dOutNew;
+      if (d_DDOut) {
+        auto d_ddOut = EigenVector<T>::Flatten(
+            GET_DATA_SAFELY(d_DDOut, "Input", "D_DDOut", "SigmoidTripleGrad"));
+        d_ddx.device(*d) = d_ddx + (static_cast<T>(1) - out) * out * d_ddOut;
+      }
     }
   }
   static constexpr ActBwdOpFwdDeps FwdDeps() {
diff --git a/paddle/phi/kernels/impl/activation_grad_impl.h b/paddle/phi/kernels/impl/activation_grad_impl.h
index 2f35acc0950..12fcac7d62d 100644
--- a/paddle/phi/kernels/impl/activation_grad_impl.h
+++ b/paddle/phi/kernels/impl/activation_grad_impl.h
@@ -265,7 +265,7 @@ void SigmoidTripleGradKernel(const Context& dev_ctx,
                              const DenseTensor& dout,
                              const DenseTensor& ddx,
                              const DenseTensor& d_dout_new,
-                             const DenseTensor& d_ddout,
+                             paddle::optional<const DenseTensor&> d_ddout,
                              DenseTensor* d_out_new,
                              DenseTensor* d_dout,
                              DenseTensor* d_ddx) {
@@ -274,11 +274,11 @@ void SigmoidTripleGradKernel(const Context& dev_ctx,
     dev_ctx.template Alloc<T>(d_dout);
   }
   if (d_out_new) {
-    d_dout->Resize(out.dims());
+    d_out_new->Resize(out.dims());
     dev_ctx.template Alloc<T>(d_out_new);
   }
   if (d_ddx) {
-    d_dout->Resize(ddx.dims());
+    d_ddx->Resize(ddx.dims());
     dev_ctx.template Alloc<T>(d_ddx);
   }
   funcs::SigmoidTripleGradFunctor<T> functor;
@@ -286,7 +286,7 @@ void SigmoidTripleGradKernel(const Context& dev_ctx,
                 &out,
                 &ddx,
                 &dout,
-                &d_ddout,
+                d_ddout.get_ptr(),
                 &d_dout_new,
                 d_dout,
                 d_out_new,
diff --git a/python/paddle/utils/code_gen/backward.yaml b/python/paddle/utils/code_gen/backward.yaml
index d7fb7d2611e..a720c27543c 100644
--- a/python/paddle/utils/code_gen/backward.yaml
+++ b/python/paddle/utils/code_gen/backward.yaml
@@ -1655,6 +1655,7 @@
   param : [out, fwd_grad_out, grad_grad_x]
   kernel :
     func : sigmoid_triple_grad
+  optional : grad_grad_out_grad
 
 - backward_api : silu_grad
   forward : silu (Tensor x) -> Tensor(out)
-- 
GitLab
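
To make the calling convention introduced by this patch easier to follow, below is a minimal, self-contained C++ sketch of the pattern the generated grad-node code uses: empty incoming gradients are zero-filled first, then each backward output is passed as a raw pointer that is nullptr when its slot is stop-gradient. The types and the sigmoid_grad function here are simplified stand-ins written for illustration only; they are not the real Paddle APIs, which appear in the diff above.

// Self-contained illustration (not Paddle source): simplified stand-ins for
// Tensor, GradSlotMeta, and a backward function in the new output format.
#include <cstdio>
#include <vector>

struct Tensor {
  bool initialized = false;
  double value = 0.0;
};

struct GradSlotMeta {
  bool stop_gradient = false;
  bool IsStopGradient() const { return stop_gradient; }
};

// New-format backward function: each output is a raw pointer that may be
// nullptr when the caller marked that slot as stop-gradient.
void sigmoid_grad(const Tensor& out, const Tensor& grad_out, Tensor* grad_x) {
  if (grad_x == nullptr) return;  // output not needed, skip the computation
  grad_x->initialized = true;
  grad_x->value = grad_out.value * out.value * (1.0 - out.value);
}

// Analogue of EagerUtils::FillZeroForEmptyGradInput for an empty incoming grad.
void FillZeroForEmptyGradInput(Tensor* in_grad) {
  if (!in_grad->initialized) {
    in_grad->initialized = true;
    in_grad->value = 0.0;
  }
}

int main() {
  Tensor out{true, 0.5};
  Tensor grad_out;                       // uninitialized incoming gradient
  FillZeroForEmptyGradInput(&grad_out);  // zero-fill before the backward call

  // Pre-size the output buffer from metadata, then pass a nullable pointer,
  // mirroring the api_output_0 code emitted by eager_gen.py above.
  std::vector<GradSlotMeta> out_metas = {{/*stop_gradient=*/false}};
  std::vector<Tensor> returns(out_metas.size());
  Tensor* api_output_0 =
      (out_metas.empty() || out_metas[0].IsStopGradient()) ? nullptr
                                                           : &returns[0];
  sigmoid_grad(out, grad_out, api_output_0);
  std::printf("grad_x = %f\n", returns[0].value);
  return 0;
}

The design point is that a null output pointer, rather than an empty returned vector, now signals that a gradient is not needed, which is exactly what the IsStopGradient() checks emitted by eager_gen.py encode and what the optional d_ddout handling in SigmoidTripleGradKernel consumes.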