Unverified commit 4427f1b1, authored by zyfncg, committed by GitHub

[Phi] Change the output format of C++ backward api (Part2) (#42545)

* change the output format of C++ backward api

* fix merge conflict

* fix sparse api code auto-gen

* fix eager_gen bug

* fix bug when output is null

* fix bug of conv2d_grad_impl

* fix optional grad

* fix bug of eager-gen double_grad

* fix bug

* fix multiply_double_grad bug

* fix bug of higher order derivative

* fix bug of FillZeroForEmptyGradInput

* remove redundant vector in grad_node

* fix bug of test_deformable_conv_v1_op

* fix bug of test_deformable_conv_v1_op

* some refactoring
Parent: 892f6850
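For orientation: with the new output format, each C++ backward api output is passed as a pre-allocated tensor pointer that is set to nullptr when that gradient is not needed, so the api can skip computing it. A minimal sketch of the convention, where need_x_grad and my_op_grad are placeholders rather than real Paddle symbols:

// Hedged sketch of the output convention; need_x_grad and my_op_grad are
// illustrative placeholders, not real Paddle symbols.
bool need_x_grad = true;
paddle::experimental::Tensor x_grad_value;
paddle::experimental::Tensor* x_grad =
    need_x_grad ? &x_grad_value : nullptr;  // nullptr means "do not compute this gradient"
// my_op_grad(out_grad, x_grad);            // the backward api writes into *x_grad when non-null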
......@@ -24,10 +24,11 @@ import os
ops_to_fill_zero_for_empty_grads = set([
"split_grad", "rnn_grad", "matmul_double_grad", "matmul_triple_grad",
"sigmoid_double_grad", "sigmoid_triple_grad", "add_double_grad",
"add_triple_grad", "multiply_double_grad", "multiply_triple_grad",
"conv2d_grad_grad", "batch_norm_double_grad", "tanh_double_grad",
"tanh_triple_grad", "subtract_double_grad", "divide_double_grad",
"log_double_grad", "elu_double_grad", "leaky_relu_double_grad"
"add_triple_grad", "multiply_grad", "multiply_double_grad",
"multiply_triple_grad", "conv2d_grad_grad", "batch_norm_double_grad",
"tanh_double_grad", "tanh_triple_grad", "subtract_double_grad",
"divide_double_grad", "log_double_grad", "elu_double_grad",
"leaky_relu_double_grad"
])
# For API dispatch used at python-level
......
......@@ -738,9 +738,14 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase):
num_outputs = len(forward_outputs_position_map.keys())
for name, (_, pos) in forward_outputs_position_map.items():
output_autograd_meta_name = GetAutoGradMetaName(name)
set_out_rank = f"""{indent}if ({output_autograd_meta_name}) {{
{indent} egr::EagerUtils::SetOutRankWithSlot({output_autograd_meta_name}, {pos});
{indent}}}"""
set_history = f"""{indent}if ({output_autograd_meta_name}) {{
{indent} egr::EagerUtils::SetHistory({output_autograd_meta_name}, grad_node);
{indent}}}"""
set_out_rank = f"{indent}egr::EagerUtils::SetOutRankWithSlot({output_autograd_meta_name}, {pos});"
set_history = f"{indent}egr::EagerUtils::SetHistory({output_autograd_meta_name}, grad_node);"
set_grad_in_meta = f"{indent}grad_node->SetGradInMeta({name}, {pos});"
set_retain_grad = f"{indent}egr::EagerUtils::CheckAndRetainGrad({name});"
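For reference, a hedged sketch of the C++ that the guarded templates above would emit for a hypothetical forward output named out in slot 0 (the name and slot index are assumptions for illustration):

// Illustrative generated code; the output autograd meta can be nullptr for
// uninitialized outputs, so each call is guarded.
if (out_autograd_meta) {
  egr::EagerUtils::SetOutRankWithSlot(out_autograd_meta, 0);
}
if (out_autograd_meta) {
  egr::EagerUtils::SetHistory(out_autograd_meta, grad_node);
}
grad_node->SetGradInMeta(out, 0);
egr::EagerUtils::CheckAndRetainGrad(out);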
......@@ -1265,7 +1270,17 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
# Fill Grad Ins with Zero
fill_zero_str = ""
if backward_api_name in ops_to_fill_zero_for_empty_grads:
fill_zero_str = f"{indent}egr::EagerUtils::FillZeroForEmptyGradInputs(&grads, this->InputMeta());\n"
fill_zero_str = f"{indent}const auto& input_metas = this->InputMeta();\n"
for name, (ttype, fwd_position,
grad_api_position) in backward_grad_inputs_map.items():
if name in self.optional_inputs:
if IsPlainTensorType(ttype):
fill_zero_str += f"{indent}egr::EagerUtils::FillZeroForEmptyOptionalGradInput(&grads[{fwd_position}][0], input_metas[{fwd_position}][0]);\n"
else:
if IsPlainTensorType(ttype):
fill_zero_str += f"{indent}egr::EagerUtils::FillZeroForEmptyGradInput(&grads[{fwd_position}][0], input_metas[{fwd_position}][0]);\n"
else:
fill_zero_str += f"{indent}egr::EagerUtils::FillZeroForEmptyGradInput(&grads[{fwd_position}], input_metas[{fwd_position}]);\n"
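A hedged sketch of the zero-filling code this branch generates for a hypothetical grad node with a plain grad input in slot 0 and an optional plain grad input in slot 1 (the slot layout is an assumption for illustration):

// Illustrative generated code inside the grad node's operator().
const auto& input_metas = this->InputMeta();
egr::EagerUtils::FillZeroForEmptyGradInput(&grads[0][0], input_metas[0][0]);
egr::EagerUtils::FillZeroForEmptyOptionalGradInput(&grads[1][0], input_metas[1][0]);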
# Grad Ins from TensorWrappers
for name, (_, is_fwd_input,
......@@ -1322,40 +1337,46 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
get_grad_in_args_str = "\n".join(get_grad_in_args_list)
# Grad Outputs
for name, (ttype, fwd_position,
grad_api_position) in backward_grad_outputs_map.items():
transformed_tensor_name = self.TransformToNextGradName(name)
if IsPlainTensorType(ttype):
grad_api_args.append(f"api_output[{fwd_position}][0]")
else:
assert IsVectorTensorType(ttype)
grad_api_args.append(f"api_output[{fwd_position}]")
grad_api_args_str = ", ".join(grad_api_args)
# Grad Function Call String
slot_num_bwd_outputs = len(self.forward_inputs_position_map.keys())
grad_api_namespace = f"paddle::experimental::{namespace}"
grad_function_call_str = f"""
const auto& out_metas = OutputMeta();
paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> returns({slot_num_bwd_outputs});
paddle::small_vector<std::vector<paddle::experimental::Tensor*>, egr::kSlotSmallVectorSize> api_output({slot_num_bwd_outputs});
for (int i = 0; i < {slot_num_bwd_outputs}; ++i) {{
returns[i].resize(out_metas[i].size());
if(returns[i].size() == 0) {{
api_output[i].reserve(1);
api_output[i].push_back(nullptr);
continue;
}}
api_output[i].reserve(returns[i].size());
for (size_t j = 0; j < returns[i].size(); ++j) {{
api_output[i].push_back(&returns[i][j]);
}}
}}
"""
grad_function_call_str = grad_function_call_str + f"{indent}{grad_api_namespace}{backward_api_name}({grad_api_args_str});"
# Grad Outputs
out_index = -1
for name, (ttype, fwd_position,
grad_api_position) in backward_grad_outputs_map.items():
transformed_tensor_name = self.TransformToNextGradName(name)
out_index = out_index + 1
grad_api_args.append(f"api_output_{out_index}")
if IsPlainTensorType(ttype):
grad_function_call_str += f"""
auto* api_output_{out_index} = (out_metas[{fwd_position}].empty() || out_metas[{fwd_position}][0].IsStopGradient()) ? nullptr : &returns[{fwd_position}][0];"""
else:
assert IsVectorTensorType(ttype)
grad_function_call_str += f"""
std::vector<paddle::experimental::Tensor*> api_output_{out_index};
api_output_{out_index}.reserve(returns[{fwd_position}].size());
for (size_t i = 0; i < returns[{fwd_position}].size(); ++i) {{
if (out_metas[{fwd_position}].empty() || out_metas[{fwd_position}][i].IsStopGradient()) {{
api_output_{out_index}.push_back(nullptr);
}} else {{
api_output_{out_index}.push_back(&returns[{fwd_position}][i]);
}}
}}"""
grad_api_args_str = ", ".join(grad_api_args)
grad_function_call_str = grad_function_call_str + f"""
{indent}{grad_api_namespace}{backward_api_name}({grad_api_args_str});"""
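A hedged sketch of the generated output setup for a hypothetical node with a plain grad output in slot 0 and a vector grad output in slot 1; out_metas and returns are prepared earlier in the generated body, and the final grad api call is left as a comment since its name and inputs depend on the op:

// Illustrative generated code (slot layout assumed for this sketch).
auto* api_output_0 = (out_metas[0].empty() || out_metas[0][0].IsStopGradient())
                         ? nullptr
                         : &returns[0][0];
std::vector<paddle::experimental::Tensor*> api_output_1;
api_output_1.reserve(returns[1].size());
for (size_t i = 0; i < returns[1].size(); ++i) {
  if (out_metas[1].empty() || out_metas[1][i].IsStopGradient()) {
    api_output_1.push_back(nullptr);
  } else {
    api_output_1.push_back(&returns[1][i]);
  }
}
// paddle::experimental::<backward_api_name>(grad_inputs..., api_output_0, api_output_1);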
# Check Nan and Inf
check_nan_inf_str = CHECK_NAN_AND_INF_TEMPLATE.format(backward_api_name,
......@@ -1425,7 +1446,7 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
if IsPlainTensorType(rtype):
output_autograd_meta = f"""
auto& {transformed_tensor_name} = returns[{pos}][0];
egr::AutogradMeta* {output_autograd_meta_name} = egr::EagerUtils::autograd_meta(&{transformed_tensor_name});"""
egr::AutogradMeta* {output_autograd_meta_name} = returns[{pos}][0].initialized() ? egr::EagerUtils::autograd_meta(&{transformed_tensor_name}) : nullptr;"""
else:
assert IsVectorTensorType(rtype)
......
......@@ -218,6 +218,8 @@ void GradNodeBase::SetGradOutMeta(const paddle::experimental::Tensor& fwd_in,
// Set Stop_gradient
if (fwd_in_meta) {
meta.SetStopGradient(fwd_in_meta->StopGradient());
} else {
meta.SetStopGradient(true);
}
// Set Adj Edges
if (fwd_in_meta && !fwd_in_meta->StopGradient()) {
......
......@@ -454,16 +454,48 @@ void EagerUtils::FillZeroForEmptyGradInputs(
grad_in_meta.HasTensorMeta(),
paddle::platform::errors::Fatal(
"Unable to fill empty grad inputs due to empty GradSlotMeta"));
const auto& tensor_meta = grad_in_meta.GetTensorMeta();
phi::Place place = grad_in_meta.GetPlace();
auto tensor_with_zero = paddle::experimental::full(
phi::vectorize(tensor_meta.dims), 0.0, tensor_meta.dtype, place);
phi::vectorize(tensor_meta.dims), 0.0, tensor_meta.dtype,
grad_in_meta.GetPlace());
grad.set_impl(tensor_with_zero.impl());
}
}
}
}
void EagerUtils::FillZeroForEmptyGradInput(
paddle::experimental::Tensor* in_grad, const GradSlotMeta& grad_in_meta) {
if (!in_grad->initialized()) {
PADDLE_ENFORCE(
grad_in_meta.HasTensorMeta(),
paddle::platform::errors::Fatal(
"Unable to fill empty grad inputs due to empty GradSlotMeta"));
const auto& tensor_meta = grad_in_meta.GetTensorMeta();
auto tensor_with_zero =
paddle::experimental::full(phi::vectorize(tensor_meta.dims), 0.0,
tensor_meta.dtype, grad_in_meta.GetPlace());
in_grad->set_impl(tensor_with_zero.impl());
}
}
void EagerUtils::FillZeroForEmptyOptionalGradInput(
paddle::experimental::Tensor* in_grad, const GradSlotMeta& grad_in_meta) {
if (!in_grad->initialized() && grad_in_meta.HasTensorMeta()) {
const auto& tensor_meta = grad_in_meta.GetTensorMeta();
auto tensor_with_zero =
paddle::experimental::full(phi::vectorize(tensor_meta.dims), 0.0,
tensor_meta.dtype, grad_in_meta.GetPlace());
in_grad->set_impl(tensor_with_zero.impl());
}
}
void EagerUtils::FillZeroForEmptyGradInput(
std::vector<paddle::experimental::Tensor>* in_grads,
const std::vector<GradSlotMeta>& grad_in_metas) {
for (size_t i = 0; i < in_grads->size(); i++) {
FillZeroForEmptyGradInput(&in_grads->at(i), grad_in_metas[i]);
}
}
} // namespace egr
......@@ -238,6 +238,13 @@ class EagerUtils {
kSlotSmallVectorSize>* out_grads,
const paddle::small_vector<std::vector<GradSlotMeta>,
kSlotSmallVectorSize>& grad_out_metas);
static void FillZeroForEmptyGradInput(paddle::experimental::Tensor* in_grad,
const GradSlotMeta& grad_in_meta);
static void FillZeroForEmptyOptionalGradInput(
paddle::experimental::Tensor* in_grad, const GradSlotMeta& grad_in_meta);
static void FillZeroForEmptyGradInput(
std::vector<paddle::experimental::Tensor>* in_grads,
const std::vector<GradSlotMeta>& grad_in_metas);
};
} // namespace egr
......@@ -109,7 +109,12 @@ struct KernelKeyParser : ArgsIterator<KernelKeyParser> {
}
}
void operator()(const Tensor& x) { AssignKernelKeySet(*x.impl()); }
void operator()(const Tensor& x) {
const auto* tensor = x.impl().get();
if (tensor) {
AssignKernelKeySet(*tensor);
}
}
void operator()(const std::vector<Tensor>& x) {
const phi::TensorBase& tensor = *x.at(0).impl();
......
......@@ -17,6 +17,7 @@ limitations under the License. */
#include "paddle/phi/common/scalar.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/infermeta/unary.h"
#include "paddle/utils/optional.h"
namespace phi {
......@@ -136,7 +137,7 @@ void SigmoidTripleGradKernel(const Context& dev_ctx,
const DenseTensor& dout,
const DenseTensor& ddx,
const DenseTensor& d_dout_new,
const DenseTensor& d_ddout,
paddle::optional<const DenseTensor&> d_ddout,
DenseTensor* d_out_new,
DenseTensor* d_dout,
DenseTensor* d_ddx);
......
......@@ -1428,16 +1428,19 @@ struct SigmoidTripleGradFunctor : public BaseActivationFunctor<T> {
GET_DATA_SAFELY(Out, "Input", "Out", "SigmoidTripleGrad"));
auto dout = EigenVector<T>::Flatten(
GET_DATA_SAFELY(dOut, "Input", "DOut", "SigmoidTripleGrad"));
auto d_ddOut = EigenVector<T>::Flatten(
GET_DATA_SAFELY(d_DDOut, "Input", "D_DDOut", "SigmoidTripleGrad"));
auto d_dOutNew = EigenVector<T>::Flatten(GET_DATA_SAFELY(
d_dOut_New, "Input", "D_DOut_New", "SigmoidTripleGrad"));
if (d_Out_New) {
auto d_OutNew = EigenVector<T>::Flatten(GET_DATA_SAFELY(
d_Out_New, "Output", "D_OutNew", "SigmoidTripleGrad"));
d_OutNew.device(*d) = (ddx - static_cast<T>(2) * out * ddx) * d_ddOut -
static_cast<T>(2) * dout * ddx * d_dOutNew;
d_OutNew.device(*d) = -static_cast<T>(2) * dout * ddx * d_dOutNew;
if (d_DDOut) {
auto d_ddOut = EigenVector<T>::Flatten(
GET_DATA_SAFELY(d_DDOut, "Input", "D_DDOut", "SigmoidTripleGrad"));
d_OutNew.device(*d) =
(ddx - static_cast<T>(2) * out * ddx) * d_ddOut + d_OutNew;
}
}
if (d_d_Out) {
auto d_dOut = EigenVector<T>::Flatten(
......@@ -1449,8 +1452,12 @@ struct SigmoidTripleGradFunctor : public BaseActivationFunctor<T> {
auto d_ddx = EigenVector<T>::Flatten(
GET_DATA_SAFELY(d_DDx, "Output", "D_DDx", "SigmoidTripleGrad"));
d_ddx.device(*d) =
(static_cast<T>(1) - out) * out * d_ddOut +
(static_cast<T>(1) - static_cast<T>(2) * out) * dout * d_dOutNew;
if (d_DDOut) {
auto d_ddOut = EigenVector<T>::Flatten(
GET_DATA_SAFELY(d_DDOut, "Input", "D_DDOut", "SigmoidTripleGrad"));
d_ddx.device(*d) = d_ddx + (static_cast<T>(1) - out) * out * d_ddOut;
}
}
}
static constexpr ActBwdOpFwdDeps FwdDeps() {
......
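Restating the functor change above in math form: each output is split into a term that does not depend on d_DDOut plus a correction that is added only when the optional d_DDOut is provided (the result matches the original single-expression form when it is):

\begin{aligned}
d\_OutNew &= -2 \cdot dout \cdot ddx \cdot d\_dOutNew \;+\; \big[(ddx - 2 \cdot out \cdot ddx) \cdot d\_DDOut\big] \\
d\_ddx    &= (1 - 2 \cdot out) \cdot dout \cdot d\_dOutNew \;+\; \big[(1 - out) \cdot out \cdot d\_DDOut\big]
\end{aligned}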
......@@ -265,7 +265,7 @@ void SigmoidTripleGradKernel(const Context& dev_ctx,
const DenseTensor& dout,
const DenseTensor& ddx,
const DenseTensor& d_dout_new,
const DenseTensor& d_ddout,
paddle::optional<const DenseTensor&> d_ddout,
DenseTensor* d_out_new,
DenseTensor* d_dout,
DenseTensor* d_ddx) {
......@@ -274,11 +274,11 @@ void SigmoidTripleGradKernel(const Context& dev_ctx,
dev_ctx.template Alloc<T>(d_dout);
}
if (d_out_new) {
d_dout->Resize(out.dims());
d_out_new->Resize(out.dims());
dev_ctx.template Alloc<T>(d_out_new);
}
if (d_ddx) {
d_dout->Resize(ddx.dims());
d_ddx->Resize(ddx.dims());
dev_ctx.template Alloc<T>(d_ddx);
}
funcs::SigmoidTripleGradFunctor<T> functor;
......@@ -286,7 +286,7 @@ void SigmoidTripleGradKernel(const Context& dev_ctx,
&out,
&ddx,
&dout,
&d_ddout,
d_ddout.get_ptr(),
&d_dout_new,
d_dout,
d_out_new,
......
......@@ -1655,6 +1655,7 @@
param : [out, fwd_grad_out, grad_grad_x]
kernel :
func : sigmoid_triple_grad
optional : grad_grad_out_grad
- backward_api : silu_grad
forward : silu (Tensor x) -> Tensor(out)
......