diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc
index b4cf9e9e009def25e53f690d914a38fbcb19abec..41f813b436293e102a7680e1603a77beaa2b9f31 100644
--- a/paddle/fluid/operators/activation_op.cc
+++ b/paddle/fluid/operators/activation_op.cc
@@ -161,13 +161,6 @@ $$out = \max(x, 0)$$
 
 )DOC";
 
-UNUSED constexpr char TanhDoc[] = R"DOC(
-Tanh Activation Operator.
-
-$$out = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
-
-)DOC";
-
 UNUSED constexpr char TanhShrinkDoc[] = R"DOC(
 TanhShrink Activation Operator.
 
@@ -529,7 +522,6 @@ It is recommended to use the defaults for this activation.
 
 REGISTER_ACTIVATION_OP_MAKER(Sigmoid, SigmoidDoc);
 REGISTER_ACTIVATION_OP_MAKER(Relu, ReluDoc);
-REGISTER_ACTIVATION_OP_MAKER(Tanh, TanhDoc);
 REGISTER_ACTIVATION_OP_MAKER(TanhShrink, TanhShrinkDoc);
 REGISTER_ACTIVATION_OP_MAKER(Sqrt, SqrtDoc);
 REGISTER_ACTIVATION_OP_MAKER(Rsqrt, RsqrtDoc);
@@ -699,54 +691,6 @@ class SigmoidTripleGradMaker
   }
 };
 
-template <typename T>
-class TanhDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker<T> {
- public:
-  using ::paddle::framework::SingleGradOpMaker<T>::SingleGradOpMaker;
-
- protected:
-  void Apply(GradOpPtr<T> op) const override {
-    op->SetType("tanh_grad_grad");
-    // input1: Out
-    op->SetInput("Out", this->Input("Out"));
-    // input2: ddx
-    op->SetInput("DDX", this->OutputGrad(framework::GradVarName("X")));
-    op->SetInput("DOut", this->Input(framework::GradVarName("Out")));
-    op->SetAttrMap(this->Attrs());
-    // output: ddy
-    op->SetOutput("DOutNew", this->InputGrad("Out"));
-    op->SetOutput("DDOut", this->InputGrad(framework::GradVarName("Out")));
-  }
-};
-
-template <typename T>
-class TanhTripleGradMaker : public ::paddle::framework::SingleGradOpMaker<T> {
- public:
-  using ::paddle::framework::SingleGradOpMaker<T>::SingleGradOpMaker;
-
- protected:
-  void Apply(GradOpPtr<T> op) const override {
-    op->SetType("tanh_triple_grad");
-    // Out, DDX, DOut, D_DDOut, D_DOut_New   // input
-    // D_OutNew, D_DOut, D_DDx               // output
-    // input1: Out
-    op->SetInput("Out", this->Input("Out"));
-    // input2: ddx
-    op->SetInput("DDX", this->Input("DDX"));
-    // input3: dout
-    op->SetInput("DOut", this->Input("DOut"));
-    // input4: d_ddout
-    op->SetInput("D_DDOut", this->OutputGrad("DDOut"));
-    // input5: d_dout_new
-    op->SetInput("D_DOut_New", this->OutputGrad("DOutNew"));
-    op->SetAttrMap(this->Attrs());
-
-    // output: d_dOut, d_OutNew, d_ddx
-    op->SetOutput("D_OutNew", this->InputGrad("Out"));
-    op->SetOutput("D_DOut", this->InputGrad("DOut"));
-    op->SetOutput("D_DDx", this->InputGrad("DDX"));
-  }
-};
 // ReluGrad: dx = dy if y >= 0 else 0
 // ReluGradGrad: ddy = ddx if y >= 0 else 0
 template <typename T>
@@ -1103,38 +1047,6 @@ REGISTER_OPERATOR(sigmoid_triple_grad,
 
 /* ========================================================================== */
 
-/* ========================== tanh register ============================= */
-REGISTER_OPERATOR(
-    tanh,
-    ops::ActivationOp,
-    ops::TanhOpMaker,
-    ops::ActivationOpInferVarType,
-    ops::ActivationGradOpMaker<ops::TanhGradFunctor<float>::FwdDeps(),
-                               paddle::framework::OpDesc>,
-    ops::ActivationGradOpMaker<ops::TanhGradFunctor<float>::FwdDeps(),
-                               paddle::imperative::OpBase>,
-    std::conditional<ops::CanInplaceAct<ops::TanhGradFunctor<float>>(),
-                     ops::ActFwdInplaceInferer,
-                     void>::type);
-REGISTER_OPERATOR(tanh_grad,
-                  ops::ActivationOpGrad,
-                  ops::ActivationGradOpInplaceInferer,
-                  ops::TanhDoubleGradMaker<paddle::framework::OpDesc>,
-                  ops::TanhDoubleGradMaker<paddle::imperative::OpBase>)
-REGISTER_OPERATOR(
-    tanh_grad_grad,
-    ops::ActivationOpDoubleGrad<ops::TanhGradFunctor<float>::FwdDeps()>,
-    ops::ActivationDoubleGradOpInplaceInferer,
-    ops::TanhTripleGradMaker<paddle::framework::OpDesc>,
-    ops::TanhTripleGradMaker<paddle::imperative::OpBase>);
-
-REGISTER_OPERATOR(
-    tanh_triple_grad,
-    ops::ActivationOpTripleGrad<ops::TanhTripleGradFunctor<float>::FwdDeps()>,
-    ops::ActivationTripleGradOpInplaceInferer);
-
-/* ========================================================================== */
-
 /* ========================== relu register ============================= */
 REGISTER_OPERATOR(
     relu,
diff --git a/paddle/phi/api/yaml/backward.yaml b/paddle/phi/api/yaml/backward.yaml
index a3611bcca3477314067523ca4584a73b436dbc19..faf2d7660ea3704c853fe3cc072a4ea154b4a90a 100644
--- a/paddle/phi/api/yaml/backward.yaml
+++ b/paddle/phi/api/yaml/backward.yaml
@@ -521,6 +521,41 @@
     func : tan_grad
   inplace : (out_grad -> x_grad)
 
+- backward_op : tanh_double_grad
+  forward : tanh_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x)
+  args : (Tensor out, Tensor grad_out, Tensor grad_x_grad)
+  output : Tensor(out_grad), Tensor(grad_out_grad)
+  infer_meta :
+    func : GeneralBinaryGradInferMeta
+    param : [out, out]
+  kernel :
+    func : tanh_double_grad
+  backward : tanh_triple_grad
+  inplace : (grad_x_grad -> grad_out_grad)
+
+- backward_op : tanh_grad
+  forward : tanh (Tensor x) -> Tensor(out)
+  args : (Tensor out, Tensor out_grad)
+  output : Tensor(x_grad)
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [out]
+  kernel :
+    func : tanh_grad
+  backward : tanh_double_grad
+  inplace : (out_grad -> x_grad)
+
+- backward_op : tanh_triple_grad
+  forward : tanh_double_grad (Tensor out, Tensor grad_out_forward, Tensor grad_x_grad_forward) -> Tensor(grad_out_new), Tensor(grad_out_grad)
+  args : (Tensor out, Tensor grad_out_forward, Tensor grad_x_grad_forward, Tensor grad_out_new_grad, Tensor grad_out_grad_grad)
+  output : Tensor(out_grad), Tensor(grad_out_forward_grad), Tensor(grad_x_grad_forward_grad)
+  infer_meta :
+    func : GeneralTernaryGradInferMeta
+    param : [out, out, grad_x_grad_forward]
+  kernel :
+    func : tanh_triple_grad
+  inplace : (grad_x_grad_forward -> grad_out_forward_grad)
+
 - backward_op : trace_grad
   forward : trace (Tensor x, int offset, int axis1, int axis2) -> Tensor(out)
   args : (Tensor x, Tensor out_grad, int offset, int axis1, int axis2)
diff --git a/paddle/phi/api/yaml/generator/generate_op.py b/paddle/phi/api/yaml/generator/generate_op.py
index df2281ee3d8a1054d282a231712aff6e431da77a..777b5283743fd06ba3ca3db8f5ca19658b1a2aac 100644
--- a/paddle/phi/api/yaml/generator/generate_op.py
+++ b/paddle/phi/api/yaml/generator/generate_op.py
@@ -86,12 +86,30 @@ def replace_compat_name(api_op_map, forward_api_dict, backward_api_dict):
         if api_name != op_name:
             forward_api_item['op_name'] = op_name
         if 'backward' in api_args and has_backward:
-            bw_api_name, bw_op_name = get_api_and_op_name(
-                api_args['backward'].split(',')[0]
-            )
+            backward_op_list = api_args['backward'].split(',')
+            bw_api_name, bw_op_name = get_api_and_op_name(backward_op_list[0])
             forward_api_item['backward'] = bw_op_name
             backward_api_item['op_name'] = bw_op_name
 
+            # for double grad
+            if len(backward_op_list) > 1:
+                double_grad_api_name, double_grad_op_name = get_api_and_op_name(
+                    backward_op_list[1]
+                )
+                double_grad_item = backward_api_dict[double_grad_api_name]
+                backward_api_item['backward'] = double_grad_op_name
+                double_grad_item['op_name'] = double_grad_op_name
+
+                # for triple grad
+                if len(backward_op_list) > 2:
+                    (
+                        triple_grad_api_name,
+                        triple_grad_op_name,
+                    ) = get_api_and_op_name(backward_op_list[2])
+                    triple_grad_item = backward_api_dict[triple_grad_api_name]
+                    double_grad_item['backward'] = triple_grad_op_name
+                    triple_grad_item['op_name'] = triple_grad_op_name
+
         key_set = ['inputs', 'attrs', 'outputs']
         args_map = {}
         for key in key_set:
diff --git a/paddle/phi/api/yaml/generator/templates/operator_utils.c.j2 b/paddle/phi/api/yaml/generator/templates/operator_utils.c.j2
index 60fd251f446d2d4272852e10ea6033e262d63776..502266ce7a994694a7c412a7a3a800fc79f688fb 100644
--- a/paddle/phi/api/yaml/generator/templates/operator_utils.c.j2
+++ b/paddle/phi/api/yaml/generator/templates/operator_utils.c.j2
@@ -389,7 +389,7 @@ class {{name | to_pascal_case}}OpMaker : public framework::SingleGradOpMaker
                                        forward_output_orig_names)}});
   {% endfor %}
 
-  grad_op->SetAttrMap(this->Attrs());
+  grad_op->SetAttrMap(this->Attrs());
   {% for attr in api["attrs"] %}
     {% set attr_name = attr["name"] %}
     {% if attr_name in forward_attr_names %}
@@ -456,15 +456,15 @@
                          input_orig_names, output_orig_names) %}{# inline #}
 {% if name in input_names %}
   {% set name_in_forward_orig = input_orig_names[input_names.index(name)]%}
-Input("{{name_in_forward_orig}}")
+Input({{name_in_forward_orig | to_opmaker_name}})
 {%- elif name in output_names %}
   {% set name_in_forward_orig = output_orig_names[output_names.index(name)]%}
-Output("{{name}}")
+Output({{name | to_opmaker_name}})
 {%- elif name.endswith("_grad") %}{# output grad#}
   {% set name_in_forward = name[:-5] %}
   {% if name_in_forward in output_names %}
     {% set name_in_forward_orig = output_orig_names[output_names.index(name_in_forward)] %}
-OutputGrad("{{name_in_forward_orig}}")
+OutputGrad({{name_in_forward_orig | to_opmaker_name}})
 {%- endif %}
 {%- endif %}
 {%- endmacro %}
@@ -474,11 +474,11 @@ OutputGrad("{{name_in_forward_orig}}")
 {% if name[:-5] in input_names %}
   {% set name_in_forward = name[:-5] %}
   {% set name_in_forward_orig = input_orig_names[input_names.index(name_in_forward)]%}
-InputGrad("{{name[:-5]}}")
+InputGrad({{name_in_forward_orig | to_opmaker_name}})
 {%- elif (name | to_input_name) in input_names %}
   {% set name_in_forward = name | to_input_name %}
   {% set name_in_forward_orig = input_orig_names[input_names.index(name_in_forward)]%}
-InputGrad("{{name | to_input_name}}")
+InputGrad({{name | to_input_name | to_opmaker_name}})
 {%- endif %}
 {%- endmacro %}
 
diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml
index 4e9a4abfcdb6564ab9d435e001927804004b20e3..d4eade92eeda2d684fa19a1c73a52d7bc0ebaddb 100755
--- a/paddle/phi/api/yaml/legacy_backward.yaml
+++ b/paddle/phi/api/yaml/legacy_backward.yaml
@@ -2112,30 +2112,6 @@
   kernel :
     func : take_along_axis_grad
 
-- backward_op : tanh_double_grad
-  forward : tanh_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x)
-  args : (Tensor out, Tensor grad_out, Tensor grad_x_grad)
-  output : Tensor(out_grad), Tensor(grad_out_grad)
-  infer_meta :
-    func : GeneralBinaryGradInferMeta
-    param : [out, out]
-  kernel :
-    func : tanh_double_grad
-  backward : tanh_triple_grad
-  inplace : (grad_x_grad -> grad_out_grad)
-
-- backward_op : tanh_grad
-  forward : tanh (Tensor x) -> Tensor(out)
-  args : (Tensor out, Tensor out_grad)
-  output : Tensor(x_grad)
-  infer_meta :
-    func : UnchangedInferMeta
-    param : [out]
-  kernel :
-    func : tanh_grad
-  backward : tanh_double_grad
-  inplace : (out_grad -> x_grad)
-
 - backward_op : tanh_shrink_grad
   forward : tanh_shrink (Tensor x) -> Tensor(out)
   args : (Tensor x, Tensor out_grad)
@@ -2147,17 +2123,6 @@
     func : tanh_shrink_grad
   inplace : (out_grad -> x_grad)
 
-- backward_op : tanh_triple_grad
-  forward : tanh_double_grad (Tensor out, Tensor grad_out_forward, Tensor grad_x_grad_forward) -> Tensor(grad_out_new), Tensor(grad_out_grad)
-  args : (Tensor out, Tensor grad_out_forward, Tensor grad_x_grad_forward, Tensor grad_out_new_grad, Tensor grad_out_grad_grad)
-  output : Tensor(out_grad), Tensor(grad_out_forward_grad), Tensor(grad_x_grad_forward_grad)
-  infer_meta :
-    func : GeneralTernaryGradInferMeta
-    param : [out, out, grad_x_grad_forward]
-  kernel :
-    func : tanh_triple_grad
-  inplace : (grad_x_grad_forward -> grad_out_forward_grad)
-
 - backward_op : temporal_shift_grad
   forward : temporal_shift(Tensor x, int seg_num, float shift_ratio, str data_format_str) -> Tensor(out)
   args : (Tensor out_grad, int seg_num, float shift_ratio, str data_format_str)
diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml
index 03dbb08b59fb173b17c253ead634ec8868c5423a..3c3e050b38e7b89514d6addfb3b2ba58786a7c11 100755
--- a/paddle/phi/api/yaml/legacy_ops.yaml
+++ b/paddle/phi/api/yaml/legacy_ops.yaml
@@ -2394,16 +2394,6 @@
     data_type : arr
   backward : take_along_axis_grad
 
-- op : tanh
-  args : (Tensor x)
-  output : Tensor(out)
-  infer_meta :
-    func : UnchangedInferMeta
-  kernel :
-    func : tanh
-  inplace : (x -> out)
-  backward : tanh_grad
-
 - op : tanh_shrink
   args : (Tensor x)
   output : Tensor
diff --git a/paddle/phi/api/yaml/op_compat.yaml b/paddle/phi/api/yaml/op_compat.yaml
index 2857beccb10d2dfcffe09361f489bc11cc18cfc6..e8a587a5da1a266ae78471627b334c0f8e6e399f 100644
--- a/paddle/phi/api/yaml/op_compat.yaml
+++ b/paddle/phi/api/yaml/op_compat.yaml
@@ -5,6 +5,10 @@
 
 - op : abs
   backward : abs_grad
+  inputs :
+    x : X
+  outputs :
+    out : Out
   extra :
     attrs : [bool use_mkldnn = false]
 
@@ -889,7 +893,11 @@
     attrs : [bool use_mkldnn = false, bool use_cudnn = false]
 
 - op : tanh
-  backward : tanh_grad
+  backward : tanh_grad, tanh_double_grad (tanh_grad_grad), tanh_triple_grad
+  inputs :
+    x : X
+  outputs :
+    out : Out
   extra :
     attrs : [bool use_mkldnn = false, bool use_cudnn = false]
 
diff --git a/paddle/phi/api/yaml/ops.yaml b/paddle/phi/api/yaml/ops.yaml
index 5fd80df6864cf231dd64fab35e4d177d13b78f27..78eb97984013eeb4da149ff110763105af0dba31 100644
--- a/paddle/phi/api/yaml/ops.yaml
+++ b/paddle/phi/api/yaml/ops.yaml
@@ -461,6 +461,16 @@
     func : tan
   backward : tan_grad
 
+- op : tanh
+  args : (Tensor x)
+  output : Tensor(out)
+  infer_meta :
+    func : UnchangedInferMeta
+  kernel :
+    func : tanh
+  inplace : (x -> out)
+  backward : tanh_grad
+
 - op : trace
   args : (Tensor x, int offset = 0, int axis1 = 0, int axis2 = 1)
   output : Tensor
diff --git a/paddle/phi/ops/compat/activation_sig.cc b/paddle/phi/ops/compat/activation_sig.cc
index 85e8f7c2de721d94a771444111b65acd5366b0f1..4d115eed62607d696e2fd2d541506c5a64c59aa8 100644
--- a/paddle/phi/ops/compat/activation_sig.cc
+++ b/paddle/phi/ops/compat/activation_sig.cc
@@ -67,7 +67,6 @@ DEFINE_ACT_GRAD_DEPX_OP_ARGMAP(Softplus,
                                "beta" comma "threshold");  // NOLINT
 
 DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Relu, "relu", );          // NOLINT
-DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Tanh, "tanh", );          // NOLINT
 DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Sigmoid, "sigmoid", );    // NOLINT
 DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Sqrt, "sqrt", );          // NOLINT
 DEFINE_ACT_GRAD_DEPOUT_OP_ARGMAP(Rsqrt, "rsqrt", );        // NOLINT
@@ -94,20 +93,6 @@ KernelSignature ReluDoubleGradOpArgumentMapping(
   return KernelSignature("relu_double_grad", {"Out", "DDX"}, {}, {"DDOut"});
 }
 
-KernelSignature TanhDoubleGradOpArgumentMapping(
-    const ArgumentMappingContext& ctx) {
-  return KernelSignature(
-      "tanh_double_grad", {"Out", "DOut", "DDX"}, {}, {"DOutNew", "DDOut"});
-}
-
-KernelSignature TanhTripleGradOpArgumentMapping(
-    const ArgumentMappingContext& ctx) {
-  return KernelSignature("tanh_triple_grad",
-                         {"Out", "DOut", "DDX", "D_DOut_New", "D_DDOut"},
-                         {},
-                         {"D_OutNew", "D_DOut", "D_DDx"});
-}
-
 KernelSignature SigmoidDoubleGradOpArgumentMapping(
     const ArgumentMappingContext& ctx) {
   return KernelSignature(
@@ -198,7 +183,6 @@ KernelSignature PowGradOpArgumentMapping(const ArgumentMappingContext& ctx) {
 }  // namespace phi
 
 PD_REGISTER_BASE_KERNEL_NAME(relu_grad_grad, relu_double_grad);
-PD_REGISTER_BASE_KERNEL_NAME(tanh_grad_grad, tanh_double_grad);
 PD_REGISTER_BASE_KERNEL_NAME(leaky_relu_grad_grad, leaky_relu_double_grad);
 PD_REGISTER_BASE_KERNEL_NAME(softshrink, soft_shrink);
 PD_REGISTER_BASE_KERNEL_NAME(softshrink_grad, soft_shrink_grad);
@@ -227,11 +211,6 @@
 PD_REGISTER_ARG_MAPPING_FN(softplus_grad, phi::SoftplusGradOpArgumentMapping);
 PD_REGISTER_ARG_MAPPING_FN(relu_grad_grad,
                            phi::ReluDoubleGradOpArgumentMapping);
-PD_REGISTER_ARG_MAPPING_FN(tanh_grad, phi::TanhGradOpArgumentMapping);
-PD_REGISTER_ARG_MAPPING_FN(tanh_grad_grad,
-                           phi::TanhDoubleGradOpArgumentMapping);
-PD_REGISTER_ARG_MAPPING_FN(tanh_triple_grad,
-                           phi::TanhTripleGradOpArgumentMapping);
 PD_REGISTER_ARG_MAPPING_FN(brelu_grad, phi::HardTanhGradOpArgumentMapping);
 PD_REGISTER_ARG_MAPPING_FN(leaky_relu, phi::LeakyReluOpArgumentMapping);
 PD_REGISTER_ARG_MAPPING_FN(leaky_relu_grad,
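
Not part of the patch itself, but a quick way to exercise what it moves: a minimal dygraph sketch (assuming only the public Paddle 2.x `paddle.tanh` / `paddle.grad` API) that runs through the `tanh_grad`, `tanh_double_grad` and `tanh_triple_grad` kernels now declared in ops.yaml / backward.yaml instead of activation_op.cc. The variable names (`x`, `dx`, `d2x`, `d3x`) are purely illustrative.

import paddle

# Forward pass; stop_gradient=False marks x as requiring gradients.
x = paddle.to_tensor([0.1, 0.5, 0.9], dtype='float32', stop_gradient=False)
y = paddle.tanh(x)

# 1st order: dy/dx = 1 - tanh(x)^2, served by the tanh_grad kernel.
(dx,) = paddle.grad([y], [x], create_graph=True)

# 2nd order, served by tanh_double_grad (registered above as the backward of tanh_grad).
(d2x,) = paddle.grad([dx], [x], create_graph=True)

# 3rd order, served by tanh_triple_grad.
(d3x,) = paddle.grad([d2x], [x])

print(dx.numpy(), d2x.numpy(), d3x.numpy())

If the YAML wiring or the generated grad op makers were wrong, the second or third `paddle.grad` call is typically where it would surface.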