add autogen code support for rnn op (#52799)

* add autogen code support for rnn op
* fix bug
* fix bug

add autogen code support for rnn op (#52799)
* add autogen code support for rnn op
* fix bug
* fix bug
aba6af4f · Zhenghai Zhang · GitHub · f9fadfc4 · f9fadfc4 · aba6af4f
5 changed files
--- a/paddle/fluid/operators/rnn_op.cc
+++ b/paddle/fluid/operators/rnn_op.cc
-/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include <memory>
-#include <string>
-
-#include "paddle/fluid/framework/infershape_utils.h"
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/framework/op_version_registry.h"
-#include "paddle/phi/core/infermeta_utils.h"
-#include "paddle/phi/infermeta/backward.h"
-#include "paddle/phi/infermeta/multiary.h"
-
-namespace paddle {
-namespace operators {
-
-class RNNOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
- protected:
-  phi::KernelKey GetExpectedKernelType(
-      const framework::ExecutionContext& ctx) const override {
-    return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "Input"),
-                          ctx.GetPlace());
-  }
-};
-
-class RNNOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  void Make() override {
-    AddInput(
-        "Input",
-        "(Tensor) RNN input tensor, which support variable-time length input "
-        "sequence."
-        "The shape of the Tensor MUST be ( seq_len * batch_size * input_size)"
-        "seq_len is the total time step in this mini-batch (CAN be change in "
-        "different batch)"
-        "batch_size is the instance number of this batch"
-        "input_size is the hidden size of the input."
-        "input_size and the hidden_size in the next may not be same");
-    AddInput("PreState",
-             "(Tensor) the initial hidden state of the LSTM"
-             "input. This is a tensor with shape (num_layers x batch_size x "
-             "hidden_size)"
-             "and When is_bidirec is True, the shape will be (num_layers*2 x "
-             "batch_size x hidden_size)")
-        .AsDuplicable();
-    AddInput("WeightList",
-             "(vector<Tensor>), stores weight and bias data when the weight "
-             "use the list format. ")
-        .AsDuplicable();
-    AddInput("SequenceLength",
-             "(Tensor) When the input data is padding, "
-             "set this parameter. This parameter represents "
-             "the variable sequence lengths in a batch. "
-             "The size of the vector has to equal the batch_size.")
-        .AsDispensable();
-    AddOutput("DropoutState",
-              "Store the global drop state when training, needed by cudnn rnn.")
-        .AsDispensable();
-    // maybe need add intermediate outputs for cpu kernel
-    AddOutput("Reserve",
-              "(Tensor, a temporary output Tensor to store the reserve_data "
-              "of cudnn kernel.")
-        .AsIntermediate();
-    AddOutput("Out",
-              "(Tensor) the hidden state of LSTM operator. "
-              "The shape is ( seq_len x batch_size x hidden_size) if "
-              "is_bidirec is False"
-              "and When is_bidirec is True, the shape will be ( seq_len x "
-              "batch_size x hidden_size * 2) ");
-    AddOutput("State",
-              "(Tensor) the hidden state of the last step. "
-              "The shape is ( num_layers x batch_size x hidden_size) if "
-              "is_bidirec is False"
-              "and When is_bidirec is True, the shape will be (num_layers*2 x "
-              "batch_size x hidden_size)")
-        .AsDuplicable();
-    AddAttr<float>(
-        "dropout_prob",
-        "dropout prob of the dropout op"
-        "the dropout ONLY work between rnn layers, not between time steps"
-        "There is no dropout work on the Out tensor")
-        .SetDefault(0.0);
-    AddAttr<bool>("is_bidirec", "whether it is bidirectional rnn")
-        .SetDefault(false);
-    AddAttr<int>("input_size", "input size ot the Input Tensor").SetDefault(10);
-    AddAttr<int>("hidden_size", "hidden size of rnn").SetDefault(100);
-    AddAttr<int>("num_layers", "the total layer number").SetDefault(1);
-    AddAttr<std::string>(
-        "mode",
-        "(string) rnn types, including: LSTM, GRU, RNN_RELU, RNN_TANH.");
-    AddAttr<int>("seed", "seed to used if fix_seed is True").SetDefault(0);
-    AddAttr<bool>("is_test", "True if in test phase.")
-        .SetDefault(false)
-        .AsExtra();
-    AddComment(R"DOC(
-)DOC");
-  }
-};
-
-class RNNGradOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
-  phi::KernelKey GetExpectedKernelType(
-      const framework::ExecutionContext& ctx) const override {
-    return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(
-                              ctx, framework::GradVarName("Out")),
-                          ctx.GetPlace());
-  }
-};
-
-template <typename T>
-class RNNGradOpMaker : public framework::SingleGradOpMaker<T> {
- public:
-  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
-
- protected:
-  void Apply(GradOpPtr<T> op) const override {
-    op->SetType("rnn_grad");
-    op->SetInput("Input", this->Input("Input"));
-    op->SetInput("PreState", this->Input("PreState"));
-    op->SetInput("WeightList", this->Input("WeightList"));
-    if (this->HasInput("SequenceLength")) {
-      op->SetInput("SequenceLength", this->Input("SequenceLength"));
-    }
-    op->SetInput("DropoutState", this->Output("DropoutState"));
-    op->SetInput("Reserve", this->Output("Reserve"));
-    op->SetInput("Out", this->Output("Out"));
-    op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
-    op->SetInput(framework::GradVarName("State"), this->OutputGrad("State"));
-
-    op->SetOutput(framework::GradVarName("WeightList"),
-                  this->InputGrad("WeightList", false));
-
-    op->SetOutput(framework::GradVarName("Input"), this->InputGrad("Input"));
-    op->SetOutput(framework::GradVarName("PreState"),
-                  this->InputGrad("PreState", false));
-    op->SetAttrMap(this->Attrs());
-  }
-};
-
-template <typename T>
-class NotImpleKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    PADDLE_THROW(platform::errors::Unimplemented(
-        "CPU is not support for this kernel now. Will be add in the future"));
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-DECLARE_INFER_SHAPE_FUNCTOR(rnn,
-                            RnnInferShapeFunctor,
-                            PD_INFER_META(phi::RnnInferMeta));
-DECLARE_INFER_SHAPE_FUNCTOR(rnn_grad,
-                            RnnGradInferShapeFunctor,
-                            PD_INFER_META(phi::RnnGradInferMeta));
-
-REGISTER_OPERATOR(rnn,
-                  ops::RNNOp,
-                  ops::RNNOpMaker,
-                  ops::RNNGradOpMaker<paddle::framework::OpDesc>,
-                  ops::RNNGradOpMaker<paddle::imperative::OpBase>,
-                  RnnInferShapeFunctor);
-REGISTER_OPERATOR(rnn_grad, ops::RNNGradOp, RnnGradInferShapeFunctor);
--- a/paddle/phi/api/yaml/op_compat.yaml
+++ b/paddle/phi/api/yaml/op_compat.yaml
@@ -1810,6 +1810,14 @@
  outputs :
    {param_out: ParamOut, moment_out: MomentOut, mean_square_out: MeanSquareOut, mean_grad_out: MeanGradOut, master_param_outs: MasterParamOut}

+- op : rnn
+  backward : rnn_grad
+  inputs:
+    { x : Input, pre_state : PreState, weight_list : WeightList, sequence_length : SequenceLength}
+  outputs:
+    { out : Out, dropout_state_out : DropoutState, state : State, reserve : Reserve}
+  drop_empty_grad : [pre_state_grad, weight_list_grad]
+
 - op : roll
  backward : roll_grad
  inputs :

--- a/paddle/phi/api/yaml/static_backward.yaml
+++ b/paddle/phi/api/yaml/static_backward.yaml
@@ -42,3 +42,14 @@
  kernel :
    func : frobenius_norm_grad
    param : [x, out, out_grad, axis, keepdim, reduce_all]
+
+- backward_op : rnn_grad
+  forward : rnn (Tensor x, Tensor[] pre_state, Tensor[] weight_list, Tensor sequence_length, float dropout_prob=0.0, bool is_bidirec=false, int input_size=10, int hidden_size=100, int num_layers=1, str mode="RNN_TANH", int seed=0, bool is_test=false) -> Tensor(out), Tensor(dropout_state_out), Tensor[](state), Tensor(reserve)
+  args : (Tensor x, Tensor[] pre_state, Tensor[] weight_list, Tensor sequence_length, Tensor out, Tensor dropout_state_out, Tensor reserve, Tensor out_grad, Tensor[] state_grad, float dropout_prob, bool is_bidirec, int input_size, int hidden_size, int num_layers, str mode, int seed, bool is_test)
+  output : Tensor(x_grad), Tensor[](pre_state_grad){pre_state.size()}, Tensor[](weight_list_grad){weight_list.size()}
+  infer_meta :
+    func : RnnGradInferMeta
+    param : [x, pre_state, weight_list]
+  kernel :
+    func : rnn_grad
+    data_type: out_grad
--- a/paddle/phi/api/yaml/static_ops.yaml
+++ b/paddle/phi/api/yaml/static_ops.yaml
@@ -313,6 +313,20 @@
    func : reduce_scatter
    param: [x, nranks]

+- op : rnn
+  args: (Tensor x, Tensor[] pre_state, Tensor[] weight_list, Tensor sequence_length, float dropout_prob=0.0, bool is_bidirec=false, int input_size=10, int hidden_size=100, int num_layers=1, str mode="RNN_TANH", int seed=0, bool is_test=false)
+  output: Tensor(out), Tensor(dropout_state_out), Tensor[](state){pre_state.size()}, Tensor(reserve)
+  infer_meta:
+    func: RnnInferMeta
+    param : [x, pre_state, weight_list, sequence_length, dropout_prob, is_bidirec, input_size, hidden_size, num_layers, mode, seed, is_test]
+  kernel:
+    func: rnn
+    param : [x, pre_state, weight_list, sequence_length, dropout_prob, is_bidirec, input_size, hidden_size, num_layers, mode, seed, is_test]
+    data_type: x
+  backward: rnn_grad
+  optional : sequence_length, dropout_state_out
+  intermediate : reserve
+
 - op : share_buffer
  args : (Tensor[] x, bool[] share_dims_and_dtype={})
  output : Tensor[](out){x.size()}, Tensor[](xout){x.size()}

--- a/paddle/phi/ops/compat/rnn_sig.cc
+++ b/paddle/phi/ops/compat/rnn_sig.cc
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddle/phi/core/compat/op_utils.h"
-
-namespace phi {
-
-KernelSignature RnnOpArgumentMapping(const ArgumentMappingContext& ctx) {
-  return KernelSignature("rnn",
-                         {"Input", "PreState", "WeightList", "SequenceLength"},
-                         {"dropout_prob",
-                          "is_bidirec",
-                          "input_size",
-                          "hidden_size",
-                          "num_layers",
-                          "mode",
-                          "seed",
-                          "is_test"},
-                         {"Out", "DropoutState", "State", "Reserve"});
-}
-
-KernelSignature RnnGradOpArgumentMapping(const ArgumentMappingContext& ctx) {
-  return KernelSignature("rnn_grad",
-                         {"Input",
-                          "PreState",
-                          "WeightList",
-                          "SequenceLength",
-                          "Out",
-                          "DropoutState",
-                          "Reserve",
-                          "Out@GRAD",
-                          "State@GRAD"},
-                         {"dropout_prob",
-                          "is_bidirec",
-                          "input_size",
-                          "hidden_size",
-                          "num_layers",
-                          "mode",
-                          "seed",
-                          "is_test"},
-                         {"Input@GRAD", "PreState@GRAD", "WeightList@GRAD"});
-}
-
-}  // namespace phi
-
-PD_REGISTER_ARG_MAPPING_FN(rnn, phi::RnnOpArgumentMapping);
-PD_REGISTER_ARG_MAPPING_FN(rnn_grad, phi::RnnGradOpArgumentMapping);