From 6cdaa371be301b9b7a7c34b8f8c45319b0ce70a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9F=A0=E6=AA=AC=E5=91=B3=7E?=
 <93066842+Lemon-er@users.noreply.github.com>
Date: Mon, 5 Dec 2022 11:20:08 +0800
Subject: [PATCH] DenseTensor (#48419)

---
 .../sequence_ops/sequence_conv_op.cc          | 29 ++++++------
 .../operators/sequence_ops/sequence_conv_op.h | 34 +++++++-------
 .../sequence_ops/sequence_conv_op_xpu.cc      | 32 ++++++-------
 .../sequence_ops/sequence_enumerate_op.cc     |  8 ++--
 .../sequence_ops/sequence_enumerate_op.cu     |  5 +-
 .../sequence_ops/sequence_enumerate_op.h      |  7 ++-
 .../sequence_ops/sequence_erase_op.cc         | 37 ++++++++-------
 .../sequence_ops/sequence_erase_op.cu         |  5 +-
 .../sequence_ops/sequence_expand_as_op.cc     | 23 ++++-----
 .../sequence_ops/sequence_expand_as_op.cu     | 10 ++--
 .../sequence_ops/sequence_expand_op.h         | 34 +++++++-------
 .../sequence_ops/sequence_mask_op.cc          |  2 +-
 .../operators/sequence_ops/sequence_mask_op.h | 14 +++---
 .../sequence_ops/sequence_mask_op_npu.cc      | 12 ++---
 .../operators/sequence_ops/sequence_pad_op.cc | 34 +++++++-------
 .../operators/sequence_ops/sequence_pad_op.h  | 26 +++++-----
 .../sequence_ops/sequence_pool_op.cc          | 17 ++++---
 .../operators/sequence_ops/sequence_pool_op.h | 22 ++++-----
 .../sequence_ops/sequence_softmax_op.h        | 47 +++++++++----------
 19 files changed, 195 insertions(+), 203 deletions(-)

diff --git a/paddle/fluid/operators/sequence_ops/sequence_conv_op.cc b/paddle/fluid/operators/sequence_ops/sequence_conv_op.cc
index 7056c52cd8..57669dbcd6 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_conv_op.cc
+++ b/paddle/fluid/operators/sequence_ops/sequence_conv_op.cc
@@ -145,30 +145,31 @@ class SequenceConvOpMaker : public framework::OpProtoAndCheckerMaker {
   void Make() override {
     AddInput(
         "X",
-        "(LoDTensor) the input(X) is a LodTensor, which supports "
+        "(phi::DenseTensor) the input(X) is a LodTensor, which supports "
         "variable-time length input sequence. The underlying tensor in "
-        "this LoDTensor is a matrix with shape (T, N), where T is the "
+        "this phi::DenseTensor is a matrix with shape (T, N), where T is the "
         "total time steps in this mini-batch and N is the input_hidden_size.");
-    AddInput("PaddingData",
-             "(Tensor, optional) the input(PaddingData) is an optional "
-             "parameter, and it is learnable. "
-             "This is a tensor with shape (P, N), where P is the "
-             "top_pad + bottom_pad, N is the input_hidden_size. In order to "
-             "ensure the equal length of sequence before and after "
-             "convolution, it is necessary to fill the top and bottom of each "
-             "sequence according to context_length, context_stride and "
-             "context_start")
+    AddInput(
+        "PaddingData",
+        "(phi::DenseTensor, optional) the input(PaddingData) is an optional "
+        "parameter, and it is learnable. "
+        "This is a tensor with shape (P, N), where P is the "
+        "top_pad + bottom_pad, N is the input_hidden_size. In order to "
+        "ensure the equal length of sequence before and after "
+        "convolution, it is necessary to fill the top and bottom of each "
+        "sequence according to context_length, context_stride and "
+        "context_start")
         .AsDispensable();
     AddInput(
         "Filter",
-        "(Tensor) the input(Filter) is an learnable parameter."
+        "(phi::DenseTensor) the input(Filter) is an learnable parameter."
         "This is a tensor with shape (K, M), where K is the "
         "context_length * input_hidden_size, M is the output feature size.");
     AddOutput(
         "Out",
-        "(LoDTensor) the output(Out) is a LodTensor, which support "
+        "(phi::DenseTensor) the output(Out) is a LodTensor, which support "
         "variable-time length output sequence. The underlying tensor in "
-        "this LoDTensor is a matrix with shape (T, M), where, T is the "
+        "this phi::DenseTensor is a matrix with shape (T, M), where, T is the "
         "total time steps in this mini-batch, M is the output feature size.");
 
     AddAttr<bool>("paddingTrainable",
diff --git a/paddle/fluid/operators/sequence_ops/sequence_conv_op.h b/paddle/fluid/operators/sequence_ops/sequence_conv_op.h
index 5dec776c32..cf34cde478 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_conv_op.h
+++ b/paddle/fluid/operators/sequence_ops/sequence_conv_op.h
@@ -22,15 +22,12 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
-
 template <typename DeviceContext, typename T>
 class SequenceConvKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    auto* in = context.Input<LoDTensor>("X");
-    auto* out = context.Output<LoDTensor>("Out");
+    auto* in = context.Input<phi::DenseTensor>("X");
+    auto* out = context.Output<phi::DenseTensor>("Out");
     auto filter = *context.Input<phi::DenseTensor>("Filter");
 
     out->mutable_data<T>(context.GetPlace());
@@ -40,11 +37,11 @@ class SequenceConvKernel : public framework::OpKernel<T> {
     int context_stride = context.Attr<int>("contextStride");
     bool padding_trainable = context.Attr<bool>("paddingTrainable");
 
-    PADDLE_ENFORCE_EQ(
-        in->lod().empty(),
-        false,
-        platform::errors::InvalidArgument("Input(X) Tensor of SequenceConvOp "
-                                          "does not contain LoD information."));
+    PADDLE_ENFORCE_EQ(in->lod().empty(),
+                      false,
+                      platform::errors::InvalidArgument(
+                          "Input(X) phi::DenseTensor of SequenceConvOp "
+                          "does not contain LoD information."));
     PADDLE_ENFORCE_EQ(
         in->lod().size(),
         1UL,
@@ -64,7 +61,7 @@ class SequenceConvKernel : public framework::OpKernel<T> {
 
     framework::DDim col_shape = {in->dims()[0],
                                  context_length * sequence_width};
-    Tensor col;
+    phi::DenseTensor col;
     col.mutable_data<T>(col_shape, context.GetPlace());
     // Because if padding_trainable is false, padding data should be zeros.
     phi::funcs::SetConstant<DeviceContext, T> set_zero;
@@ -92,13 +89,14 @@ template <typename DeviceContext, typename T>
 class SequenceConvGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    auto* in_g = context.Output<LoDTensor>(framework::GradVarName("X"));
-    auto* out_g = context.Input<LoDTensor>(framework::GradVarName("Out"));
+    auto* in_g = context.Output<phi::DenseTensor>(framework::GradVarName("X"));
+    auto* out_g =
+        context.Input<phi::DenseTensor>(framework::GradVarName("Out"));
     auto* filter_g =
         context.Output<phi::DenseTensor>(framework::GradVarName("Filter"));
     auto* padding_data_g =
         context.Output<phi::DenseTensor>(framework::GradVarName("PaddingData"));
-    auto* in = context.Input<LoDTensor>("X");
+    auto* in = context.Input<phi::DenseTensor>("X");
     auto* filter = context.Input<phi::DenseTensor>("Filter");
 
     int context_start = context.Attr<int>("contextStart");
@@ -125,7 +123,7 @@ class SequenceConvGradKernel : public framework::OpKernel<T> {
     // use col_shape in the im2col calculation
     framework::DDim col_shape = {in->dims()[0],
                                  sequence_width * context_length};
-    Tensor col;
+    phi::DenseTensor col;
 
     if (in_g || filter_g || (padding_trainable && padding_data_g)) {
       col.mutable_data<T>(col_shape, context.GetPlace());
@@ -159,7 +157,7 @@ class SequenceConvGradKernel : public framework::OpKernel<T> {
       padding_data_g->mutable_data<T>(context.GetPlace());
       set_zero(dev_ctx, padding_data_g, static_cast<T>(0));
 
-      LoDTensor* input = const_cast<LoDTensor*>(in);
+      phi::DenseTensor* input = const_cast<phi::DenseTensor*>(in);
       seq_project_grad_functor(dev_ctx,
                                *input,
                                padding_trainable,
@@ -178,8 +176,8 @@ class SequenceConvGradKernel : public framework::OpKernel<T> {
       filter_g->mutable_data<T>(context.GetPlace());
       set_zero(dev_ctx, filter_g, static_cast<T>(0));
 
-      Tensor filter_grad = *filter_g;
-      LoDTensor out_grad = *out_g;
+      phi::DenseTensor filter_grad = *filter_g;
+      phi::DenseTensor out_grad = *out_g;
 
       const phi::DenseTensor* padding_data = nullptr;
       if (padding_trainable) {
diff --git a/paddle/fluid/operators/sequence_ops/sequence_conv_op_xpu.cc b/paddle/fluid/operators/sequence_ops/sequence_conv_op_xpu.cc
index f0083ec404..f7b0b5c3b5 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_conv_op_xpu.cc
+++ b/paddle/fluid/operators/sequence_ops/sequence_conv_op_xpu.cc
@@ -19,14 +19,13 @@ limitations under the License. */
 
 namespace paddle {
 namespace operators {
-using Tensor = phi::DenseTensor;
 
 template <typename DeviceContext, typename T>
 class SequenceConvXPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    auto* in = context.Input<LoDTensor>("X");
-    auto* out = context.Output<LoDTensor>("Out");
+    auto* in = context.Input<phi::DenseTensor>("X");
+    auto* out = context.Output<phi::DenseTensor>("Out");
     auto filter = *context.Input<phi::DenseTensor>("Filter");
 
     out->mutable_data<T>(context.GetPlace());
@@ -36,11 +35,11 @@ class SequenceConvXPUKernel : public framework::OpKernel<T> {
     int context_stride = context.Attr<int>("contextStride");
     bool padding_trainable = context.Attr<bool>("paddingTrainable");
 
-    PADDLE_ENFORCE_EQ(
-        in->lod().empty(),
-        false,
-        platform::errors::InvalidArgument("Input(X) Tensor of SequenceConvOp "
-                                          "does not contain LoD information."));
+    PADDLE_ENFORCE_EQ(in->lod().empty(),
+                      false,
+                      platform::errors::InvalidArgument(
+                          "Input(X) phi::DenseTensor of SequenceConvOp "
+                          "does not contain LoD information."));
     PADDLE_ENFORCE_EQ(
         in->lod().size(),
         1UL,
@@ -159,11 +158,12 @@ template <typename DeviceContext, typename T>
 class SequenceConvGradXPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    auto* in_g = context.Output<LoDTensor>(framework::GradVarName("X"));
-    auto* out_g = context.Input<LoDTensor>(framework::GradVarName("Out"));
+    auto* in_g = context.Output<phi::DenseTensor>(framework::GradVarName("X"));
+    auto* out_g =
+        context.Input<phi::DenseTensor>(framework::GradVarName("Out"));
     auto* filter_g =
         context.Output<phi::DenseTensor>(framework::GradVarName("Filter"));
-    auto* in = context.Input<LoDTensor>("X");
+    auto* in = context.Input<phi::DenseTensor>("X");
     auto* filter = context.Input<phi::DenseTensor>("Filter");
 
     int context_start = context.Attr<int>("contextStart");
@@ -171,11 +171,11 @@ class SequenceConvGradXPUKernel : public framework::OpKernel<T> {
     int context_stride = context.Attr<int>("contextStride");
     bool padding_trainable = context.Attr<bool>("paddingTrainable");
 
-    PADDLE_ENFORCE_EQ(
-        in->lod().empty(),
-        false,
-        platform::errors::InvalidArgument("Input(X) Tensor of SequenceConvOp "
-                                          "does not contain LoD information."));
+    PADDLE_ENFORCE_EQ(in->lod().empty(),
+                      false,
+                      platform::errors::InvalidArgument(
+                          "Input(X) phi::DenseTensor of SequenceConvOp "
+                          "does not contain LoD information."));
     PADDLE_ENFORCE_EQ(
         in->lod().size(),
         1UL,
diff --git a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc
index 337ea46b26..979296eb04 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc
+++ b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc
@@ -36,11 +36,11 @@ class SequenceEnumerateOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("X",
-             "(2-D LoDTensor with the 2nd dimension equal to 1) "
-             "Input LoDTensor of SequenceEnumerate operator.");
+             "(2-D phi::DenseTensor with the 2nd dimension equal to 1) "
+             "Input phi::DenseTensor of SequenceEnumerate operator.");
     AddOutput("Out",
-              "(2-D LoDTensor with the 2nd dimension equal to win_size) "
-              "Output LoDTensor of SequenceEnumerate operator.");
+              "(2-D phi::DenseTensor with the 2nd dimension equal to win_size) "
+              "Output phi::DenseTensor of SequenceEnumerate operator.");
     AddAttr<int>("win_size", "(int) The enumerate sequence window size.")
         .AddCustomChecker([](const int& win_size) {
           PADDLE_ENFORCE_GE(win_size,
diff --git a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu
index 0f53f292ef..ee69333f92 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu
+++ b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu
@@ -21,7 +21,6 @@
 namespace paddle {
 namespace operators {
 using phi::PADDLE_CUDA_NUM_THREADS;
-using LoDTensor = phi::DenseTensor;
 
 template <typename T>
 __global__ void CalcOutPut(const T* in_data,
@@ -52,8 +51,8 @@ template <typename T>
 class SequenceEnumerateOpCUDAKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    auto* in = context.Input<LoDTensor>("X");
-    auto* out = context.Output<LoDTensor>("Out");
+    auto* in = context.Input<phi::DenseTensor>("X");
+    auto* out = context.Output<phi::DenseTensor>("Out");
     int win_size = context.Attr<int>("win_size");
     int pad_value = context.Attr<int>("pad_value");
 
diff --git a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h
index 90cb930062..048f28d859 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h
+++ b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h
@@ -18,14 +18,13 @@
 
 namespace paddle {
 namespace operators {
-using LoDTensor = phi::DenseTensor;
 
 template <typename DeviceContext, typename T>
 class SequenceEnumerateKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    auto* in = context.Input<LoDTensor>("X");
-    auto* out = context.Output<LoDTensor>("Out");
+    auto* in = context.Input<phi::DenseTensor>("X");
+    auto* out = context.Output<phi::DenseTensor>("Out");
     int win_size = context.Attr<int>("win_size");
     auto pad_value = static_cast<T>(context.Attr<int>("pad_value"));
 
@@ -33,7 +32,7 @@ class SequenceEnumerateKernel : public framework::OpKernel<T> {
         in->lod().empty(),
         false,
         platform::errors::InvalidArgument(
-            "Input(X) Tensor of SequenceEnumerateOp does not contain "
+            "Input(X) phi::DenseTensor of SequenceEnumerateOp does not contain "
             "LoD information."));
 
     auto in_dims = in->dims();
diff --git a/paddle/fluid/operators/sequence_ops/sequence_erase_op.cc b/paddle/fluid/operators/sequence_ops/sequence_erase_op.cc
index 2943b88959..fe50d8502c 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_erase_op.cc
+++ b/paddle/fluid/operators/sequence_ops/sequence_erase_op.cc
@@ -27,20 +27,21 @@ class SequenceEraseOp : public framework::OperatorWithKernel {
     OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "SequenceErase");
     OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "SequenceErase");
     auto x_dims = ctx->GetInputDim("X");
-    PADDLE_ENFORCE(x_dims.size() == 2 && x_dims[1] == 1,
-                   platform::errors::InvalidArgument(
-                       "Input(X) of SequenceEraseOp should be a 2-D LoDTensor "
-                       "with the 2nd dimension equal to 1,"
-                       "but received size %d with the 2nd dimension %d.",
-                       x_dims.size(),
-                       x_dims[1]));
+    PADDLE_ENFORCE(
+        x_dims.size() == 2 && x_dims[1] == 1,
+        platform::errors::InvalidArgument(
+            "Input(X) of SequenceEraseOp should be a 2-D phi::DenseTensor "
+            "with the 2nd dimension equal to 1,"
+            "but received size %d with the 2nd dimension %d.",
+            x_dims.size(),
+            x_dims[1]));
     ctx->SetOutputDim("Out", x_dims);
-    // The output LoDTensor's lod_level should be input X's lod_level.
+    // The output phi::DenseTensor's lod_level should be input X's lod_level.
     // For compile-time, we call SetLoDLevel to set output's lod_level.
-    // For runtime, output LoDTensor's lod is determined by input X's lod and
-    // the level specified by input RandTable.
-    // We cannot get X's detail lod and RankTable's level in this function, so
-    // leave this work to the detail kernel implementation.
+    // For runtime, output phi::DenseTensor's lod is determined by input X's lod
+    // and the level specified by input RankTable. We cannot get X's detail lod
+    // and RankTable's level in this function, so leave this work to the detail
+    // kernel implementation.
     if (!ctx->IsRuntime()) {
       ctx->SetLoDLevel("Out", ctx->GetLoDLevel("X"));
     }
@@ -51,11 +52,11 @@ class SequenceEraseOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("X",
-             "(2-D LoDTensor with the 2nd dim. equal to 1) "
-             "Input LoDTensor of SequenceEraseOp.");
+             "(2-D phi::DenseTensor with the 2nd dim. equal to 1) "
+             "Input phi::DenseTensor of SequenceEraseOp.");
     AddOutput("Out",
-              "(2-D LoDTensor with the 2nd dim. equal to 1) "
-              "Output LoDTensor of SequenceEraseOp.");
+              "(2-D phi::DenseTensor with the 2nd dim. equal to 1) "
+              "Output phi::DenseTensor of SequenceEraseOp.");
     AddAttr<std::vector<int>>("tokens",
                               "(vector<int>) Tokens need to be erased from "
                               "input sequences.");
@@ -64,7 +65,7 @@ Sequence Erase Operator.
 
 Sequence erase operator erases tokens specified by Attr(tokens) from the input
 sequences Input(X), and outputs the remaining data and modifies the LoD
-information at the same time. For example, given a 2-D LoDTensor
+information at the same time. For example, given a 2-D phi::DenseTensor
 
     X = [[2, 2, 6, 1, 3, 9, 6, 1, 0, 1]]^T
 
@@ -77,7 +78,7 @@ operation, the three sequences become
 
     X1' = [[6]]^T, X2' = [[1, 9]]^T and X3' = [[6, 1, 0, 1]]^T.
 
-Hence the LoDTensor Output(Out) should be
+Hence the phi::DenseTensor Output(Out) should be
 
     Out = [[6, 1, 9, 6, 1, 0, 1]]^T,
 
diff --git a/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu b/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu
index d8b0afbc85..b573df956d 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu
+++ b/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu
@@ -21,7 +21,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 using phi::PADDLE_CUDA_NUM_THREADS;
-using LoDTensor = phi::DenseTensor;
 
 template <typename T>
 __global__ void LabelErasedIdx(const T* in_dat,
@@ -67,8 +66,8 @@ template <typename T>
 class SequenceEraseOpCUDAKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* in = ctx.Input<LoDTensor>("X");
-    auto* out = ctx.Output<LoDTensor>("Out");
+    auto* in = ctx.Input<phi::DenseTensor>("X");
+    auto* out = ctx.Output<phi::DenseTensor>("Out");
 
     auto lod = in->lod();
     PADDLE_ENFORCE_EQ(
diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cc b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cc
index aa27516a33..b1223618ee 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cc
+++ b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cc
@@ -20,8 +20,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using LoDTensor = phi::DenseTensor;
-
 class SequenceExpandAsOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
@@ -49,8 +47,8 @@ class SequenceExpandAsOp : public framework::OperatorWithKernel {
       framework::Variable* y_var =
           PADDLE_GET(framework::Variable*, ctx->GetInputVarPtrs("Y")[0]);
 
-      auto& x_dim = x_var->Get<LoDTensor>().dims();
-      auto& y_lod = y_var->Get<LoDTensor>().lod();
+      auto& x_dim = x_var->Get<phi::DenseTensor>().dims();
+      auto& y_lod = y_var->Get<phi::DenseTensor>().lod();
 
       PADDLE_ENFORCE_EQ(y_lod.size(),
                         1,
@@ -96,13 +94,16 @@ class SequenceExpandAsOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("X",
-             "(LoDTensor, default LoDTensor<float>) A 2-D LoDTensor whose lod "
+             "(phi::DenseTensor, default phi::DenseTensor<float>) A 2-D "
+             "phi::DenseTensor whose lod "
              "level is at most 1.");
     AddInput("Y",
-             "(LoDTensor, default LoDTensor<float>) Referred LoDTensor whose "
+             "(phi::DenseTensor, default phi::DenseTensor<float>) Referred "
+             "phi::DenseTensor whose "
              "lod (specified level) is referred by Input(X).");
     AddOutput("Out",
-              "(LodTensor, default LoDTensor<float>) Output LoDTensor which is "
+              "(phi::DenseTensor, default phi::DenseTensor<float>) Output "
+              "phi::DenseTensor which is "
               "generated from Input(X) by referring lod of Input(Y).");
     AddComment(R"DOC(
 Sequence Expand As Operator.
@@ -116,26 +117,26 @@ Following are cases to better explain how this works:
 
 Case 1:
 
-Given a 1-level LoDTensor input(X)
+Given a 1-level phi::DenseTensor input(X)
     X.data = [[a], [b], [c], [d]]
     X.dims = [4, 1]
 and input(Y)
     Y.lod = [[0, 3, 6, 7, 8]]
 ref_level: 0
-then we get 1-level LoDTensor
+then we get 1-level phi::DenseTensor
     Out.lod =  [[0,            3,              6,  7,  8]]
     Out.data = [[a], [a], [a], [b], [b], [b], [c], [d]]
     Out.dims = [8, 1]
 
 Case 2:
 
-Given a common Tensor input(X)
+Given a common phi::DenseTensor input(X)
     X.data = [[a, b], [c, d], [e, f]]
     X.dims = [3, 2]
 and input(Y)
     Y.lod = [[0, 2, 3, 6]]
 ref_level: 0
-then we get a common LoDTensor
+then we get a common phi::DenseTensor
     Out.lod =  [[0,             2,     3,                    6]]
     Out.data = [[a, b], [a, b] [c, d], [e, f], [e, f], [e, f]]
     Out.dims = [6, 2]
diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu
index f565e0d438..d5fecace6d 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu
+++ b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu
@@ -20,8 +20,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using LoDTensor = phi::DenseTensor;
-
 template <typename T>
 static __global__ void sequence_expand_as_kernel(const T *in_data,
                                                  const size_t *expand_offset,
@@ -69,9 +67,9 @@ template <typename T>
 struct SequenceExpandAsFunctor<phi::GPUContext, T> {
   void operator()(
       const phi::GPUContext &context,
-      const LoDTensor &x,
+      const phi::DenseTensor &x,
       const framework::Vector<size_t> &ref_lod, /*expand referenced lod*/
-      LoDTensor *out) {
+      phi::DenseTensor *out) {
     int height = x.dims()[0];
     int width = phi::product(x.dims()) / height;
 
@@ -99,9 +97,9 @@ struct SequenceExpandAsFunctor<phi::GPUContext, T> {
 template <typename T>
 struct SequenceExpandAsGradFunctor<phi::GPUContext, T> {
   void operator()(const phi::GPUContext &context,
-                  const LoDTensor &dout,
+                  const phi::DenseTensor &dout,
                   const framework::Vector<size_t> &ref_lod, /*expand based lod*/
-                  LoDTensor *dx) {
+                  phi::DenseTensor *dx) {
     int height = dx->dims()[0];
     int width = phi::product(dx->dims()) / height;
 
diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_op.h b/paddle/fluid/operators/sequence_ops/sequence_expand_op.h
index af43aec793..1366fe87ab 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_expand_op.h
+++ b/paddle/fluid/operators/sequence_ops/sequence_expand_op.h
@@ -22,7 +22,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using LoDTensor = phi::DenseTensor;
 template <typename T,
           int MajorType = Eigen::RowMajor,
           typename IndexType = Eigen::DenseIndex>
@@ -32,30 +31,30 @@ template <typename DeviceContext, typename T>
 struct SequenceExpandFunctor {
   void operator()(
       const DeviceContext& ctx,
-      const LoDTensor& x,
+      const phi::DenseTensor& x,
       const framework::Vector<size_t>& x_lod,   /*expand source lod*/
       const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
-      LoDTensor* out);
+      phi::DenseTensor* out);
 };
 
 template <typename DeviceContext, typename T>
 struct SequenceExpandGradFunctor {
   void operator()(
       const DeviceContext& ctx,
-      const LoDTensor& dout,
+      const phi::DenseTensor& dout,
       const framework::Vector<size_t>& x_lod,   /*expand source lod*/
       const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
-      LoDTensor* dx);
+      phi::DenseTensor* dx);
 };
 
 template <typename T>
 struct SequenceExpandFunctor<phi::CPUContext, T> {
   void operator()(
       const phi::CPUContext& context,
-      const LoDTensor& x,
+      const phi::DenseTensor& x,
       const framework::Vector<size_t>& x_lod,   /*expand source lod*/
       const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
-      LoDTensor* out) {
+      phi::DenseTensor* out) {
     int out_offset = 0;
     int x_item_length = x.numel() / x.dims()[0];
     auto out_data = out->data<T>();
@@ -88,9 +87,9 @@ template <typename DeviceContext, typename T>
 class SequenceExpandKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    auto* x = context.Input<LoDTensor>("X");
-    auto* y = context.Input<LoDTensor>("Y");
-    auto* out = context.Output<LoDTensor>("Out");
+    auto* x = context.Input<phi::DenseTensor>("X");
+    auto* y = context.Input<phi::DenseTensor>("Y");
+    auto* out = context.Output<phi::DenseTensor>("Out");
 
     int ref_level = context.Attr<int>("ref_level");
     auto& x_lod = x->lod();
@@ -100,7 +99,7 @@ class SequenceExpandKernel : public framework::OpKernel<T> {
         y_lod.empty(),
         false,
         platform::errors::InvalidArgument(
-            "Input(Y) Tensor of SequenceExpandOp does not contain "
+            "Input(Y) phi::DenseTensor of SequenceExpandOp does not contain "
             "LoD information."));
 
     if (ref_level == -1) ref_level = y_lod.size() - 1;
@@ -164,10 +163,10 @@ template <typename T>
 struct SequenceExpandGradFunctor<phi::CPUContext, T> {
   void operator()(
       const phi::CPUContext& context,
-      const LoDTensor& dout,
+      const phi::DenseTensor& dout,
       const framework::Vector<size_t>& x_lod,   /*expand source lod*/
       const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
-      LoDTensor* dx) {
+      phi::DenseTensor* dx) {
     int dout_offset = 0;
     for (size_t i = 1; i < ref_lod.size(); ++i) {
       int repeat_num = ref_lod[i] - ref_lod[i - 1];
@@ -193,10 +192,11 @@ template <typename DeviceContext, typename T>
 class SequenceExpandGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    auto* g_out = context.Input<LoDTensor>(framework::GradVarName("Out"));
-    auto* x = context.Input<LoDTensor>("X");
-    auto* y = context.Input<LoDTensor>("Y");
-    auto* g_x = context.Output<LoDTensor>(framework::GradVarName("X"));
+    auto* g_out =
+        context.Input<phi::DenseTensor>(framework::GradVarName("Out"));
+    auto* x = context.Input<phi::DenseTensor>("X");
+    auto* y = context.Input<phi::DenseTensor>("Y");
+    auto* g_x = context.Output<phi::DenseTensor>(framework::GradVarName("X"));
     int ref_level = context.Attr<int>("ref_level");
 
     g_x->mutable_data<T>(context.GetPlace());
diff --git a/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc b/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc
index 6c14fa997f..c380779861 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc
+++ b/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc
@@ -82,7 +82,7 @@ class SequenceMaskOpMaker : public framework::OpProtoAndCheckerMaker {
 SequenceMask Operator
 
 This operator outputs a Mask according to Input(X) and Attr(maxlen).
-Supposing Input(X) is a Tensor with shape [d_1, d_2, ..., d_n], the
+Supposing Input(X) is a phi::DenseTensor with shape [d_1, d_2, ..., d_n], the
 Output(Y) is a mask with shape [d_1, d_2, ..., d_n, maxlen], where:
 
 Y(i_1, i_2, ..., i_n, j) = (j < X(i_1, i_2, ..., i_n))
diff --git a/paddle/fluid/operators/sequence_ops/sequence_mask_op.h b/paddle/fluid/operators/sequence_ops/sequence_mask_op.h
index 87b52174aa..d541f712a5 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_mask_op.h
+++ b/paddle/fluid/operators/sequence_ops/sequence_mask_op.h
@@ -28,9 +28,6 @@
 namespace paddle {
 namespace operators {
 
-using LoDTensor = phi::DenseTensor;
-using Tensor = phi::DenseTensor;
-
 template <typename Tx, typename Ty>
 struct SequenceMaskForRangeFunctor {
   HOSTDEVICE SequenceMaskForRangeFunctor(const Tx *x, Ty *y, int maxlen)
@@ -50,8 +47,11 @@ struct SequenceMaskForRangeFunctor {
 
 template <typename DeviceContext, typename Tx>
 struct SequenceMaskFunctor {
-  SequenceMaskFunctor(
-      const DeviceContext &ctx, const Tx *x, Tensor *y, int limits, int maxlen)
+  SequenceMaskFunctor(const DeviceContext &ctx,
+                      const Tx *x,
+                      phi::DenseTensor *y,
+                      int limits,
+                      int maxlen)
       : ctx_(ctx), x_(x), y_(y), limits_(limits), maxlen_(maxlen) {}
 
   template <typename Ty>
@@ -64,15 +64,13 @@ struct SequenceMaskFunctor {
  private:
   const DeviceContext &ctx_;
   const Tx *x_;
-  Tensor *y_;
+  phi::DenseTensor *y_;
   int limits_;
   int maxlen_;
 };
 
 template <typename DeviceContext, typename Tx>
 class SequenceMaskKernel : public framework::OpKernel<Tx> {
-  using Tensor = phi::DenseTensor;
-
  public:
   void Compute(const framework::ExecutionContext &ctx) const override {
     auto *x = ctx.Input<phi::DenseTensor>("X");
diff --git a/paddle/fluid/operators/sequence_ops/sequence_mask_op_npu.cc b/paddle/fluid/operators/sequence_ops/sequence_mask_op_npu.cc
index 1290e79bc0..f3b18676ab 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_mask_op_npu.cc
+++ b/paddle/fluid/operators/sequence_ops/sequence_mask_op_npu.cc
@@ -18,8 +18,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
-
 template <typename DeviceContext, typename T>
 class SequenceMaskNPUKernel : public framework::OpKernel<T> {
  public:
@@ -58,7 +56,7 @@ class SequenceMaskNPUKernel : public framework::OpKernel<T> {
     auto y_dim = phi::vectorize<int>(x->dims());
     y_dim.push_back(maxlen);
 
-    Tensor cast_x;
+    phi::DenseTensor cast_x;
     cast_x.mutable_data<int32_t>(x->dims(), ctx.GetPlace());
     const auto& cast1_runner = NpuOpRunner(
         "Cast",
@@ -68,7 +66,7 @@ class SequenceMaskNPUKernel : public framework::OpKernel<T> {
           ConvertToNpuDtype(framework::TransToProtoVarType(cast_x.dtype()))}});
     cast1_runner.Run(dev_ctx.stream());
 
-    Tensor tmp;
+    phi::DenseTensor tmp;
     tmp.mutable_data<int32_t>(phi::make_ddim({maxlen}), ctx.GetPlace());
     NpuOpRunner range_runner;
     range_runner.SetType("Range");
@@ -78,7 +76,7 @@ class SequenceMaskNPUKernel : public framework::OpKernel<T> {
     range_runner.AddOutput(tmp);
     range_runner.Run(dev_ctx.stream());
 
-    Tensor expand_tmp;
+    phi::DenseTensor expand_tmp;
     expand_tmp.mutable_data<int32_t>(phi::make_ddim(y_dim), ctx.GetPlace());
     const auto& expand_runner =
         NpuOpRunner("ExpandD", {tmp}, {expand_tmp}, {{"shape", y_dim}});
@@ -87,7 +85,7 @@ class SequenceMaskNPUKernel : public framework::OpKernel<T> {
     auto x_dims = phi::vectorize<int>(x->dims());
     x_dims.push_back(1);
     cast_x.Resize(phi::make_ddim({x_dims}));
-    Tensor x_tmp;
+    phi::DenseTensor x_tmp;
     x_tmp.mutable_data<int32_t>(phi::make_ddim(y_dim), ctx.GetPlace());
     const auto& tile_runner =
         NpuOpRunner("TileWithAxis",
@@ -96,7 +94,7 @@ class SequenceMaskNPUKernel : public framework::OpKernel<T> {
                     {{"axis", x->dims().size()}, {"tiles", maxlen}});
     tile_runner.Run(dev_ctx.stream());
 
-    Tensor y_tmp;
+    phi::DenseTensor y_tmp;
     y_tmp.mutable_data<uint8_t>(phi::make_ddim(y_dim), ctx.GetPlace());
     const auto& less_runner =
         NpuOpRunner("Less", {expand_tmp, x_tmp}, {y_tmp}, {});
diff --git a/paddle/fluid/operators/sequence_ops/sequence_pad_op.cc b/paddle/fluid/operators/sequence_ops/sequence_pad_op.cc
index d427e339fb..6957920131 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_pad_op.cc
+++ b/paddle/fluid/operators/sequence_ops/sequence_pad_op.cc
@@ -69,7 +69,7 @@ class SequencePadOp : public framework::OperatorWithKernel {
       // run time
       framework::Variable* x_var =
           PADDLE_GET(framework::Variable*, ctx->GetInputVarPtrs("X")[0]);
-      const auto& x_lod = x_var->Get<LoDTensor>().lod();
+      const auto& x_lod = x_var->Get<phi::DenseTensor>().lod();
       PADDLE_ENFORCE_EQ(x_lod.empty(),
                         false,
                         platform::errors::NotFound(
@@ -145,20 +145,22 @@ class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("X",
-             "(LoDTensor, default LoDTensor<float>) Input variable which "
+             "(phi::DenseTensor, default phi::DenseTensor<float>) Input "
+             "variable which "
              "should contain lod information.");
     AddInput("PadValue",
-             "(LoDTensor), this Tensor holds values that will be fill into "
+             "(phi::DenseTensor), this phi::DenseTensor holds values that will "
+             "be filled into "
              "padded steps. It can be a scalar or a tensor whose shape equals "
              "to time steps in sequences. If it's a scalar, it will be "
              "automatically broadcasted to the shape of time step.");
-    AddOutput(
-        "Out",
-        "(LoDTensor) The output vairable, which contains padded sequences.");
-    AddOutput(
-        "Length",
-        "(LoDTensor) The output vairable, which contains the actual length of "
-        "sequences before padding.");
+    AddOutput("Out",
+              "(phi::DenseTensor) The output variable, which contains padded "
+              "sequences.");
+    AddOutput("Length",
+              "(phi::DenseTensor) The output variable, which contains the "
+              "actual length of "
+              "sequences before padding.");
     AddAttr<int>(
         "padded_length",
         "The length of padded sequences. It can be set to -1 or "
@@ -179,41 +181,41 @@ class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker {
 
       Case 1:
 
-      Given a 1-level LoDTensor input(X):
+      Given a 1-level phi::DenseTensor input(X):
           X.lod = [[0, 2,       5]]
           X.data = [a, b, c, d, e]
       and Input(PadValue):
           PadValue.data = [0]
       and attribite 'padded_length' = 4,
-      then we get LoDTensor:
+      then we get phi::DenseTensor:
           Out.data = [[a, b, 0, 0],
                       [c, d, e, 0]]
           Length.data = [2, 3]
 
       Case 2:
 
-      Given a 1-level LoDTensor input(X):
+      Given a 1-level phi::DenseTensor input(X):
           X.lod = [[0,               2,                           5]]
           X.data = [[a1, a2], [b1, b2], [c1, c2], [d1, d2], [e1, e2]]
       and Input(PadValue):
           PadValue.data = [0]
       and attribite 'padded_length' = -1, which mean using the length
       of longest input sequence(3 in this case),
-      then we get LoDTensor:
+      then we get phi::DenseTensor:
           Out.data = [[[a1, a2], [b1, b2], [0, 0]],
                       [[c1, c2], [d1, d2], [e1, e2]]]
           Length.data = [2, 3]
 
       Case 3:
 
-      Given a 1-level LoDTensor input(X):
+      Given a 1-level phi::DenseTensor input(X):
           X.lod = [[0,               2,                           5]]
           X.data = [[a1, a2], [b1, b2], [c1, c2], [d1, d2], [e1, e2]]
       and Input(PadValue):
           PadValue.data = [p1, p2]
       and attribite 'padded_length' = -1, which mean using the length
       of longest input sequence(3 in this case),
-      then we get LoDTensor:
+      then we get phi::DenseTensor:
           Out.data = [[[a1, a2], [b1, b2], [p1, p2]],
                       [[c1, c2], [d1, d2], [e1, e2]]]
           Length.data = [2, 3]
diff --git a/paddle/fluid/operators/sequence_ops/sequence_pad_op.h b/paddle/fluid/operators/sequence_ops/sequence_pad_op.h
index 6f90260957..0615e0c943 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_pad_op.h
+++ b/paddle/fluid/operators/sequence_ops/sequence_pad_op.h
@@ -24,25 +24,24 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using LoDTensor = phi::DenseTensor;
 using LoD = framework::LoD;
 
 template <typename DeviceContext, typename T>
 class SequencePadOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    const auto* x = ctx.Input<LoDTensor>("X");
-    auto* out = ctx.Output<LoDTensor>("Out");
-    auto* len_t = ctx.Output<LoDTensor>("Length");
+    const auto* x = ctx.Input<phi::DenseTensor>("X");
+    auto* out = ctx.Output<phi::DenseTensor>("Out");
+    auto* len_t = ctx.Output<phi::DenseTensor>("Length");
     out->mutable_data<T>(ctx.GetPlace());
 
-    PADDLE_ENFORCE_EQ(
-        x->lod().empty(),
-        false,
-        platform::errors::NotFound("Input(X) Tensor of SequencePadOp does not "
-                                   "contain LoD information."));
+    PADDLE_ENFORCE_EQ(x->lod().empty(),
+                      false,
+                      platform::errors::NotFound(
+                          "Input(X) phi::DenseTensor of SequencePadOp does not "
+                          "contain LoD information."));
 
-    const auto* pad_value = ctx.Input<LoDTensor>("PadValue");
+    const auto* pad_value = ctx.Input<phi::DenseTensor>("PadValue");
 
     int padded_length = ctx.Attr<int>("padded_length");
 
@@ -56,7 +55,7 @@ class SequencePadOpKernel : public framework::OpKernel<T> {
         false,
         math::kBatchLengthWidth);
 
-    LoDTensor seq_len;
+    phi::DenseTensor seq_len;
     seq_len.Resize(len_t->dims());
     int64_t* len_data = seq_len.mutable_data<int64_t>(platform::CPUPlace());
     for (size_t i = 1; i < x->lod()[0].size(); ++i) {
@@ -73,9 +72,10 @@ template <typename DeviceContext, typename T>
 class SequencePadGradOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* d_x = ctx.Output<LoDTensor>(framework::GradVarName("X"));
+    auto* d_x = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
     if (d_x) {
-      const auto* d_out = ctx.Input<LoDTensor>(framework::GradVarName("Out"));
+      const auto* d_out =
+          ctx.Input<phi::DenseTensor>(framework::GradVarName("Out"));
       d_x->mutable_data<T>(ctx.GetPlace());
 
       int padded_length = ctx.Attr<int>("padded_length");
diff --git a/paddle/fluid/operators/sequence_ops/sequence_pool_op.cc b/paddle/fluid/operators/sequence_ops/sequence_pool_op.cc
index 9b8697b976..778b2f8854 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_pool_op.cc
+++ b/paddle/fluid/operators/sequence_ops/sequence_pool_op.cc
@@ -53,12 +53,15 @@ class SequencePoolOp : public framework::OperatorWithKernel {
 class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
-    AddInput("X", "(LoDTensor) The variable-length input of SequencePoolOp");
-    AddOutput("Out",
-              "(Tensor) The output of SequencePoolOp does not contain LoD "
-              "information.");
+    AddInput("X",
+             "(phi::DenseTensor) The variable-length input of SequencePoolOp");
+    AddOutput(
+        "Out",
+        "(phi::DenseTensor) The output of SequencePoolOp does not contain LoD "
+        "information.");
     AddOutput("MaxIndex",
-              "(Tensor<int>) This tensor is used for the sequence max-pooling "
+              "(phi::DenseTensor<int>) This tensor is used for the sequence "
+              "max-pooling "
               "to record the max indexes.")
         .AsIntermediate();
     AddAttr<bool>("is_test",
@@ -92,11 +95,11 @@ The following example explains how this works:
 For a mini-batch of 3 variable-length sentences,
 containing 2, 3, and 2 time-steps:
 
-Assume X is a [7,M,N] LoDTensor, and X->lod()[0] = [0, 2, 5, 7], 7=2+3+2.
+Assume X is a [7,M,N] phi::DenseTensor, and X->lod()[0] = [0, 2, 5, 7], 7=2+3+2.
 Besides, for the sake of simplicity, we assume M=1 and N=1,
 and the value of X = [[1, 3], [2, 4, 6], [5, 1]].
 
-Thus, Out is a [3,1,1] Tensor without LoD information.
+Thus, Out is a [3,1,1] phi::DenseTensor without LoD information.
 And for different pooltype, the value of Out is as follows:
 
 - AVERAGE: [2, 4, 3], where 2=(1+3)/2, 4=(2+4+6)/3, 3=(5+1)/2
diff --git a/paddle/fluid/operators/sequence_ops/sequence_pool_op.h b/paddle/fluid/operators/sequence_ops/sequence_pool_op.h
index 199187a154..78acb4eef2 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_pool_op.h
+++ b/paddle/fluid/operators/sequence_ops/sequence_pool_op.h
@@ -23,15 +23,12 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
-
 template <typename DeviceContext, typename T>
 class SequencePoolKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    auto* in = context.Input<LoDTensor>("X");
-    auto* out = context.Output<LoDTensor>("Out");
+    auto* in = context.Input<phi::DenseTensor>("X");
+    auto* out = context.Output<phi::DenseTensor>("Out");
     std::string pooltype = context.Attr<std::string>("pooltype");
     T pad_value = static_cast<T>(context.Attr<float>("pad_value"));
 
@@ -39,11 +36,11 @@ class SequencePoolKernel : public framework::OpKernel<T> {
     auto lod = in->lod();
     auto lod_level = lod.size();
     // InferShape by lod
-    PADDLE_ENFORCE_GT(
-        lod_level,
-        0,
-        platform::errors::InvalidArgument("Input(X) Tensor of SequencePoolOp "
-                                          "does not contain LoD information."));
+    PADDLE_ENFORCE_GT(lod_level,
+                      0,
+                      platform::errors::InvalidArgument(
+                          "Input(X) phi::DenseTensor of SequencePoolOp "
+                          "does not contain LoD information."));
     PADDLE_ENFORCE_LE(lod_level,
                       2UL,
                       platform::errors::InvalidArgument(
@@ -100,8 +97,9 @@ template <typename DeviceContext, typename T>
 class SequencePoolGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    auto* out_g = context.Input<LoDTensor>(framework::GradVarName("Out"));
-    auto* in_g = context.Output<LoDTensor>(framework::GradVarName("X"));
+    auto* out_g =
+        context.Input<phi::DenseTensor>(framework::GradVarName("Out"));
+    auto* in_g = context.Output<phi::DenseTensor>(framework::GradVarName("X"));
     std::string pooltype = context.Attr<std::string>("pooltype");
     const phi::DenseTensor* index = nullptr;
     if (pooltype == "MAX") {
diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h
index aeae0a0e1f..03036a0bab 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h
+++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h
@@ -19,33 +19,30 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
-
 template <typename DeviceContext, typename T>
 struct SequenceSoftmaxFunctor {
   void operator()(
       const DeviceContext &ctx,
-      const LoDTensor &x,
+      const phi::DenseTensor &x,
       const framework::Vector<size_t> &ref_lod, /*expand referenced lod*/
-      LoDTensor *out);
+      phi::DenseTensor *out);
 };
 
 template <typename DeviceContext, typename T>
 struct SequenceSoftmaxGradFunctor {
   void operator()(const DeviceContext &ctx,
-                  const LoDTensor &dout,
-                  const LoDTensor &out,
+                  const phi::DenseTensor &dout,
+                  const phi::DenseTensor &out,
                   const framework::Vector<size_t> &ref_lod, /*referenced lod*/
-                  LoDTensor *dx);
+                  phi::DenseTensor *dx);
 };
 
 template <typename T>
 struct SequenceSoftmaxFunctor<phi::CPUContext, T> {
   void operator()(const phi::CPUContext &ctx,
-                  const LoDTensor &x,
+                  const phi::DenseTensor &x,
                   const framework::Vector<size_t> &ref_lod, /*referenced lod*/
-                  LoDTensor *out) {
+                  phi::DenseTensor *out) {
     size_t height = ref_lod.size() - 1;
     const T *in_data = x.data<T>();
     T *out_data = out->mutable_data<T>(ctx.GetPlace());
@@ -65,10 +62,10 @@ struct SequenceSoftmaxFunctor<phi::CPUContext, T> {
 template <typename T>
 struct SequenceSoftmaxGradFunctor<phi::CPUContext, T> {
   void operator()(const phi::CPUContext &ctx,
-                  const LoDTensor &dout,
-                  const LoDTensor &out,
+                  const phi::DenseTensor &dout,
+                  const phi::DenseTensor &out,
                   const framework::Vector<size_t> &ref_lod, /*referenced lod*/
-                  LoDTensor *dx) {
+                  phi::DenseTensor *dx) {
     size_t height = ref_lod.size() - 1;
 
     const T *softmax_grad_data = dout.data<T>();
@@ -94,17 +91,17 @@ template <typename DeviceContext, typename T>
 class SequenceSoftmaxKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &ctx) const override {
-    auto *x = ctx.Input<LoDTensor>("X");
-    auto *out = ctx.Output<LoDTensor>("Out");
+    auto *x = ctx.Input<phi::DenseTensor>("X");
+    auto *out = ctx.Output<phi::DenseTensor>("Out");
 
     auto lod = x->lod();
     auto dims = x->dims();
-    PADDLE_ENFORCE_EQ(
-        lod.empty(),
-        false,
-        platform::errors::InvalidArgument(
-            "Input(X) Tensor of SequenceSoftmax operator does not contain "
-            "LoD information."));
+    PADDLE_ENFORCE_EQ(lod.empty(),
+                      false,
+                      platform::errors::InvalidArgument(
+                          "Input(X) phi::DenseTensor of SequenceSoftmax "
+                          "operator does not contain "
+                          "LoD information."));
 
     const size_t level = lod.size() - 1;
     PADDLE_ENFORCE_EQ(
@@ -138,10 +135,10 @@ template <typename DeviceContext, typename T>
 class SequenceSoftmaxGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &ctx) const override {
-    auto *out = ctx.Input<LoDTensor>("Out");
-    auto *out_grad = ctx.Input<LoDTensor>(framework::GradVarName("Out"));
-    auto *x = ctx.Input<LoDTensor>("X");
-    auto *x_grad = ctx.Output<LoDTensor>(framework::GradVarName("X"));
+    auto *out = ctx.Input<phi::DenseTensor>("Out");
+    auto *out_grad = ctx.Input<phi::DenseTensor>(framework::GradVarName("Out"));
+    auto *x = ctx.Input<phi::DenseTensor>("X");
+    auto *x_grad = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
     if (!x_grad) {
       return;
     }
-- 
GitLab