From 30a31a532807b8cddc64f025d239917eb22ded23 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?=
 <83450930+Liyulingyue@users.noreply.github.com>
Date: Mon, 28 Nov 2022 15:34:26 +0800
Subject: [PATCH] replace LoDTensor with phi::DenseTensor in fluid\operators\*\
 except sequence_ops (#48418)

---
 .../operators/cinn/cinn_instruction_run_op.cc |  4 +-
 .../cinn/cinn_instruction_run_op_test.cc      |  8 +-
 .../operators/cinn/cinn_launch_context.cc     | 14 ++--
 .../cinn/cinn_launch_context_test.cc          |  7 +-
 paddle/fluid/operators/cinn/cinn_launch_op.cc |  6 +-
 paddle/fluid/operators/cinn/cinn_launch_op.h  | 14 ++--
 .../operators/cinn/cinn_launch_op_test.cc     |  4 +-
 paddle/fluid/operators/cinn/test_helper.h     | 14 ++--
 .../operators/collective/c_embedding_op.cc    |  2 +-
 .../operators/collective/c_embedding_op.cu    | 14 ++--
 .../operators/collective/c_embedding_op.h     | 18 ++---
 .../collective/c_embedding_op_npu.cc          | 18 +++--
 .../collective/c_embedding_op_xpu.cc          |  8 +-
 .../controlflow/conditional_block_op.cc       | 24 +++---
 .../controlflow/conditional_block_op_test.cc  |  5 +-
 paddle/fluid/operators/controlflow/feed_op.cc |  8 +-
 .../fluid/operators/controlflow/fetch_op.cc   |  8 +-
 .../operators/controlflow/fetch_v2_op.cc      |  6 +-
 .../fluid/operators/controlflow/logical_op.cc | 15 ++--
 .../controlflow/tensor_array_read_write_op.cc | 13 ++--
 .../fluid/operators/controlflow/while_op.cc   | 25 +++---
 .../fluid/operators/detection/bbox_util.cu.h  |  1 -
 .../operators/detection/bipartite_match_op.cc | 10 +--
 .../fluid/operators/detection/box_clip_op.cc  |  8 +-
 .../fluid/operators/detection/box_clip_op.cu  |  4 +-
 .../fluid/operators/detection/box_clip_op.h   |  7 +-
 .../fluid/operators/detection/box_coder_op.cc |  5 +-
 .../detection/box_decoder_and_assign_op.cc    | 14 ++--
 .../detection/collect_fpn_proposals_op.cc     | 14 ++--
 .../detection/collect_fpn_proposals_op.cu     |  7 +-
 .../detection/collect_fpn_proposals_op.h      |  2 +-
 .../detection/distribute_fpn_proposals_op.cc  |  6 +-
 .../detection/generate_mask_labels_op.cc      | 47 ++++++-----
 .../detection/generate_proposal_labels_op.cc  | 64 +++++++++------
 .../detection/generate_proposals_op.cc        | 12 +--
 .../detection/generate_proposals_op.cu        |  5 +-
 .../detection/generate_proposals_v2_op.cc     |  8 +-
 .../operators/detection/iou_similarity_op.cc  | 27 +++----
 .../detection/locality_aware_nms_op.cc        | 19 ++---
 .../operators/detection/matrix_nms_op.cc      |  9 ++-
 .../detection/mine_hard_examples_op.cc        | 18 ++---
 .../operators/detection/multiclass_nms_op.cc  | 24 +++---
 .../retinanet_detection_output_op.cc          | 13 ++--
 .../detection/roi_perspective_transform_op.cc |  9 +--
 .../detection/rpn_target_assign_op.cc         | 59 +++++++-------
 .../operators/detection/target_assign_op.cc   |  6 +-
 .../elementwise/elementwise_add_op.cc         | 14 ++--
 .../elementwise/elementwise_div_op.cc         | 14 ++--
 .../elementwise/elementwise_floordiv_op.cc    | 14 ++--
 .../elementwise/elementwise_mul_op.cc         | 14 ++--
 .../operators/elementwise/elementwise_op.h    | 15 ++--
 .../elementwise/elementwise_op_function.h     |  6 +-
 .../elementwise/elementwise_sub_op.cc         | 14 ++--
 .../operators/elementwise/elementwise_xpu.h   |  4 +-
 .../operators/fused/fused_bn_activation_op.cc |  6 +-
 .../fused/fused_bn_add_activation_op.cc       |  6 +-
 .../fused/fused_embedding_fc_lstm_op.cc       | 40 +++++-----
 .../fused/fused_embedding_fc_lstm_op.h        |  1 -
 .../fused/fused_embedding_seq_pool_op.cc      |  2 +-
 .../fused/fused_embedding_seq_pool_op.h       | 34 ++++----
 .../operators/fused/fused_seqpool_cvm_op.cc   |  4 +-
 .../operators/fused/fused_seqpool_cvm_op.cu   | 12 +--
 .../operators/fused/fused_seqpool_cvm_op.h    |  2 -
 .../fluid/operators/fused/fusion_group_op.cc  |  4 +-
 paddle/fluid/operators/fused/fusion_gru_op.cc | 29 +++----
 paddle/fluid/operators/fused/fusion_gru_op.h  |  1 -
 .../fluid/operators/fused/fusion_lstm_op.cc   | 77 ++++++++++---------
 paddle/fluid/operators/fused/fusion_lstm_op.h |  1 -
 .../fused/fusion_repeated_fc_relu_op.cc       |  4 +-
 .../fused/fusion_repeated_fc_relu_op.h        |  1 -
 .../fused/fusion_seqconv_eltadd_relu_op.cc    | 19 ++---
 .../fused/fusion_seqconv_eltadd_relu_op.h     |  1 -
 .../fused/fusion_seqexpand_concat_fc_op.cc    |  9 ++-
 .../fused/fusion_seqexpand_concat_fc_op.h     |  1 -
 .../fused/fusion_seqpool_concat_op.cc         |  9 ++-
 .../fused/fusion_seqpool_concat_op.h          |  1 -
 .../fused/fusion_seqpool_cvm_concat_op.cc     |  9 ++-
 .../fused/fusion_seqpool_cvm_concat_op.h      |  1 -
 .../fused/fusion_squared_mat_sub_op.h         |  1 -
 .../fused/mkldnn/multi_gru_mkldnn_op.cc       | 11 +--
 paddle/fluid/operators/fused/multi_gru_op.cc  | 13 ++--
 paddle/fluid/operators/fused/multi_gru_op.h   |  1 -
 paddle/fluid/operators/math/context_project.h | 10 +--
 .../fluid/operators/math/sequence_padding.h   | 16 ++--
 .../fluid/operators/math/sequence_pooling.cc  |  1 -
 paddle/fluid/operators/math/sequence_scale.h  |  3 +-
 paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc | 11 ++-
 .../fluid/operators/mkldnn/mul_mkldnn_op.cc   |  1 -
 paddle/fluid/operators/nccl/nccl_op.cu.cc     | 14 ++--
 .../fluid/operators/optimizers/adam_op_mlu.cc | 45 ++++++-----
 .../fluid/operators/optimizers/adam_op_npu.cc | 37 +++++----
 .../optimizers/decayed_adagrad_op.cc          | 28 +++----
 .../operators/optimizers/decayed_adagrad_op.h |  4 +-
 paddle/fluid/operators/optimizers/dpsgd_op.cc | 24 +++---
 paddle/fluid/operators/optimizers/dpsgd_op.h  |  4 +-
 paddle/fluid/operators/optimizers/lamb_op.cc  |  6 +-
 .../operators/optimizers/lars_momentum_op.cc  | 26 +++----
 .../fluid/operators/optimizers/momentum_op.h  | 12 +--
 .../operators/optimizers/rmsprop_op_npu.cc    | 19 +++--
 paddle/fluid/operators/optimizers/sgd_op.cu   |  4 +-
 paddle/fluid/operators/optimizers/sgd_op.h    |  4 +-
 .../pscore/distributed_lookup_table_op.cc     |  7 +-
 .../pscore/distributed_push_sparse_op.cc      | 11 +--
 paddle/fluid/operators/pscore/fake_init_op.cc |  2 +-
 .../operators/reader/create_py_reader_op.cc   |  2 +-
 paddle/fluid/operators/reader/read_op.cc      |  2 +-
 106 files changed, 683 insertions(+), 638 deletions(-)

diff --git a/paddle/fluid/operators/cinn/cinn_instruction_run_op.cc b/paddle/fluid/operators/cinn/cinn_instruction_run_op.cc
index f02946bddcc..c0dafd85344 100644
--- a/paddle/fluid/operators/cinn/cinn_instruction_run_op.cc
+++ b/paddle/fluid/operators/cinn/cinn_instruction_run_op.cc
@@ -68,11 +68,11 @@ class CinnInstructionRunOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput(kX,
-             "(vector<LoDTensor>)"
+             "(vector<phi::DenseTensor>)"
              "which are the input arguments of this cinn instruction")
         .AsDuplicable();
     AddOutput(kOutputs,
-              "(vector<LoDTensor>)"
+              "(vector<phi::DenseTensor>)"
               "which are the output arguments of this cinn instruction")
         .AsDuplicable();
     AddAttr<int64_t>(
diff --git a/paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc b/paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc
index 47f89735149..6469ac3ab21 100644
--- a/paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc
+++ b/paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc
@@ -74,7 +74,7 @@ class TestCinnInstructionRunOp : public ::testing::Test {
     // so a cinn_instruction_run_op will throw an error
     framework::Scope scope;
     InitVariablesWithRandomValue<float>({"x", "y"}, {10, 20}, place, &scope);
-    scope.Var(test_op_out_name)->GetMutable<LoDTensor>();
+    scope.Var(test_op_out_name)->GetMutable<phi::DenseTensor>();
     ASSERT_THROW(cinn_instruction_run_op->Run(scope, place),
                  paddle::platform::EnforceNotMet);
 
@@ -83,7 +83,7 @@ class TestCinnInstructionRunOp : public ::testing::Test {
     // of both type float and int
     cinn_launch_op->Run(scope, place);
     scope.EraseVars({"x", "y", test_op_out_name});
-    scope.Var(test_op_out_name)->GetMutable<LoDTensor>();
+    scope.Var(test_op_out_name)->GetMutable<phi::DenseTensor>();
     InitVariablesWithRandomValue<int>({"x", "y"}, {30, 40}, place, &scope);
     cinn_launch_op->Run(scope, place);
   }
@@ -92,8 +92,8 @@ class TestCinnInstructionRunOp : public ::testing::Test {
     // Run ops and check the computation results
     framework::Scope scope;
     InitVariablesWithRandomValue<float>({"x", "y"}, {10, 20}, place, &scope);
-    scope.Var(test_op_out_name)->GetMutable<LoDTensor>();
-    scope.Var(add_op_out_name)->GetMutable<LoDTensor>();
+    scope.Var(test_op_out_name)->GetMutable<phi::DenseTensor>();
+    scope.Var(add_op_out_name)->GetMutable<phi::DenseTensor>();
     elementwise_add_op->Run(scope, place);
     cinn_launch_op->Run(scope, place);
     CompareOpResult<float>(scope.GetVar(test_op_out_name),
diff --git a/paddle/fluid/operators/cinn/cinn_launch_context.cc b/paddle/fluid/operators/cinn/cinn_launch_context.cc
index cc439e44866..3b9d7d00eda 100644
--- a/paddle/fluid/operators/cinn/cinn_launch_context.cc
+++ b/paddle/fluid/operators/cinn/cinn_launch_context.cc
@@ -45,7 +45,6 @@
 namespace paddle {
 namespace operators::details {
 
-using LoDTensor = phi::DenseTensor;
 using framework::ParallelExecutor;
 using framework::Scope;
 using CinnInstruction = ::cinn::hlir::framework::Instruction;
@@ -268,7 +267,8 @@ void CinnLaunchContext::AssignExternalVariable(const std::string& var_name) {
   // assign external malloc/free callbacks of cinn_buffer_t
   cinn_buffer->external_malloc = new std::function<int(void*, cinn_buffer_t*)>(
       [this, var_name](void* ctx, cinn_buffer_t* buffer) {
-        auto* tensor = cached_scope_->GetVar(var_name)->GetMutable<LoDTensor>();
+        auto* tensor =
+            cached_scope_->GetVar(var_name)->GetMutable<phi::DenseTensor>();
         tensor->Resize(framework::DDim(buffer->dims, buffer->dimensions));
         buffer->memory = reinterpret_cast<uint8_t*>(tensor->mutable_data(
             *cached_place_,
@@ -294,7 +294,7 @@ void CinnLaunchContext::AssignInternalVariable(const std::string& var_name) {
   cinn_buffer->external_malloc = new std::function<int(void*, cinn_buffer_t*)>(
       [this, var_name](void* ctx, cinn_buffer_t* buffer) {
         auto* tensor =
-            cached_temp_scope_->Var(var_name)->GetMutable<LoDTensor>();
+            cached_temp_scope_->Var(var_name)->GetMutable<phi::DenseTensor>();
         tensor->Resize(framework::DDim(buffer->dims, buffer->dimensions));
         buffer->memory = reinterpret_cast<uint8_t*>(tensor->mutable_data(
             *cached_place_,
@@ -306,8 +306,8 @@ void CinnLaunchContext::AssignInternalVariable(const std::string& var_name) {
   // if no instruction use it
   cinn_buffer->external_free = new std::function<int(void*, cinn_buffer_t*)>(
       [this, var_name](void* ctx, cinn_buffer_t* buffer) {
-        auto* tensor =
-            cached_temp_scope_->GetVar(var_name)->GetMutable<LoDTensor>();
+        auto* tensor = cached_temp_scope_->GetVar(var_name)
+                           ->GetMutable<phi::DenseTensor>();
         tensor->clear();
         return 0;
       });
@@ -438,8 +438,8 @@ ParallelExecutor* CinnLaunchContext::InitializePE(const platform::Place& place,
     auto* var = scope->GetVar(var_name);
     auto* buffer = GetCinnBufferOfVar(var_name);
     auto dim = framework::DDim(buffer->dims, buffer->dimensions);
-    var->GetMutable<LoDTensor>()->Resize(dim);
-    var->GetMutable<LoDTensor>()->mutable_data(
+    var->GetMutable<phi::DenseTensor>()->Resize(dim);
+    var->GetMutable<phi::DenseTensor>()->mutable_data(
         place, framework::paddle2cinn::TransToPaddleDataType(buffer->type));
   }
   return parallel_executor_.get();
diff --git a/paddle/fluid/operators/cinn/cinn_launch_context_test.cc b/paddle/fluid/operators/cinn/cinn_launch_context_test.cc
index 34f182d57a3..2b4bc9acf12 100644
--- a/paddle/fluid/operators/cinn/cinn_launch_context_test.cc
+++ b/paddle/fluid/operators/cinn/cinn_launch_context_test.cc
@@ -40,7 +40,6 @@ USE_OP(cinn_instruction_run);
 namespace paddle {
 namespace operators::details {
 
-using LoDTensor = phi::DenseTensor;
 using framework::OpDesc;
 using framework::ParallelExecutor;
 using framework::ProgramDesc;
@@ -203,8 +202,8 @@ TEST_F(CinnLaunchContextTest, TestConstructResult) {
 TEST_F(CinnLaunchContextTest, TestCheckTensorEquivalent) {
   platform::CPUPlace place;
   framework::Scope scope;
-  auto* tensor1 = scope.Var("var1")->GetMutable<LoDTensor>();
-  auto* tensor2 = scope.Var("var2")->GetMutable<LoDTensor>();
+  auto* tensor1 = scope.Var("var1")->GetMutable<phi::DenseTensor>();
+  auto* tensor2 = scope.Var("var2")->GetMutable<phi::DenseTensor>();
 
   // dimension not equivalent
   tensor1->mutable_data<float>(phi::make_ddim({3, 5}), place);
@@ -264,7 +263,7 @@ TEST_F(CinnLaunchContextTest, TestCallbackAssignment) {
   launch_context->UpdateCapturedEnv(scope, place);
 
   // assign external variables
-  auto* tensor1 = scope.Var("var1")->GetMutable<LoDTensor>();
+  auto* tensor1 = scope.Var("var1")->GetMutable<phi::DenseTensor>();
   float* data1 = tensor1->mutable_data<float>(phi::make_ddim({3, 4}), place);
   data1[0] = 9.99f;
   data1[10] = 19.99f;
diff --git a/paddle/fluid/operators/cinn/cinn_launch_op.cc b/paddle/fluid/operators/cinn/cinn_launch_op.cc
index cd0a31dc0cd..8147541cbaa 100644
--- a/paddle/fluid/operators/cinn/cinn_launch_op.cc
+++ b/paddle/fluid/operators/cinn/cinn_launch_op.cc
@@ -128,18 +128,18 @@ class CinnLaunchOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput(kX,
-             "(vector<LoDTensor>)"
+             "(vector<phi::DenseTensor>)"
              "which are the input of graph inside the CinnLaunchOp"
              "excluding kNoNeedBufferX.")
         .AsDuplicable();
     AddInput(kNoNeedBufferX,
-             "(vector<LoDTensor>)"
+             "(vector<phi::DenseTensor>)"
              "which are the input of graph inside the CinnLaunchOp but"
              "their buffer are not needed.")
         .AsDuplicable()
         .AsDispensable();
     AddOutput(kOutputs,
-              "(vector<LoDTensor>)"
+              "(vector<phi::DenseTensor>)"
               "which are the output of graph inside the CinnLaunchOp.")
         .AsDuplicable();
     AddAttr<int64_t>(
diff --git a/paddle/fluid/operators/cinn/cinn_launch_op.h b/paddle/fluid/operators/cinn/cinn_launch_op.h
index 8efe623064b..e27ef607917 100644
--- a/paddle/fluid/operators/cinn/cinn_launch_op.h
+++ b/paddle/fluid/operators/cinn/cinn_launch_op.h
@@ -34,7 +34,6 @@ DECLARE_bool(enable_pe_launch_cinn);
 namespace paddle {
 namespace operators {
 
-using LoDTensor = phi::DenseTensor;
 using CinnCompiler = framework::paddle2cinn::CinnCompiler;
 using CinnCompiledObject = framework::paddle2cinn::CinnCompiledObject;
 
@@ -76,29 +75,30 @@ class CinnLaunchOpKernel : public framework::OpKernel<T> {
             << "value:\n"
             << CinnCompiler::GetInstance()->ReadableKey(compilation_key);
 
-    std::map<std::string, const LoDTensor*> inputs_name2tensor;
+    std::map<std::string, const phi::DenseTensor*> inputs_name2tensor;
     std::vector<std::string> input_x_variable_names;
     std::vector<std::string> input_no_need_buffer_variable_names;
     auto add_name2tensor_fn =
-        [&inputs_name2tensor](const std::vector<std::string>& variable_names,
-                              const std::vector<const LoDTensor*>& tensors) {
+        [&inputs_name2tensor](
+            const std::vector<std::string>& variable_names,
+            const std::vector<const phi::DenseTensor*>& tensors) {
           std::transform(
               variable_names.begin(),
               variable_names.end(),
               tensors.begin(),
               std::inserter(inputs_name2tensor, inputs_name2tensor.end()),
-              [](const std::string& name, const LoDTensor* tensor) {
+              [](const std::string& name, const phi::DenseTensor* tensor) {
                 return std::make_pair(name, tensor);
               });
         };
 
-    auto input_x_tensors = ctx.MultiInput<LoDTensor>(kX);
+    auto input_x_tensors = ctx.MultiInput<phi::DenseTensor>(kX);
     if (!input_x_tensors.empty()) {
       input_x_variable_names = std::move(ctx.InputNames(kX));
       add_name2tensor_fn(input_x_variable_names, input_x_tensors);
     }
     auto input_no_need_buffer_tensors =
-        ctx.MultiInput<LoDTensor>(kNoNeedBufferX);
+        ctx.MultiInput<phi::DenseTensor>(kNoNeedBufferX);
     if (!input_no_need_buffer_tensors.empty()) {
       input_no_need_buffer_variable_names =
           std::move(ctx.InputNames(kNoNeedBufferX));
diff --git a/paddle/fluid/operators/cinn/cinn_launch_op_test.cc b/paddle/fluid/operators/cinn/cinn_launch_op_test.cc
index b26c97dda18..dad26dc6374 100644
--- a/paddle/fluid/operators/cinn/cinn_launch_op_test.cc
+++ b/paddle/fluid/operators/cinn/cinn_launch_op_test.cc
@@ -78,8 +78,8 @@ class TestCinnLaunchOp : public ::testing::Test {
     // Run ops and check the computation results
     framework::Scope scope;
     InitVariablesWithRandomValue<float>({"x", "y"}, {10, 20}, place, &scope);
-    scope.Var(test_op_out_name)->GetMutable<LoDTensor>();
-    scope.Var(add_op_out_name)->GetMutable<LoDTensor>();
+    scope.Var(test_op_out_name)->GetMutable<phi::DenseTensor>();
+    scope.Var(add_op_out_name)->GetMutable<phi::DenseTensor>();
     elementwise_add_op->Run(scope, place);
     cinn_launch_op->Run(scope, place);
     CompareOpResult<float>(scope.GetVar(test_op_out_name),
diff --git a/paddle/fluid/operators/cinn/test_helper.h b/paddle/fluid/operators/cinn/test_helper.h
index eed1b72420b..d35996771b4 100644
--- a/paddle/fluid/operators/cinn/test_helper.h
+++ b/paddle/fluid/operators/cinn/test_helper.h
@@ -29,7 +29,6 @@ limitations under the License. */
 
 namespace paddle::operators {
 
-using LoDTensor = phi::DenseTensor;
 using Variable = framework::Variable;
 using Graph = framework::ir::Graph;
 using Node = framework::ir::Node;
@@ -97,11 +96,11 @@ void InitVariablesWithRandomValue(const std::vector<std::string>& var_names,
   std::default_random_engine engine(seed());
   std::uniform_real_distribution<float> dist(0, 100);
 
-  LoDTensor tmp_tensor;
+  phi::DenseTensor tmp_tensor;
   auto* tmp_data =
       tmp_tensor.mutable_data<DataType>(common_ddim, platform::CPUPlace());
   for (const auto& var_name : var_names) {
-    auto* tensor = scope->Var(var_name)->GetMutable<LoDTensor>();
+    auto* tensor = scope->Var(var_name)->GetMutable<phi::DenseTensor>();
     tensor->mutable_data<DataType>(common_ddim, place);
     for (auto i = 0; i < tensor->numel(); ++i) {
       tmp_data[i] = static_cast<DataType>(dist(engine));
@@ -112,11 +111,12 @@ void InitVariablesWithRandomValue(const std::vector<std::string>& var_names,
 
 template <typename DataType>
 void CompareOpResult(Variable* test_out, Variable* expected_out) {
-  LoDTensor test_tensor, expected_tensor;
+  phi::DenseTensor test_tensor, expected_tensor;
   paddle::framework::TensorCopySync(
-      test_out->Get<LoDTensor>(), platform::CPUPlace(), &test_tensor);
-  paddle::framework::TensorCopySync(
-      expected_out->Get<LoDTensor>(), platform::CPUPlace(), &expected_tensor);
+      test_out->Get<phi::DenseTensor>(), platform::CPUPlace(), &test_tensor);
+  paddle::framework::TensorCopySync(expected_out->Get<phi::DenseTensor>(),
+                                    platform::CPUPlace(),
+                                    &expected_tensor);
 
   ASSERT_TRUE(test_tensor.IsInitialized());
   ASSERT_TRUE(expected_tensor.IsInitialized());
diff --git a/paddle/fluid/operators/collective/c_embedding_op.cc b/paddle/fluid/operators/collective/c_embedding_op.cc
index c11ac829932..caea70c223b 100644
--- a/paddle/fluid/operators/collective/c_embedding_op.cc
+++ b/paddle/fluid/operators/collective/c_embedding_op.cc
@@ -162,7 +162,7 @@ class CEmbeddingOpGradVarTypeInference : public framework::VarTypeInference {
   void operator()(framework::InferVarTypeContext* ctx) const override {
     auto out_var_name = framework::GradVarName("W");
     VLOG(3) << "c_embedding_grad op " << framework::GradVarName("W")
-            << " is set to LoDTensor";
+            << " is set to phi::DenseTensor";
     ctx->SetOutputType(out_var_name, framework::proto::VarType::LOD_TENSOR);
     ctx->SetOutputDataType(out_var_name, ctx->GetInputDataType("W"));
   }
diff --git a/paddle/fluid/operators/collective/c_embedding_op.cu b/paddle/fluid/operators/collective/c_embedding_op.cu
index e1fa8795d42..3a8294f52fc 100644
--- a/paddle/fluid/operators/collective/c_embedding_op.cu
+++ b/paddle/fluid/operators/collective/c_embedding_op.cu
@@ -86,9 +86,9 @@ template <typename T>
 class CEmbeddingCUDAKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &context) const override {
-    auto *table_t = context.Input<LoDTensor>("W");
-    auto *ids_t = context.Input<LoDTensor>("Ids");
-    auto *output_t = context.Output<LoDTensor>("Out");
+    auto *table_t = context.Input<phi::DenseTensor>("W");
+    auto *ids_t = context.Input<phi::DenseTensor>("Ids");
+    auto *output_t = context.Output<phi::DenseTensor>("Out");
 
     const auto &dev_ctx = context.template device_context<phi::GPUContext>();
     const int64_t start_idx = context.Attr<int64_t>("start_index");
@@ -142,9 +142,11 @@ class CEmbeddingGradCUDAKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext &context) const override {
     const auto &dev_ctx = context.template device_context<phi::GPUContext>();
     const int64_t start_idx = context.Attr<int64_t>("start_index");
-    auto ids_t = context.Input<LoDTensor>("Ids");
-    auto d_output_t = context.Input<LoDTensor>(framework::GradVarName("Out"));
-    auto d_table_t = context.Output<LoDTensor>(framework::GradVarName("W"));
+    auto ids_t = context.Input<phi::DenseTensor>("Ids");
+    auto d_output_t =
+        context.Input<phi::DenseTensor>(framework::GradVarName("Out"));
+    auto d_table_t =
+        context.Output<phi::DenseTensor>(framework::GradVarName("W"));
 
     int N = d_table_t->dims()[0];
     int D = d_table_t->dims()[1];
diff --git a/paddle/fluid/operators/collective/c_embedding_op.h b/paddle/fluid/operators/collective/c_embedding_op.h
index 55fd021a7cd..1245d1bafd2 100644
--- a/paddle/fluid/operators/collective/c_embedding_op.h
+++ b/paddle/fluid/operators/collective/c_embedding_op.h
@@ -25,8 +25,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using LoDTensor = phi::DenseTensor;
-
 inline void CheckTableValid() {}
 
 template <typename TIds, typename TData>
@@ -57,9 +55,9 @@ template <typename T>
 class CEmbeddingOpCPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* table_t = ctx.Input<LoDTensor>("W");
-    auto* ids_t = ctx.Input<LoDTensor>("Ids");
-    auto* output_t = ctx.Output<LoDTensor>("Out");
+    auto* table_t = ctx.Input<phi::DenseTensor>("W");
+    auto* ids_t = ctx.Input<phi::DenseTensor>("Ids");
+    auto* output_t = ctx.Output<phi::DenseTensor>("Out");
     const int64_t start_idx = ctx.Attr<int64_t>("start_index");
 
     VLOG(10) << "table_dims:" << table_t->dims();
@@ -119,10 +117,12 @@ class CEmbeddingGradOpCPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
     const int64_t start_idx = context.Attr<int64_t>("start_index");
-    auto ids_t = context.Input<LoDTensor>("Ids");
-    auto d_output_t = context.Input<LoDTensor>(framework::GradVarName("Out"));
-    auto table_t = context.Input<LoDTensor>("W");
-    auto table_grad_t = context.Output<LoDTensor>(framework::GradVarName("W"));
+    auto ids_t = context.Input<phi::DenseTensor>("Ids");
+    auto d_output_t =
+        context.Input<phi::DenseTensor>(framework::GradVarName("Out"));
+    auto table_t = context.Input<phi::DenseTensor>("W");
+    auto table_grad_t =
+        context.Output<phi::DenseTensor>(framework::GradVarName("W"));
 
     T* table_grad_data =
         table_grad_t->mutable_data<T>(table_t->dims(), context.GetPlace());
diff --git a/paddle/fluid/operators/collective/c_embedding_op_npu.cc b/paddle/fluid/operators/collective/c_embedding_op_npu.cc
index f7fd7fbd83e..c52d6bbe025 100644
--- a/paddle/fluid/operators/collective/c_embedding_op_npu.cc
+++ b/paddle/fluid/operators/collective/c_embedding_op_npu.cc
@@ -111,9 +111,9 @@ void shard_index(const Tensor &table_t,
 
 template <typename TIds, typename T>
 void NPUGetIdsEmbedding(const framework::ExecutionContext &context) {
-  auto *table_t = context.Input<LoDTensor>("W");
-  auto *ids_t = context.Input<LoDTensor>("Ids");
-  auto *output_t = context.Output<LoDTensor>("Out");
+  auto *table_t = context.Input<phi::DenseTensor>("W");
+  auto *ids_t = context.Input<phi::DenseTensor>("Ids");
+  auto *output_t = context.Output<phi::DenseTensor>("Out");
   const int64_t start_idx = context.Attr<int64_t>("start_index");
 
   auto stream =
@@ -165,7 +165,7 @@ template <typename T>
 class CEmbeddingNPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &context) const override {
-    auto *ids_t = context.Input<LoDTensor>("Ids");
+    auto *ids_t = context.Input<phi::DenseTensor>("Ids");
 
     const auto &index_type = framework::TransToProtoVarType(ids_t->dtype());
     if (index_type == framework::proto::VarType::INT32) {
@@ -181,10 +181,12 @@ template <typename TIds, typename T>
 void NPUUpdateEmbedding(const framework::ExecutionContext &context) {
   // get inputs
   const int64_t start_idx = context.Attr<int64_t>("start_index");
-  auto ids_t = context.Input<LoDTensor>("Ids");
-  auto d_output_t = context.Input<LoDTensor>(framework::GradVarName("Out"));
+  auto ids_t = context.Input<phi::DenseTensor>("Ids");
+  auto d_output_t =
+      context.Input<phi::DenseTensor>(framework::GradVarName("Out"));
   auto table_t = context.Input<phi::DenseTensor>("W");
-  auto table_grad_t = context.Output<LoDTensor>(framework::GradVarName("W"));
+  auto table_grad_t =
+      context.Output<phi::DenseTensor>(framework::GradVarName("W"));
 
   VLOG(10) << "ids_t:" << ids_t << ", d_output_t:" << d_output_t
            << ", table_t:" << table_t << ", table_grad_t" << table_grad_t;
@@ -243,7 +245,7 @@ template <typename T>
 class CEmbeddingGradNPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &context) const override {
-    auto *ids_t = context.Input<LoDTensor>("Ids");
+    auto *ids_t = context.Input<phi::DenseTensor>("Ids");
 
     const auto &index_type = framework::TransToProtoVarType(ids_t->dtype());
     if (index_type == framework::proto::VarType::INT32) {
diff --git a/paddle/fluid/operators/collective/c_embedding_op_xpu.cc b/paddle/fluid/operators/collective/c_embedding_op_xpu.cc
index d70da7e3a7d..c966ed3354a 100644
--- a/paddle/fluid/operators/collective/c_embedding_op_xpu.cc
+++ b/paddle/fluid/operators/collective/c_embedding_op_xpu.cc
@@ -18,15 +18,13 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using LoDTensor = phi::DenseTensor;
-
 template <typename DeviceContext, typename T>
 class CEmbeddingOpXPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* table_t = ctx.Input<LoDTensor>("W");
-    auto* ids_t = ctx.Input<LoDTensor>("Ids");
-    auto* output_t = ctx.Output<LoDTensor>("Out");
+    auto* table_t = ctx.Input<phi::DenseTensor>("W");
+    auto* ids_t = ctx.Input<phi::DenseTensor>("Ids");
+    auto* output_t = ctx.Output<phi::DenseTensor>("Out");
     const int64_t start_index = ctx.Attr<int64_t>("start_index");
     const T* table_data = table_t->data<T>();
     T* output_data = output_t->mutable_data<T>(ctx.GetPlace());
diff --git a/paddle/fluid/operators/controlflow/conditional_block_op.cc b/paddle/fluid/operators/controlflow/conditional_block_op.cc
index d441a84bc63..6425c3519e9 100644
--- a/paddle/fluid/operators/controlflow/conditional_block_op.cc
+++ b/paddle/fluid/operators/controlflow/conditional_block_op.cc
@@ -363,13 +363,14 @@ class ConditionalBlockGradOp : public ConditionalOp {
       }
 
       if (input_var->IsType<phi::DenseTensor>()) {
-        PADDLE_ENFORCE_EQ(outside_var->IsType<phi::DenseTensor>(),
-                          true,
-                          platform::errors::InvalidArgument(
-                              "Type of outside_var %s is NOT LoDTensor, which "
-                              "doesn't match input_var %s.",
-                              outside_grad_name,
-                              input_name));
+        PADDLE_ENFORCE_EQ(
+            outside_var->IsType<phi::DenseTensor>(),
+            true,
+            platform::errors::InvalidArgument(
+                "Type of outside_var %s is NOT phi::DenseTensor, which "
+                "doesn't match input_var %s.",
+                outside_grad_name,
+                input_name));
         AssignZeroToOutsideTensor(place,
                                   scope,
                                   input_var->Get<phi::DenseTensor>(),
@@ -402,7 +403,8 @@ class ConditionalBlockGradOp : public ConditionalOp {
       } else {
         // TODO(huihuangzheng): add support for SelectedRows
         PADDLE_THROW(platform::errors::InvalidArgument(
-            "Conditional block grad op doesn't support non-LoDTensor output "
+            "Conditional block grad op doesn't support non-phi::DenseTensor "
+            "output "
             "now."));
       }
     }
@@ -475,9 +477,9 @@ class ConditionalBlockGradInferShape : public framework::InferShapeBase {
 class ConditionalBlockGradInferVarType : public framework::VarTypeInference {
  public:
   void operator()(framework::InferVarTypeContext *ctx) const override {
-    // NOTE(Aurelius84): VarType of Output is LoDTensor by default. In case of
-    // Input is {Tensor, LoDTensorArray}, we need synchronous the Input's
-    // VarType into Input@GRAD to avoid generating {Tensor, Tensor} as
+    // NOTE(Aurelius84): VarType of Output is phi::DenseTensor by default. If
+    // Input is {Tensor, LoDTensorArray}, we need to synchronize the Input's
+    // VarType into Input@GRAD to avoid generating {Tensor, Tensor} as
     // Input@GRAD.
     auto input_size = ctx->InputSize(ConditionalOp::kInputs);
     auto output_size =
diff --git a/paddle/fluid/operators/controlflow/conditional_block_op_test.cc b/paddle/fluid/operators/controlflow/conditional_block_op_test.cc
index 93947cb5246..62552dc1fc8 100644
--- a/paddle/fluid/operators/controlflow/conditional_block_op_test.cc
+++ b/paddle/fluid/operators/controlflow/conditional_block_op_test.cc
@@ -21,7 +21,6 @@ limitations under the License. */
 USE_NO_KERNEL_OP(conditional_block);
 USE_NO_KERNEL_OP(conditional_block_grad);
 
-using LoDTensor = phi::DenseTensor;
 using LoDTensorArray = paddle::framework::LoDTensorArray;
 using Scope = paddle::framework::Scope;
 using Variable = paddle::framework::Variable;
@@ -32,7 +31,7 @@ TEST(ConditionalBlockGrad, NoNeedRunLoDTensorArray) {
   Scope scope;
 
   Variable* cond_var = scope.Var("condition");
-  LoDTensor* cond_tensor = cond_var->GetMutable<LoDTensor>();
+  phi::DenseTensor* cond_tensor = cond_var->GetMutable<phi::DenseTensor>();
   paddle::framework::DDim cond_dims = phi::make_ddim({1});
   bool* cond_data = cond_tensor->mutable_data<bool>(cond_dims, place);
   cond_data[0] = false;
@@ -41,7 +40,7 @@ TEST(ConditionalBlockGrad, NoNeedRunLoDTensorArray) {
   LoDTensorArray* input_tensors = input_var->GetMutable<LoDTensorArray>();
   for (int i = 0; i < 5; ++i) {
     paddle::framework::DDim in_dims = phi::make_ddim({i + 1, i + 2});
-    LoDTensor lod_tensor;
+    phi::DenseTensor lod_tensor;
     float* in_data = lod_tensor.mutable_data<float>(in_dims, place);
     for (int j = 0; j < (i + 1) * (i + 2); ++j) {
       in_data[j] = static_cast<float>(j);
diff --git a/paddle/fluid/operators/controlflow/feed_op.cc b/paddle/fluid/operators/controlflow/feed_op.cc
index 65735cfb0c7..e076ead8914 100644
--- a/paddle/fluid/operators/controlflow/feed_op.cc
+++ b/paddle/fluid/operators/controlflow/feed_op.cc
@@ -29,7 +29,7 @@ namespace paddle {
 namespace operators {
 
 // FeedVariableVisitor is to feed the variable data
-// according to data type (LoDTensor or  Strings).
+// according to data type (phi::DenseTensor or Strings).
 class FeedVariableVisitor {
  public:
   explicit FeedVariableVisitor(framework::Variable *out_var,
@@ -146,11 +146,11 @@ class FeedOpInfoMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("X",
-             "(vector<LoDTensor>) "
-             "A feeding list of LoDTensor, which may have "
+             "(vector<phi::DenseTensor>) "
+             "A feeding list of phi::DenseTensor, which may have "
              "different dimension and data type.");
     AddOutput("Out",
-              "(LoDTensor) The LoDTensor which is a copy "
+              "(phi::DenseTensor) The phi::DenseTensor which is a copy "
               "of the col-th feeding "
               "object.");
     AddAttr<int>("col", "(int) The column index of current feeding object.");
diff --git a/paddle/fluid/operators/controlflow/fetch_op.cc b/paddle/fluid/operators/controlflow/fetch_op.cc
index a84c5cf04e2..17b0f577f14 100644
--- a/paddle/fluid/operators/controlflow/fetch_op.cc
+++ b/paddle/fluid/operators/controlflow/fetch_op.cc
@@ -143,12 +143,14 @@ class FetchOpInfoMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("X",
-             "(LoDTensor) The resulted LoDTensor which is expected to return "
+             "(phi::DenseTensor) The resulted phi::DenseTensor which is "
+             "expected to return "
              "to users.");
     AddOutput(
         "Out",
-        "(vector<LoDTensor>|unordered_map<string, int32_t>) A fetching list"
-        " of LoDTensor|unordered_map<string, int32_t> which may have "
+        "(vector<phi::DenseTensor>|unordered_map<string, int32_t>) A fetching "
+        "list"
+        " of phi::DenseTensor|unordered_map<string, int32_t> which may have "
         "different dimension, shape and data type.");
     AddAttr<int>("col", "(int) The column index of fetching object.");
     AddComment(R"DOC(
diff --git a/paddle/fluid/operators/controlflow/fetch_v2_op.cc b/paddle/fluid/operators/controlflow/fetch_v2_op.cc
index 4daec3a6f92..939e58ebbb3 100644
--- a/paddle/fluid/operators/controlflow/fetch_v2_op.cc
+++ b/paddle/fluid/operators/controlflow/fetch_v2_op.cc
@@ -201,10 +201,12 @@ class FetchV2OpProtoMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("X",
-             "(LoDTensor) The resulted LoDTensor which is expected to return "
+             "(phi::DenseTensor) The resulted phi::DenseTensor which is "
+             "expected to return "
              "to users.");
     AddOutput("Out",
-              "(vector<LoDTensor>) A fetching list of LoDTensor which may have "
+              "(vector<phi::DenseTensor>) A fetching list of phi::DenseTensor "
+              "which may have "
               "different dimension, shape and data type.");
     AddAttr<int>("col", "(int) The column index of fetching object.");
     AddAttr<bool>("deepcopy", "(bool) Whether deep copy is required.")
diff --git a/paddle/fluid/operators/controlflow/logical_op.cc b/paddle/fluid/operators/controlflow/logical_op.cc
index 7e1d6fbdca8..c6dde6f4ba5 100644
--- a/paddle/fluid/operators/controlflow/logical_op.cc
+++ b/paddle/fluid/operators/controlflow/logical_op.cc
@@ -35,7 +35,7 @@ class BinaryLogicalOpProtoMaker : public framework::OpProtoAndCheckerMaker {
                              comment.type));
     AddOutput("Out", string::Sprintf("n-dim bool Variable"));
     AddComment(string::Sprintf(R"DOC(%s Operator
-It operates element-wise on X and Y, and returns the Out. X, Y and Out are N-dim LoDTensor or Tensor.
+It operates element-wise on X and Y, and returns the Out. X, Y and Out are N-dim phi::DenseTensor or Tensor.
 Each element of Out is calculated by %s
 )DOC",
                                comment.type,
@@ -49,13 +49,14 @@ class UnaryLogicalOpProtoMaker : public framework::OpProtoAndCheckerMaker {
   void Make() override {
     OpComment comment;
     AddInput("X",
-             string::Sprintf("Operand of %s operator. Must be "
-                             "a LoDTensor or Tensor of type being one of bool, "
-                             "int8, int16, int32, int64, float32, float64.",
-                             comment.type));
-    AddOutput("Out", string::Sprintf("n-dim bool LoDTensor or Tensor."));
+             string::Sprintf(
+                 "Operand of %s operator. Must be "
+                 "a phi::DenseTensor or Tensor of type being one of bool, "
+                 "int8, int16, int32, int64, float32, float64.",
+                 comment.type));
+    AddOutput("Out", string::Sprintf("n-dim bool phi::DenseTensor or Tensor."));
     AddComment(string::Sprintf(R"DOC(%s Operator
-It operates element-wise on X, and returns the Out. X and Out are N-dim LoDTensor or Tensor.
+It operates element-wise on X, and returns the Out. X and Out are N-dim phi::DenseTensor or Tensor.
 Each element of Out is calculated by %s
 )DOC",
                                comment.type,
diff --git a/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc b/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc
index f7b7e62279b..b9aff315444 100644
--- a/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc
+++ b/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc
@@ -67,7 +67,8 @@ class WriteToArrayOp : public ArrayOp {
 class WriteToArrayOpProtoMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
-    AddInput("X", "(LoDTensor) the tensor will be written to tensor array");
+    AddInput("X",
+             "(phi::DenseTensor) the tensor will be written to tensor array");
     AddInput(
         "I",
         "(Tensor) the subscript index in tensor array. The number of element "
@@ -76,9 +77,9 @@ class WriteToArrayOpProtoMaker : public framework::OpProtoAndCheckerMaker {
     AddComment(R"DOC(
 WriteToArray Operator.
 
-This operator writes a LoDTensor to a LoDTensor array.
+This operator writes a phi::DenseTensor to a phi::DenseTensor array.
 
-Assume $T$ is LoDTensor, $i$ is the subscript of the array, and $A$ is the array. The
+Assume $T$ is phi::DenseTensor, $i$ is the subscript of the array, and $A$ is the array. The
 equation is
 
 $$A[i] = T$$
@@ -196,13 +197,13 @@ class ReadFromArrayProtoMaker : public framework::OpProtoAndCheckerMaker {
              "(Tensor) the writed tensor when used as the grad op of "
              "write_to_array. We use this to fill zero gradient.")
         .AsDispensable();
-    AddOutput("Out", "(LoDTensor) the tensor will be read from.");
+    AddOutput("Out", "(phi::DenseTensor) the tensor will be read from.");
     AddComment(R"DOC(
 ReadFromArray Operator.
 
-Read a LoDTensor from a LoDTensor Array.
+Read a phi::DenseTensor from a phi::DenseTensor Array.
 
-Assume $T$ is LoDTensor, $i$ is the subscript of the array, and $A$ is the array. The
+Assume $T$ is phi::DenseTensor, $i$ is the subscript of the array, and $A$ is the array. The
 equation is
 
 $$T = A[i]$$
diff --git a/paddle/fluid/operators/controlflow/while_op.cc b/paddle/fluid/operators/controlflow/while_op.cc
index b5e30d8aaf0..cc6bb72324e 100644
--- a/paddle/fluid/operators/controlflow/while_op.cc
+++ b/paddle/fluid/operators/controlflow/while_op.cc
@@ -32,7 +32,6 @@ namespace paddle {
 namespace operators {
 
 using StepScopeVar = std::vector<framework::Scope *>;
-using LoDTensor = phi::DenseTensor;
 
 namespace {  // NOLINT
 static std::string GetSkipEagerDeletionVarsDebugString(
@@ -62,7 +61,7 @@ class WhileOp : public framework::OperatorBase {
                             platform::errors::NotFound(
                                 "Input(Condition) of WhileOp is not found."));
 
-    auto &cond = scope.FindVar(Input(kCondition))->Get<LoDTensor>();
+    auto &cond = scope.FindVar(Input(kCondition))->Get<phi::DenseTensor>();
     PADDLE_ENFORCE_EQ(
         cond.dims(),
         phi::make_ddim({1}),
@@ -149,9 +148,10 @@ class WhileOp : public framework::OperatorBase {
             framework::Variable *input_var = scope.FindVar(input_var_name);
             if (input_var->IsType<phi::DenseTensor>()) {
               rename_vars.push_back(input_var_rename);
-              auto input_var_tensor = input_var->Get<LoDTensor>();
+              auto input_var_tensor = input_var->Get<phi::DenseTensor>();
               auto *rename_input_var_tensor =
-                  current_scope.Var(input_var_rename)->GetMutable<LoDTensor>();
+                  current_scope.Var(input_var_rename)
+                      ->GetMutable<phi::DenseTensor>();
               framework::TensorCopy(
                   input_var_tensor, dev_place, rename_input_var_tensor);
               rename_input_var_tensor->set_lod(input_var_tensor.lod());
@@ -166,8 +166,8 @@ class WhileOp : public framework::OperatorBase {
               var_rename.substr(0, var_rename.size() - strlen(kSuffix));
           current_scope.Rename(var_rename, input_var_name);
         }
-        cond_data =
-            GetCondData(scope.FindVar(Input(kCondition))->Get<LoDTensor>());
+        cond_data = GetCondData(
+            scope.FindVar(Input(kCondition))->Get<phi::DenseTensor>());
       }
     } else {
       auto &current_scope = scope.NewScope();
@@ -188,8 +188,8 @@ class WhileOp : public framework::OperatorBase {
         }
         executor.RunPreparedContext(
             ctx.get(), &current_scope, false, false, false);
-        cond_data =
-            GetCondData(scope.FindVar(Input(kCondition))->Get<LoDTensor>());
+        cond_data = GetCondData(
+            scope.FindVar(Input(kCondition))->Get<phi::DenseTensor>());
       }
       scope.DeleteScope(&current_scope);
     }
@@ -325,7 +325,8 @@ class WhileGradOp : public framework::OperatorBase {
           }
         } else {
           PADDLE_THROW(platform::errors::Unimplemented(
-              "Currently only support LoDTensor and LoDTensorArray in "
+              "Currently only support phi::DenseTensor and "
+              "phi::DenseTensorArray in "
               "WhileGradOp."));
         }
       }
@@ -398,16 +399,16 @@ class WhileGradOp : public framework::OperatorBase {
                                          inside_grad_name));
           PADDLE_ENFORCE_EQ(
               var->IsType<framework::LoDTensorArray>() ||
-                  var->IsType<LoDTensor>(),
+                  var->IsType<phi::DenseTensor>(),
               true,
               platform::errors::InvalidArgument(
                   "Currently the type of var only can be LoDTensorArray, "
-                  "or LoDTensor, but the received var[%s] is %s.",
+                  "or phi::DenseTensor, but the received var[%s] is %s.",
                   inside_grad_name,
                   framework::ToTypeName(var->Type())));
 
           if ((var_iter == outside_og_names.end()) &&
-              var->IsType<LoDTensor>()) {
+              var->IsType<phi::DenseTensor>()) {
             auto &inside_tensor = var->Get<phi::DenseTensor>();
             framework::AttributeMap attrs;
             attrs["dtype"] =
diff --git a/paddle/fluid/operators/detection/bbox_util.cu.h b/paddle/fluid/operators/detection/bbox_util.cu.h
index 5e946877e01..a9ad6cdfb65 100644
--- a/paddle/fluid/operators/detection/bbox_util.cu.h
+++ b/paddle/fluid/operators/detection/bbox_util.cu.h
@@ -31,7 +31,6 @@ namespace paddle {
 namespace operators {
 
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 
 #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
 
diff --git a/paddle/fluid/operators/detection/bipartite_match_op.cc b/paddle/fluid/operators/detection/bipartite_match_op.cc
index 4eb7584973a..35368d00342 100644
--- a/paddle/fluid/operators/detection/bipartite_match_op.cc
+++ b/paddle/fluid/operators/detection/bipartite_match_op.cc
@@ -19,7 +19,6 @@ namespace paddle {
 namespace operators {
 
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 
 class BipartiteMatchOp : public framework::OperatorWithKernel {
  public:
@@ -196,7 +195,7 @@ class BipartiteMatchKernel : public framework::OpKernel<T> {
   }
 
   void Compute(const framework::ExecutionContext& context) const override {
-    auto* dist_mat = context.Input<LoDTensor>("DistMat");
+    auto* dist_mat = context.Input<phi::DenseTensor>("DistMat");
     auto* match_indices =
         context.Output<phi::DenseTensor>("ColToRowMatchIndices");
     auto* match_dist = context.Output<phi::DenseTensor>("ColToRowMatchDist");
@@ -251,7 +250,8 @@ class BipartiteMatchOpMaker : public framework::OpProtoAndCheckerMaker {
   void Make() override {
     AddInput(
         "DistMat",
-        "(LoDTensor or Tensor) this input is a 2-D LoDTensor with shape "
+        "(phi::DenseTensor or Tensor) this input is a 2-D phi::DenseTensor "
+        "with shape "
         "[K, M]. It is pair-wise distance matrix between the entities "
         "represented by each row and each column. For example, assumed one "
         "entity is A with shape [K], another entity is B with shape [M]. The "
@@ -302,8 +302,8 @@ row entity to the column entity and the matched indices are not duplicated
 in each row of ColToRowMatchIndices. If the column entity is not matched
 any row entity, set -1 in ColToRowMatchIndices.
 
-Please note that the input DistMat can be LoDTensor (with LoD) or Tensor.
-If LoDTensor with LoD, the height of ColToRowMatchIndices is batch size.
+Please note that the input DistMat can be phi::DenseTensor (with LoD) or Tensor.
+If phi::DenseTensor with LoD, the height of ColToRowMatchIndices is batch size.
 If Tensor, the height of ColToRowMatchIndices is 1.
 
 )DOC");
diff --git a/paddle/fluid/operators/detection/box_clip_op.cc b/paddle/fluid/operators/detection/box_clip_op.cc
index 89650d62351..46363ceb535 100644
--- a/paddle/fluid/operators/detection/box_clip_op.cc
+++ b/paddle/fluid/operators/detection/box_clip_op.cc
@@ -66,15 +66,15 @@ class BoxClipOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("Input",
-             "(LoDTensor) "
-             "Input is a LoDTensor with shape [..., 4] holds 4 points"
+             "(phi::DenseTensor) "
+             "Input is a phi::DenseTensor with shape [..., 4] holds 4 points"
              "in last dimension in format [xmin, ymin, xmax, ymax]");
     AddInput("ImInfo",
              "(Tensor) Information for image reshape is in shape (N, 3), "
              "in format (height, width, im_scale)");
     AddOutput("Output",
-              "(LoDTensor) "
-              "Output is a LoDTensor with the same shape as Input"
+              "(phi::DenseTensor) "
+              "Output is a phi::DenseTensor with the same shape as Input"
               "and it is the result after clip");
     AddComment(R"DOC(
 This operator clips input boxes to original input images.
diff --git a/paddle/fluid/operators/detection/box_clip_op.cu b/paddle/fluid/operators/detection/box_clip_op.cu
index 8fc8ec221f3..089f2f55692 100644
--- a/paddle/fluid/operators/detection/box_clip_op.cu
+++ b/paddle/fluid/operators/detection/box_clip_op.cu
@@ -49,9 +49,9 @@ template <typename DeviceContext, typename T>
 class GPUBoxClipKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &context) const override {
-    auto *input = context.Input<LoDTensor>("Input");
+    auto *input = context.Input<phi::DenseTensor>("Input");
     auto *im_info = context.Input<phi::DenseTensor>("ImInfo");
-    auto *output = context.Output<LoDTensor>("Output");
+    auto *output = context.Output<phi::DenseTensor>("Output");
     const int64_t num = input->dims()[0];
     const int64_t bbox_width = input->numel() / num;
     auto lod = input->lod();
diff --git a/paddle/fluid/operators/detection/box_clip_op.h b/paddle/fluid/operators/detection/box_clip_op.h
index 544cb3c0331..bb72ca194b5 100644
--- a/paddle/fluid/operators/detection/box_clip_op.h
+++ b/paddle/fluid/operators/detection/box_clip_op.h
@@ -20,15 +20,14 @@ namespace paddle {
 namespace operators {
 
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 
 template <typename DeviceContext, typename T>
 class BoxClipKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    auto* input_box = context.Input<LoDTensor>("Input");
-    auto* im_info = context.Input<LoDTensor>("ImInfo");
-    auto* output_box = context.Output<LoDTensor>("Output");
+    auto* input_box = context.Input<phi::DenseTensor>("Input");
+    auto* im_info = context.Input<phi::DenseTensor>("ImInfo");
+    auto* output_box = context.Output<phi::DenseTensor>("Output");
     auto& dev_ctx = context.template device_context<phi::CPUContext>();
     output_box->mutable_data<T>(context.GetPlace());
     if (input_box->lod().size()) {
diff --git a/paddle/fluid/operators/detection/box_coder_op.cc b/paddle/fluid/operators/detection/box_coder_op.cc
index 5120f687dee..aafe040991e 100644
--- a/paddle/fluid/operators/detection/box_coder_op.cc
+++ b/paddle/fluid/operators/detection/box_coder_op.cc
@@ -44,7 +44,8 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker {
         .AsDispensable();
     AddInput(
         "TargetBox",
-        "(LoDTensor or Tensor) This input can be a 2-D LoDTensor with shape "
+        "(phi::DenseTensor or Tensor) This input can be a 2-D phi::DenseTensor "
+        "with shape "
         "[N, 4] when code_type is 'encode_center_size'. This input also can "
         "be a 3-D Tensor with shape [N, M, 4] when code_type is "
         "'decode_center_size'. [N, 4], each box is represented as "
@@ -79,7 +80,7 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker {
         "not be provided at the same time.")
         .SetDefault(std::vector<float>{});
     AddOutput("OutputBox",
-              "(LoDTensor or Tensor) "
+              "(phi::DenseTensor or Tensor) "
               "When code_type is 'encode_center_size', the output tensor of "
               "box_coder_op with shape [N, M, 4] representing the result of N "
               "target boxes encoded with M Prior boxes and variances. When "
diff --git a/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc b/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc
index 62f5523ebc6..7de0f05e0d3 100644
--- a/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc
+++ b/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc
@@ -14,8 +14,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using LoDTensor = phi::DenseTensor;
-
 class BoxDecoderAndAssignOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
@@ -157,12 +155,12 @@ class BoxDecoderAndAssignOpMaker : public framework::OpProtoAndCheckerMaker {
              "default.")
         .AsDispensable();
     AddInput("TargetBox",
-             "(LoDTensor or Tensor) "
-             "This input can be a 2-D LoDTensor with shape "
+             "(phi::DenseTensor or Tensor) "
+             "This input can be a 2-D phi::DenseTensor with shape "
              "[N, classnum*4]. It holds N targets for N boxes.");
     AddInput("BoxScore",
-             "(LoDTensor or Tensor) "
-             "This input can be a 2-D LoDTensor with shape "
+             "(phi::DenseTensor or Tensor) "
+             "This input can be a 2-D phi::DenseTensor with shape "
              "[N, classnum], each box is represented as [classnum] which is "
              "the classification probabilities.");
     AddAttr<float>("box_clip",
@@ -170,12 +168,12 @@ class BoxDecoderAndAssignOpMaker : public framework::OpProtoAndCheckerMaker {
                    "clip box to prevent overflowing")
         .SetDefault(4.135f);
     AddOutput("DecodeBox",
-              "(LoDTensor or Tensor) "
+              "(phi::DenseTensor or Tensor) "
               "the output tensor of op with shape [N, classnum * 4] "
               "representing the result of N target boxes decoded with "
               "M Prior boxes and variances for each class.");
     AddOutput("OutputAssignBox",
-              "(LoDTensor or Tensor) "
+              "(phi::DenseTensor or Tensor) "
               "the output tensor of op with shape [N, 4] "
               "representing the result of N target boxes decoded with "
               "M Prior boxes and variances with the best non-background class "
diff --git a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc
index 2af8521a2db..37dc10df729 100644
--- a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc
+++ b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc
@@ -17,7 +17,6 @@ namespace paddle {
 namespace operators {
 
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 class CollectFpnProposalsOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
@@ -76,8 +75,8 @@ class CollectFpnProposalsOp : public framework::OperatorWithKernel {
             PADDLE_GET(framework::Variable *, roi_inputs[i]);
         framework::Variable *score_var =
             PADDLE_GET(framework::Variable *, score_inputs[i]);
-        auto &roi_lod = roi_var->Get<LoDTensor>().lod();
-        auto &score_lod = score_var->Get<LoDTensor>().lod();
+        auto &roi_lod = roi_var->Get<phi::DenseTensor>().lod();
+        auto &score_lod = score_var->Get<phi::DenseTensor>().lod();
         PADDLE_ENFORCE_EQ(
             roi_lod,
             score_lod,
@@ -101,11 +100,13 @@ class CollectFpnProposalsOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("MultiLevelRois",
-             "(LoDTensor) Multiple roi LoDTensors from each level in shape "
+             "(phi::DenseTensor) Multiple roi phi::DenseTensors from each "
+             "level in shape "
              "(N, 4), N is the number of RoIs")
         .AsDuplicable();
     AddInput("MultiLevelScores",
-             "(LoDTensor) Multiple score LoDTensors from each level in shape"
+             "(phi::DenseTensor) Multiple score phi::DenseTensors from each "
+             "level in shape"
              " (N, 1), N is the number of RoIs.")
         .AsDuplicable();
     AddInput(
@@ -115,7 +116,8 @@ class CollectFpnProposalsOpMaker : public framework::OpProtoAndCheckerMaker {
         "images.")
         .AsDuplicable()
         .AsDispensable();
-    AddOutput("FpnRois", "(LoDTensor) All selected RoIs with highest scores");
+    AddOutput("FpnRois",
+              "(phi::DenseTensor) All selected RoIs with highest scores");
     AddOutput("RoisNum", "(Tensor), Number of RoIs in each images.")
         .AsDispensable();
     AddAttr<int>("post_nms_topN",
diff --git a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu
index 18e52957d1a..b517f2ec1fd 100644
--- a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu
+++ b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu
@@ -34,7 +34,6 @@ namespace paddle {
 namespace operators {
 
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 
 static constexpr int kNumCUDAThreads = 64;
 static constexpr int kNumMaxinumNumBlocks = 4096;
@@ -58,9 +57,9 @@ template <typename DeviceContext, typename T>
 class GPUCollectFpnProposalsOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    const auto roi_ins = ctx.MultiInput<LoDTensor>("MultiLevelRois");
-    const auto score_ins = ctx.MultiInput<LoDTensor>("MultiLevelScores");
-    auto fpn_rois = ctx.Output<LoDTensor>("FpnRois");
+    const auto roi_ins = ctx.MultiInput<phi::DenseTensor>("MultiLevelRois");
+    const auto score_ins = ctx.MultiInput<phi::DenseTensor>("MultiLevelScores");
+    auto fpn_rois = ctx.Output<phi::DenseTensor>("FpnRois");
     auto& dev_ctx = ctx.template device_context<DeviceContext>();
 
     const int post_nms_topN = ctx.Attr<int>("post_nms_topN");
diff --git a/paddle/fluid/operators/detection/collect_fpn_proposals_op.h b/paddle/fluid/operators/detection/collect_fpn_proposals_op.h
index f055de53d5c..1495b880d3e 100644
--- a/paddle/fluid/operators/detection/collect_fpn_proposals_op.h
+++ b/paddle/fluid/operators/detection/collect_fpn_proposals_op.h
@@ -91,7 +91,7 @@ class CollectFpnProposalsOpKernel : public framework::OpKernel<T> {
             "is %d",
             multi_layer_rois.size(),
             multi_layer_scores.size()));
-    // Check if the lod information of two LoDTensor is same
+    // Check if the lod information of the two phi::DenseTensors is the same
     const int num_fpn_level = multi_layer_rois.size();
     std::vector<int> integral_of_all_rois(num_fpn_level + 1, 0);
     for (int i = 0; i < num_fpn_level; ++i) {
diff --git a/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cc b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cc
index 91aeaf3df2f..20b8846bc4c 100644
--- a/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cc
+++ b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cc
@@ -37,12 +37,14 @@ class DistributeFpnProposalsOp : public framework::OperatorWithKernel {
 class DistributeFpnProposalsOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
-    AddInput("FpnRois", "(LoDTensor) The RoIs at all levels in shape (-1, 4)");
+    AddInput("FpnRois",
+             "(phi::DenseTensor) The RoIs at all levels in shape (-1, 4)");
     AddInput("RoisNum",
              "(Tensor) The number of RoIs in shape (B),"
              "B is the number of images")
         .AsDispensable();
-    AddOutput("MultiFpnRois", "(LoDTensor) Output with distribute operator")
+    AddOutput("MultiFpnRois",
+              "(phi::DenseTensor) Output with distribute operator")
         .AsDuplicable();
     AddOutput("RestoreIndex",
               "(Tensor) An array of positive number which is "
diff --git a/paddle/fluid/operators/detection/generate_mask_labels_op.cc b/paddle/fluid/operators/detection/generate_mask_labels_op.cc
index 9f6d7cc43ab..f14768168a4 100644
--- a/paddle/fluid/operators/detection/generate_mask_labels_op.cc
+++ b/paddle/fluid/operators/detection/generate_mask_labels_op.cc
@@ -26,11 +26,12 @@ namespace paddle {
 namespace operators {
 
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 const int kBoxDim = 4;
 
 template <typename T>
-void AppendMask(LoDTensor* out, int64_t offset, phi::DenseTensor* to_add) {
+void AppendMask(phi::DenseTensor* out,
+                int64_t offset,
+                phi::DenseTensor* to_add) {
   auto* out_data = out->data<T>();
   auto* to_add_data = to_add->data<T>();
   memcpy(out_data + offset, to_add_data, to_add->numel() * sizeof(T));
@@ -331,16 +332,16 @@ template <typename T>
 class GenerateMaskLabelsKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* im_info = ctx.Input<LoDTensor>("ImInfo");
-    auto* gt_classes = ctx.Input<LoDTensor>("GtClasses");
-    auto* is_crowd = ctx.Input<LoDTensor>("IsCrowd");
-    auto* gt_segms = ctx.Input<LoDTensor>("GtSegms");
-    auto* rois = ctx.Input<LoDTensor>("Rois");
-    auto* label_int32 = ctx.Input<LoDTensor>("LabelsInt32");
+    auto* im_info = ctx.Input<phi::DenseTensor>("ImInfo");
+    auto* gt_classes = ctx.Input<phi::DenseTensor>("GtClasses");
+    auto* is_crowd = ctx.Input<phi::DenseTensor>("IsCrowd");
+    auto* gt_segms = ctx.Input<phi::DenseTensor>("GtSegms");
+    auto* rois = ctx.Input<phi::DenseTensor>("Rois");
+    auto* label_int32 = ctx.Input<phi::DenseTensor>("LabelsInt32");
 
-    auto* mask_rois = ctx.Output<LoDTensor>("MaskRois");
-    auto* roi_has_mask_int32 = ctx.Output<LoDTensor>("RoiHasMaskInt32");
-    auto* mask_int32 = ctx.Output<LoDTensor>("MaskInt32");
+    auto* mask_rois = ctx.Output<phi::DenseTensor>("MaskRois");
+    auto* roi_has_mask_int32 = ctx.Output<phi::DenseTensor>("RoiHasMaskInt32");
+    auto* mask_int32 = ctx.Output<phi::DenseTensor>("MaskInt32");
 
     int num_classes = ctx.Attr<int>("num_classes");
     int resolution = ctx.Attr<int>("resolution");
@@ -463,17 +464,20 @@ class GenerateMaskLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
              "B is the number of input images, "
              "each element consists of im_height, im_width, im_scale.");
     AddInput("GtClasses",
-             "(LoDTensor), This input is a 2D LoDTensor with shape [M, 1]. "
+             "(phi::DenseTensor), This input is a 2D phi::DenseTensor with "
+             "shape [M, 1]. "
              "M is the number of groundtruth, "
              "each element is a class label of groundtruth.");
     AddInput(
         "IsCrowd",
-        "(LoDTensor), This input is a 2D LoDTensor with shape [M, 1]. "
+        "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
+        "[M, 1]. "
         "M is the number of groundtruth, "
         "each element is a flag indicates whether a groundtruth is crowd.");
     AddInput(
         "GtSegms",
-        "(LoDTensor), This input is a 2D LoDTensor with shape [S, 2], it's LoD "
+        "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
+        "[S, 2], it's LoD "
         "level is 3. The LoD[0] represents the gt objects number of each "
         "instance. LoD[1] represents the segmentation counts of each objects. "
         "LoD[2] represents the polygons number of each segmentation. S the "
@@ -481,24 +485,29 @@ class GenerateMaskLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
         "coordinate points.");
     AddInput(
         "Rois",
-        "(LoDTensor), This input is a 2D LoDTensor with shape [R, 4]. "
+        "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
+        "[R, 4]. "
         "R is the number of rois which is the output of "
         "generate_proposal_labels, "
         "each element is a bounding box with (xmin, ymin, xmax, ymax) format.");
     AddInput("LabelsInt32",
-             "(LoDTensor), This intput is a 2D LoDTensor with shape [R, 1], "
+             "(phi::DenseTensor), This intput is a 2D phi::DenseTensor with "
+             "shape [R, 1], "
              "each element represents a class label of a roi");
     AddOutput(
         "MaskRois",
-        "(LoDTensor), This output is a 2D LoDTensor with shape [P, 4]. "
+        "(phi::DenseTensor), This output is a 2D phi::DenseTensor with shape "
+        "[P, 4]. "
         "P is the number of mask, "
         "each element is a bounding box with [xmin, ymin, xmax, ymax] format.");
     AddOutput("RoiHasMaskInt32",
-              "(LoDTensor), This output is a 2D LoDTensor with shape [P, 1], "
+              "(phi::DenseTensor), This output is a 2D phi::DenseTensor with "
+              "shape [P, 1], "
               "each element represents the output mask rois index with regard "
               "to input rois");
     AddOutput("MaskInt32",
-              "(LoDTensor), This output is a 4D LoDTensor with shape [P, Q], "
+              "(phi::DenseTensor), This output is a 4D phi::DenseTensor with "
+              "shape [P, Q], "
               "Q equal to num_classes * resolution * resolution");
 
     AddAttr<int>("num_classes", "Class number.");
diff --git a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc
index d3ae86e234c..1071641b6bc 100644
--- a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc
+++ b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc
@@ -26,11 +26,12 @@ namespace paddle {
 namespace operators {
 
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 const int kBoxDim = 4;
 
 template <typename T>
-void AppendRois(LoDTensor* out, int64_t offset, phi::DenseTensor* to_add) {
+void AppendRois(phi::DenseTensor* out,
+                int64_t offset,
+                phi::DenseTensor* to_add) {
   auto* out_data = out->data<T>();
   auto* to_add_data = to_add->data<T>();
   memcpy(out_data + offset, to_add_data, to_add->numel() * sizeof(T));
@@ -513,19 +514,21 @@ template <typename T>
 class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    auto* rpn_rois = context.Input<LoDTensor>("RpnRois");
-    auto* gt_classes = context.Input<LoDTensor>("GtClasses");
-    auto* is_crowd = context.Input<LoDTensor>("IsCrowd");
-    auto* gt_boxes = context.Input<LoDTensor>("GtBoxes");
-    auto* im_info = context.Input<LoDTensor>("ImInfo");
-
-    auto* rois = context.Output<LoDTensor>("Rois");
-    auto* labels_int32 = context.Output<LoDTensor>("LabelsInt32");
-    auto* bbox_targets = context.Output<LoDTensor>("BboxTargets");
-    auto* bbox_inside_weights = context.Output<LoDTensor>("BboxInsideWeights");
+    auto* rpn_rois = context.Input<phi::DenseTensor>("RpnRois");
+    auto* gt_classes = context.Input<phi::DenseTensor>("GtClasses");
+    auto* is_crowd = context.Input<phi::DenseTensor>("IsCrowd");
+    auto* gt_boxes = context.Input<phi::DenseTensor>("GtBoxes");
+    auto* im_info = context.Input<phi::DenseTensor>("ImInfo");
+
+    auto* rois = context.Output<phi::DenseTensor>("Rois");
+    auto* labels_int32 = context.Output<phi::DenseTensor>("LabelsInt32");
+    auto* bbox_targets = context.Output<phi::DenseTensor>("BboxTargets");
+    auto* bbox_inside_weights =
+        context.Output<phi::DenseTensor>("BboxInsideWeights");
     auto* bbox_outside_weights =
-        context.Output<LoDTensor>("BboxOutsideWeights");
-    auto* max_overlap_with_gt = context.Output<LoDTensor>("MaxOverlapWithGT");
+        context.Output<phi::DenseTensor>("BboxOutsideWeights");
+    auto* max_overlap_with_gt =
+        context.Output<phi::DenseTensor>("MaxOverlapWithGT");
 
     int batch_size_per_im = context.Attr<int>("batch_size_per_im");
     float fg_fraction = context.Attr<float>("fg_fraction");
@@ -685,21 +688,25 @@ class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
   void Make() override {
     AddInput(
         "RpnRois",
-        "(LoDTensor), This input is a 2D LoDTensor with shape [N, 4]. "
+        "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
+        "[N, 4]. "
         "N is the number of the GenerateProposalOp's output, "
         "each element is a bounding box with [xmin, ymin, xmax, ymax] format.");
     AddInput("GtClasses",
-             "(LoDTensor), This input is a 2D LoDTensor with shape [M, 1]. "
+             "(phi::DenseTensor), This input is a 2D phi::DenseTensor with "
+             "shape [M, 1]. "
              "M is the number of groundtruth, "
              "each element is a class label of groundtruth.");
     AddInput(
         "IsCrowd",
-        "(LoDTensor), This input is a 2D LoDTensor with shape [M, 1]. "
+        "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
+        "[M, 1]. "
         "M is the number of groundtruth, "
         "each element is a flag indicates whether a groundtruth is crowd.");
     AddInput(
         "GtBoxes",
-        "(LoDTensor), This input is a 2D LoDTensor with shape [M, 4]. "
+        "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
+        "[M, 4]. "
         "M is the number of groundtruth, "
         "each element is a bounding box with [xmin, ymin, xmax, ymax] format.");
     AddInput("ImInfo",
@@ -707,7 +714,8 @@ class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
              "B is the number of input images, "
              "each element consists of im_height, im_width, im_scale.");
     AddInput("MaxOverlap",
-             "(LoDTensor), This input is a 1D LoDTensor with shape [N]."
+             "(phi::DenseTensor), This input is a 1D phi::DenseTensor with "
+             "shape [N]. "
              "N is the number of Input(RpnRois), "
              "each element is the maximum overlap between "
              "the proposal RoI and ground-truth.")
@@ -715,28 +723,34 @@ class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
 
     AddOutput(
         "Rois",
-        "(LoDTensor), This output is a 2D LoDTensor with shape [P, 4]. "
+        "(phi::DenseTensor), This output is a 2D phi::DenseTensor with shape "
+        "[P, 4]. "
         "P usuall equal to  batch_size_per_im * batch_size, "
         "each element is a bounding box with [xmin, ymin, xmax, ymax] format.");
     AddOutput("LabelsInt32",
-              "(LoDTensor), This output is a 2D LoDTensor with shape [P, 1], "
+              "(phi::DenseTensor), This output is a 2D phi::DenseTensor with "
+              "shape [P, 1], "
               "each element represents a class label of a roi");
     AddOutput("BboxTargets",
-              "(LoDTensor), This output is a 2D LoDTensor with shape [P, 4 * "
+              "(phi::DenseTensor), This output is a 2D phi::DenseTensor with "
+              "shape [P, 4 * "
               "class_nums], "
               "each element represents a box label of a roi");
     AddOutput(
         "BboxInsideWeights",
-        "(LoDTensor), This output is a 2D LoDTensor with shape [P, 4 * "
+        "(phi::DenseTensor), This output is a 2D phi::DenseTensor with shape "
+        "[P, 4 * "
         "class_nums], "
         "each element indicates whether a box should contribute to loss.");
     AddOutput(
         "BboxOutsideWeights",
-        "(LoDTensor), This output is a 2D LoDTensor with shape [P, 4 * "
+        "(phi::DenseTensor), This output is a 2D phi::DenseTensor with shape "
+        "[P, 4 * "
         "class_nums], "
         "each element indicates whether a box should contribute to loss.");
     AddOutput("MaxOverlapWithGT",
-              "(LoDTensor), This output is a 1D LoDTensor with shape [P], "
+              "(phi::DenseTensor), This output is a 1D phi::DenseTensor with "
+              "shape [P], "
               "each element indicates the maxoverlap "
               "between output RoIs and ground-truth. "
               "The output RoIs may include ground-truth "
diff --git a/paddle/fluid/operators/detection/generate_proposals_op.cc b/paddle/fluid/operators/detection/generate_proposals_op.cc
index ccb7ca690cf..6491c8b8fce 100644
--- a/paddle/fluid/operators/detection/generate_proposals_op.cc
+++ b/paddle/fluid/operators/detection/generate_proposals_op.cc
@@ -28,7 +28,6 @@ namespace paddle {
 namespace operators {
 
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 
 class GenerateProposalsOp : public framework::OperatorWithKernel {
  public:
@@ -90,8 +89,8 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
                         "Variances",
                         "GenerateProposals");
 
-    auto *rpn_rois = context.Output<LoDTensor>("RpnRois");
-    auto *rpn_roi_probs = context.Output<LoDTensor>("RpnRoiProbs");
+    auto *rpn_rois = context.Output<phi::DenseTensor>("RpnRois");
+    auto *rpn_roi_probs = context.Output<phi::DenseTensor>("RpnRoiProbs");
 
     int pre_nms_top_n = context.Attr<int>("pre_nms_topN");
     int post_nms_top_n = context.Attr<int>("post_nms_topN");
@@ -288,9 +287,10 @@ class GenerateProposalsOpMaker : public framework::OpProtoAndCheckerMaker {
              "(Tensor) Bounding box variances with same shape as `Anchors`.");
 
     AddOutput("RpnRois",
-              "(LoDTensor), Output proposals with shape (rois_num, 4).");
-    AddOutput("RpnRoiProbs",
-              "(LoDTensor) Scores of proposals with shape (rois_num, 1).");
+              "(phi::DenseTensor), Output proposals with shape (rois_num, 4).");
+    AddOutput(
+        "RpnRoiProbs",
+        "(phi::DenseTensor) Scores of proposals with shape (rois_num, 1).");
     AddOutput("RpnRoisNum", "(Tensor), The number of Rpn RoIs in each image")
         .AsDispensable();
     AddAttr<int>("pre_nms_topN",
diff --git a/paddle/fluid/operators/detection/generate_proposals_op.cu b/paddle/fluid/operators/detection/generate_proposals_op.cu
index 665480f6d73..0890ff49333 100644
--- a/paddle/fluid/operators/detection/generate_proposals_op.cu
+++ b/paddle/fluid/operators/detection/generate_proposals_op.cu
@@ -29,7 +29,6 @@ namespace paddle {
 namespace operators {
 
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 
 namespace {
 template <typename T>
@@ -144,8 +143,8 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel<T> {
                         "Variances",
                         "GenerateProposals");
 
-    auto *rpn_rois = context.Output<LoDTensor>("RpnRois");
-    auto *rpn_roi_probs = context.Output<LoDTensor>("RpnRoiProbs");
+    auto *rpn_rois = context.Output<phi::DenseTensor>("RpnRois");
+    auto *rpn_roi_probs = context.Output<phi::DenseTensor>("RpnRoiProbs");
 
     int pre_nms_top_n = context.Attr<int>("pre_nms_topN");
     int post_nms_top_n = context.Attr<int>("post_nms_topN");
diff --git a/paddle/fluid/operators/detection/generate_proposals_v2_op.cc b/paddle/fluid/operators/detection/generate_proposals_v2_op.cc
index a772efd58dd..08c7a649c1e 100644
--- a/paddle/fluid/operators/detection/generate_proposals_v2_op.cc
+++ b/paddle/fluid/operators/detection/generate_proposals_v2_op.cc
@@ -30,7 +30,6 @@ namespace paddle {
 namespace operators {
 
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 
 class GenerateProposalsV2Op : public framework::OperatorWithKernel {
  public:
@@ -65,9 +64,10 @@ class GenerateProposalsV2OpMaker : public framework::OpProtoAndCheckerMaker {
              "(Tensor) Bounding box variances with same shape as `Anchors`.");
 
     AddOutput("RpnRois",
-              "(LoDTensor), Output proposals with shape (rois_num, 4).");
-    AddOutput("RpnRoiProbs",
-              "(LoDTensor) Scores of proposals with shape (rois_num, 1).");
+              "(phi::DenseTensor), Output proposals with shape (rois_num, 4).");
+    AddOutput(
+        "RpnRoiProbs",
+        "(phi::DenseTensor) Scores of proposals with shape (rois_num, 1).");
     AddOutput("RpnRoisNum", "(Tensor), The number of Rpn RoIs in each image")
         .AsDispensable();
     AddAttr<int>("pre_nms_topN",
diff --git a/paddle/fluid/operators/detection/iou_similarity_op.cc b/paddle/fluid/operators/detection/iou_similarity_op.cc
index 5f46e9ab51b..406114c588a 100644
--- a/paddle/fluid/operators/detection/iou_similarity_op.cc
+++ b/paddle/fluid/operators/detection/iou_similarity_op.cc
@@ -59,17 +59,18 @@ class IOUSimilarityOp : public framework::OperatorWithKernel {
 class IOUSimilarityOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
-    AddInput("X",
-             "(LoDTensor, default LoDTensor<float>) "
-             "Box list X is a 2-D LoDTensor with shape [N, 4] holds N boxes, "
-             "each box is represented as [xmin, ymin, xmax, ymax], "
-             "the shape of X is [N, 4]. [xmin, ymin] is the left top "
-             "coordinate of the box if the input is image feature map, they "
-             "are close to the origin of the coordinate system. "
-             "[xmax, ymax] is the right bottom coordinate of the box. "
-             "This tensor can contain LoD information to represent a batch "
-             "of inputs. One instance of this batch can contain different "
-             "numbers of entities.");
+    AddInput(
+        "X",
+        "(phi::DenseTensor, default phi::DenseTensor<float>) "
+        "Box list X is a 2-D phi::DenseTensor with shape [N, 4] holds N boxes, "
+        "each box is represented as [xmin, ymin, xmax, ymax], "
+        "the shape of X is [N, 4]. [xmin, ymin] is the left top "
+        "coordinate of the box if the input is image feature map, they "
+        "are close to the origin of the coordinate system. "
+        "[xmax, ymax] is the right bottom coordinate of the box. "
+        "This tensor can contain LoD information to represent a batch "
+        "of inputs. One instance of this batch can contain different "
+        "numbers of entities.");
     AddInput("Y",
              "(Tensor, default Tensor<float>) "
              "Box list Y holds M boxes, each box is represented as "
@@ -82,7 +83,7 @@ class IOUSimilarityOpMaker : public framework::OpProtoAndCheckerMaker {
                   "whether treat the priorbox as a normalized box")
         .SetDefault(true);
     AddOutput("Out",
-              "(LoDTensor, the lod is same as input X) The output of "
+              "(phi::DenseTensor, the lod is same as input X) The output of "
               "iou_similarity op, a tensor with shape [N, M] "
               "representing pairwise iou scores.");
 
@@ -90,7 +91,7 @@ class IOUSimilarityOpMaker : public framework::OpProtoAndCheckerMaker {
 **IOU Similarity Operator**
 
 Computes intersection-over-union (IOU) between two box lists.
-Box list 'X' should be a LoDTensor and 'Y' is a common Tensor,
+Box list 'X' should be a phi::DenseTensor and 'Y' is a common Tensor,
 boxes in 'Y' are shared by all instance of the batched inputs of X.
 Given two boxes A and B, the calculation of IOU is as follows:
 
diff --git a/paddle/fluid/operators/detection/locality_aware_nms_op.cc b/paddle/fluid/operators/detection/locality_aware_nms_op.cc
index 5e77ee33a07..c2b8833bbd9 100644
--- a/paddle/fluid/operators/detection/locality_aware_nms_op.cc
+++ b/paddle/fluid/operators/detection/locality_aware_nms_op.cc
@@ -20,7 +20,6 @@ namespace paddle {
 namespace operators {
 
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 
 class LocalityAwareNMSOp : public framework::OperatorWithKernel {
  public:
@@ -352,15 +351,15 @@ class LocalityAwareNMSKernel : public framework::OpKernel<T> {
   }
 
   void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* boxes_input = ctx.Input<LoDTensor>("BBoxes");
-    auto* scores_input = ctx.Input<LoDTensor>("Scores");
-    auto* outs = ctx.Output<LoDTensor>("Out");
+    auto* boxes_input = ctx.Input<phi::DenseTensor>("BBoxes");
+    auto* scores_input = ctx.Input<phi::DenseTensor>("Scores");
+    auto* outs = ctx.Output<phi::DenseTensor>("Out");
     auto& score_dims = scores_input->dims();
     auto score_size = score_dims.size();
     auto& dev_ctx = ctx.template device_context<phi::CPUContext>();
 
-    LoDTensor scores;
-    LoDTensor boxes;
+    phi::DenseTensor scores;
+    phi::DenseTensor boxes;
     paddle::framework::TensorCopySync(
         *scores_input, platform::CPUPlace(), &scores);
     paddle::framework::TensorCopySync(
@@ -476,10 +475,12 @@ class LocalityAwareNMSOpMaker : public framework::OpProtoAndCheckerMaker {
                   "Whether detections are normalized.")
         .SetDefault(true);
     AddOutput("Out",
-              "(LoDTensor) A 2-D LoDTensor with shape [No, 6] represents the "
+              "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 6] "
+              "represents the "
               "detections. Each row has 6 values: "
               "[label, confidence, xmin, ymin, xmax, ymax] or "
-              "(LoDTensor) A 2-D LoDTensor with shape [No, 10] represents the "
+              "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 10] "
+              "represents the "
               "detections. Each row has 10 values: "
               "[label, confidence, x1, y1, x2, y2, x3, y3, x4, y4]. No is the "
               "total number of detections in this mini-batch."
@@ -501,7 +502,7 @@ Aftern NMS step, at most keep_top_k number of total bboxes are to be kept
 per image if keep_top_k is larger than -1.
 This operator support multi-class and batched inputs. It applying NMS
 independently for each class. The outputs is a 2-D LoDTenosr, for each
-image, the offsets in first dimension of LoDTensor are called LoD, the number
+image, the offsets in first dimension of phi::DenseTensor are called LoD, the number
 of offset is N + 1, where N is the batch size. If LoD[i + 1] - LoD[i] == 0,
 means there is no detected bbox for this image.
 
diff --git a/paddle/fluid/operators/detection/matrix_nms_op.cc b/paddle/fluid/operators/detection/matrix_nms_op.cc
index c122e12b299..21e52a39c37 100644
--- a/paddle/fluid/operators/detection/matrix_nms_op.cc
+++ b/paddle/fluid/operators/detection/matrix_nms_op.cc
@@ -21,7 +21,6 @@ namespace paddle {
 namespace operators {
 
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 
 class MatrixNMSOp : public framework::OperatorWithKernel {
  public:
@@ -89,14 +88,16 @@ class MatrixNMSOpMaker : public framework::OpProtoAndCheckerMaker {
                    "when 'use_gaussian' is enabled.")
         .SetDefault(2.);
     AddOutput("Out",
-              "(LoDTensor) A 2-D LoDTensor with shape [No, 6] represents the "
+              "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 6] "
+              "represents the "
               "detections. Each row has 6 values: "
               "[label, confidence, xmin, ymin, xmax, ymax]. "
               "the offsets in first dimension are called LoD, the number of "
               "offset is N + 1, if LoD[i + 1] - LoD[i] == 0, means there is "
               "no detected bbox.");
     AddOutput("Index",
-              "(LoDTensor) A 2-D LoDTensor with shape [No, 1] represents the "
+              "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 1] "
+              "represents the "
               "index of selected bbox. The index is the absolute index cross "
               "batches.");
     AddOutput("RoisNum", "(Tensor), Number of RoIs in each images.")
@@ -113,7 +114,7 @@ Aftern NMS step, at most keep_top_k number of total bboxes are to be kept
 per image if keep_top_k is larger than -1.
 This operator support multi-class and batched inputs. It applying NMS
 independently for each class. The outputs is a 2-D LoDTenosr, for each
-image, the offsets in first dimension of LoDTensor are called LoD, the number
+image, the offsets in first dimension of phi::DenseTensor are called LoD, the number
 of offset is N + 1, where N is the batch size. If LoD[i + 1] - LoD[i] == 0,
 means there is no detected bbox for this image. Now this operator has one more
 output, which is RoisNum. The size of RoisNum is N, RoisNum[i] means the number of
diff --git a/paddle/fluid/operators/detection/mine_hard_examples_op.cc b/paddle/fluid/operators/detection/mine_hard_examples_op.cc
index 408e1201e64..28099630b83 100644
--- a/paddle/fluid/operators/detection/mine_hard_examples_op.cc
+++ b/paddle/fluid/operators/detection/mine_hard_examples_op.cc
@@ -363,15 +363,15 @@ class MineHardExamplesOpMaker : public framework::OpProtoAndCheckerMaker {
         .SetDefault("max_negative")
         .InEnum({"hard_example", "max_negative"});
 
-    AddOutput(
-        "NegIndices",
-        "(LoDTensor<int>) The output of negative example indices. a LoDTensor "
-        "with shape [Neg, 1]. The size of lod[0] minus 1 is batch size, "
-        "and each element is the prior box index. "
-        "For example, the batch size is 2, the lod is [[0, 1, 2]], "
-        "the sample 0's box 1(MatchIndices[0][1]) is selected, "
-        "and sample 1's box 0 is selected. The output NegIndices is "
-        "[[1], [0]].");
+    AddOutput("NegIndices",
+              "(phi::DenseTensor<int>) The output of negative example indices. "
+              "a phi::DenseTensor "
+              "with shape [Neg, 1]. The size of lod[0] minus 1 is batch size, "
+              "and each element is the prior box index. "
+              "For example, the batch size is 2, the lod is [[0, 1, 2]], "
+              "the sample 0's box 1(MatchIndices[0][1]) is selected, "
+              "and sample 1's box 0 is selected. The output NegIndices is "
+              "[[1], [0]].");
 
     AddOutput("UpdatedMatchIndices",
               "(Tensor<int>) The output of updated MatchIndices, a tensor with "
diff --git a/paddle/fluid/operators/detection/multiclass_nms_op.cc b/paddle/fluid/operators/detection/multiclass_nms_op.cc
index 2e24fa3fabb..5af93551d78 100644
--- a/paddle/fluid/operators/detection/multiclass_nms_op.cc
+++ b/paddle/fluid/operators/detection/multiclass_nms_op.cc
@@ -22,7 +22,6 @@ namespace paddle {
 namespace operators {
 
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 
 inline std::vector<size_t> GetNmsLodFromRoisNum(
     const phi::DenseTensor* rois_num) {
@@ -357,11 +356,11 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
   }
 
   void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* boxes = ctx.Input<LoDTensor>("BBoxes");
-    auto* scores = ctx.Input<LoDTensor>("Scores");
-    auto* outs = ctx.Output<LoDTensor>("Out");
+    auto* boxes = ctx.Input<phi::DenseTensor>("BBoxes");
+    auto* scores = ctx.Input<phi::DenseTensor>("Scores");
+    auto* outs = ctx.Output<phi::DenseTensor>("Out");
     bool return_index = ctx.HasOutput("Index") ? true : false;
-    auto index = ctx.Output<LoDTensor>("Index");
+    auto index = ctx.Output<phi::DenseTensor>("Index");
     bool has_roisnum = ctx.HasInput("RoisNum") ? true : false;
     auto rois_num = ctx.Input<phi::DenseTensor>("RoisNum");
     auto score_dims = scores->dims();
@@ -496,7 +495,7 @@ class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker {
              "predicted locations of M bounding bboxes, N is the batch size. "
              "Each bounding box has four coordinate values and the layout is "
              "[xmin, ymin, xmax, ymax], when box size equals to 4."
-             "2. (LoDTensor) A 3-D Tensor with shape [M, C, 4]"
+             "2. (phi::DenseTensor) A 3-D Tensor with shape [M, C, 4]. "
              "M is the number of bounding boxes, C is the class number");
     AddInput("Scores",
              "Two types of scores are supported:"
@@ -505,7 +504,7 @@ class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker {
              "class number, M is number of bounding boxes. For each category "
              "there are total M scores which corresponding M bounding boxes. "
              " Please note, M is equal to the 2nd dimension of BBoxes. "
-             "2. (LoDTensor) A 2-D LoDTensor with shape [M, C]. "
+             "2. (phi::DenseTensor) A 2-D phi::DenseTensor with shape [M, C]. "
              "M is the number of bbox, C is the class number. In this case, "
              "Input BBoxes should be the second case with shape [M, C, 4].");
     AddAttr<int>(
@@ -540,10 +539,12 @@ class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker {
                   "Whether detections are normalized.")
         .SetDefault(true);
     AddOutput("Out",
-              "(LoDTensor) A 2-D LoDTensor with shape [No, 6] represents the "
+              "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 6] "
+              "represents the "
               "detections. Each row has 6 values: "
               "[label, confidence, xmin, ymin, xmax, ymax] or "
-              "(LoDTensor) A 2-D LoDTensor with shape [No, 10] represents the "
+              "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 10] "
+              "represents the "
               "detections. Each row has 10 values: "
               "[label, confidence, x1, y1, x2, y2, x3, y3, x4, y4]. No is the "
               "total number of detections in this mini-batch."
@@ -564,7 +565,7 @@ Aftern NMS step, at most keep_top_k number of total bboxes are to be kept
 per image if keep_top_k is larger than -1.
 This operator support multi-class and batched inputs. It applying NMS
 independently for each class. The outputs is a 2-D LoDTenosr, for each
-image, the offsets in first dimension of LoDTensor are called LoD, the number
+image, the offsets in first dimension of phi::DenseTensor are called LoD, the number
 of offset is N + 1, where N is the batch size. If LoD[i + 1] - LoD[i] == 0,
 means there is no detected bbox for this image.
 )DOC");
@@ -600,7 +601,8 @@ class MultiClassNMS2OpMaker : public MultiClassNMSOpMaker {
   void Make() override {
     MultiClassNMSOpMaker::Make();
     AddOutput("Index",
-              "(LoDTensor) A 2-D LoDTensor with shape [No, 1] represents the "
+              "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 1] "
+              "represents the "
               "index of selected bbox. The index is the absolute index cross "
               "batches.")
         .AsIntermediate();
diff --git a/paddle/fluid/operators/detection/retinanet_detection_output_op.cc b/paddle/fluid/operators/detection/retinanet_detection_output_op.cc
index 4304e6d4d78..a38765e28d7 100644
--- a/paddle/fluid/operators/detection/retinanet_detection_output_op.cc
+++ b/paddle/fluid/operators/detection/retinanet_detection_output_op.cc
@@ -19,7 +19,6 @@ namespace paddle {
 namespace operators {
 
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 
 class RetinanetDetectionOutputOp : public framework::OperatorWithKernel {
  public:
@@ -490,8 +489,8 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel<T> {
     auto boxes = ctx.MultiInput<phi::DenseTensor>("BBoxes");
     auto scores = ctx.MultiInput<phi::DenseTensor>("Scores");
     auto anchors = ctx.MultiInput<phi::DenseTensor>("Anchors");
-    auto* im_info = ctx.Input<LoDTensor>("ImInfo");
-    auto* outs = ctx.Output<LoDTensor>("Out");
+    auto* im_info = ctx.Input<phi::DenseTensor>("ImInfo");
+    auto* outs = ctx.Output<phi::DenseTensor>("Out");
 
     std::vector<Tensor> boxes_list(boxes.size());
     std::vector<Tensor> scores_list(scores.size());
@@ -586,7 +585,8 @@ class RetinanetDetectionOutputOpMaker
              "[xmin, ymin, xmax, ymax].")
         .AsDuplicable();
     AddInput("ImInfo",
-             "(LoDTensor) A 2-D LoDTensor with shape [N, 3] represents the "
+             "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [N, 3] "
+             "represents the "
              "image information. N is the batch size, each image information "
              "includes height, width and scale.");
     AddAttr<float>("score_threshold",
@@ -609,7 +609,8 @@ class RetinanetDetectionOutputOpMaker
         "Number of total bounding boxes to be kept per image after NMS "
         "step.");
     AddOutput("Out",
-              "(LoDTensor) A 2-D LoDTensor with shape [No, 6] represents the "
+              "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 6] "
+              "represents the "
               "detections. Each row has 6 values: "
               "[label, confidence, xmin, ymin, xmax, ymax]"
               "No is the total number of detections in this mini-batch."
@@ -650,7 +651,7 @@ After NMS step, at most keep_top_k number of total bounding boxes are to be kept
 per image if keep_top_k is larger than -1.
 This operator support multi-class and batched inputs. It applying NMS
 independently for each class. The outputs is a 2-D LoDTenosr, for each
-image, the offsets in first dimension of LoDTensor are called LoD, the number
+image, the offsets in first dimension of phi::DenseTensor are called LoD, the number
 of offset is N + 1, where N is the batch size. If LoD[i + 1] - LoD[i] == 0,
 means there is no detected bounding box for this image. If there is no detected boxes
 for all images, all the elements in LoD are set to 0, and the output tensor is
diff --git a/paddle/fluid/operators/detection/roi_perspective_transform_op.cc b/paddle/fluid/operators/detection/roi_perspective_transform_op.cc
index 2ba1b75f0ca..ff4c1159119 100644
--- a/paddle/fluid/operators/detection/roi_perspective_transform_op.cc
+++ b/paddle/fluid/operators/detection/roi_perspective_transform_op.cc
@@ -23,7 +23,6 @@ namespace paddle {
 namespace operators {
 
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 
 template <typename T>
 bool GT_E(T a, T b) {
@@ -504,7 +503,7 @@ class ROIPerspectiveTransformOp : public framework::OperatorWithKernel {
         rois_dims.size(),
         2,
         platform::errors::InvalidArgument(
-            "ROIs should be a 2-D LoDTensor of shape (num_rois, 8)"
+            "ROIs should be a 2-D phi::DenseTensor of shape (num_rois, 8) "
             "given as [[x0, y0, x1, y1, x2, y2, x3, y3], ...]. But received "
             "rois dims is %d",
             rois_dims.size()));
@@ -512,7 +511,7 @@ class ROIPerspectiveTransformOp : public framework::OperatorWithKernel {
         rois_dims[1],
         8,
         platform::errors::InvalidArgument(
-            "ROIs should be a 2-D LoDTensor of shape (num_rois, 8)"
+            "ROIs should be a 2-D phi::DenseTensor of shape (num_rois, 8)"
             "given as [[x0, y0, x1, y1, x2, y2, x3, y3], ...]. But received %d",
             rois_dims[1]));
 
@@ -608,9 +607,9 @@ class ROIPerspectiveTransformOpMaker
              "H is the height of the feature, and "
              "W is the width of the feature.");
     AddInput("ROIs",
-             "(LoDTensor), "
+             "(phi::DenseTensor), "
              "ROIs (Regions of Interest) to be transformed. "
-             "should be a 2-D LoDTensor of shape (num_rois, 8)"
+             "should be a 2-D phi::DenseTensor of shape (num_rois, 8)"
              "given as [[x1, y1, x2, y2, x3, y3, x4, y4], ...]."
              "(x1, y1) is the top left coordinates, and "
              "(x2, y2) is the top right coordinates, and"
diff --git a/paddle/fluid/operators/detection/rpn_target_assign_op.cc b/paddle/fluid/operators/detection/rpn_target_assign_op.cc
index 05666e4f7ce..f73ddf9a09e 100644
--- a/paddle/fluid/operators/detection/rpn_target_assign_op.cc
+++ b/paddle/fluid/operators/detection/rpn_target_assign_op.cc
@@ -22,7 +22,6 @@ namespace paddle {
 namespace operators {
 
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 template <typename T,
           int MajorType = Eigen::RowMajor,
           typename IndexType = Eigen::DenseIndex>
@@ -105,7 +104,9 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel {
 };
 
 template <typename T>
-void AppendRpns(LoDTensor* out, int64_t offset, phi::DenseTensor* to_add) {
+void AppendRpns(phi::DenseTensor* out,
+                int64_t offset,
+                phi::DenseTensor* to_add) {
   auto* out_data = out->data<T>();
   auto* to_add_data = to_add->data<T>();
   memcpy(out_data + offset, to_add_data, to_add->numel() * sizeof(T));
@@ -395,15 +396,16 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
     auto* anchor = context.Input<phi::DenseTensor>("Anchor");  // (H*W*A) * 4
-    auto* gt_boxes = context.Input<LoDTensor>("GtBoxes");
-    auto* is_crowd = context.Input<LoDTensor>("IsCrowd");
-    auto* im_info = context.Input<LoDTensor>("ImInfo");
+    auto* gt_boxes = context.Input<phi::DenseTensor>("GtBoxes");
+    auto* is_crowd = context.Input<phi::DenseTensor>("IsCrowd");
+    auto* im_info = context.Input<phi::DenseTensor>("ImInfo");
 
-    auto* loc_index = context.Output<LoDTensor>("LocationIndex");
-    auto* score_index = context.Output<LoDTensor>("ScoreIndex");
-    auto* tgt_bbox = context.Output<LoDTensor>("TargetBBox");
-    auto* tgt_lbl = context.Output<LoDTensor>("TargetLabel");
-    auto* bbox_inside_weight = context.Output<LoDTensor>("BBoxInsideWeight");
+    auto* loc_index = context.Output<phi::DenseTensor>("LocationIndex");
+    auto* score_index = context.Output<phi::DenseTensor>("ScoreIndex");
+    auto* tgt_bbox = context.Output<phi::DenseTensor>("TargetBBox");
+    auto* tgt_lbl = context.Output<phi::DenseTensor>("TargetLabel");
+    auto* bbox_inside_weight =
+        context.Output<phi::DenseTensor>("BBoxInsideWeight");
 
     PADDLE_ENFORCE_EQ(gt_boxes->lod().size(),
                       1UL,
@@ -598,11 +600,11 @@ class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
     AddInput("Anchor",
              "(Tensor) input anchor is a 2-D Tensor with shape [H*W*A, 4].");
     AddInput("GtBoxes",
-             "(LoDTensor) input ground-truth bbox with shape [K, 4].");
+             "(phi::DenseTensor) input ground-truth bbox with shape [K, 4].");
     AddInput("IsCrowd",
-             "(LoDTensor) input which indicates ground-truth is crowd.");
+             "(phi::DenseTensor) input which indicates ground-truth is crowd.");
     AddInput("ImInfo",
-             "(LoDTensor) input image information with shape [N, 3]. "
+             "(phi::DenseTensor) input image information with shape [N, 3]. "
              "N is the batch size, each image information includes height, "
              "width and scale.");
     AddAttr<int>("rpn_batch_size_per_im",
@@ -685,13 +687,13 @@ class RetinanetTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
     AddInput("Anchor",
              "(Tensor) input anchor is a 2-D Tensor with shape [H*W*A, 4].");
     AddInput("GtBoxes",
-             "(LoDTensor) input ground-truth bbox with shape [K, 4].");
+             "(phi::DenseTensor) input ground-truth bbox with shape [K, 4].");
     AddInput("GtLabels",
-             "(LoDTensor) input ground-truth label with shape [K, 1].");
+             "(phi::DenseTensor) input ground-truth label with shape [K, 1].");
     AddInput("IsCrowd",
-             "(LoDTensor) input which indicates ground-truth is crowd.");
+             "(phi::DenseTensor) input which indicates ground-truth is crowd.");
     AddInput("ImInfo",
-             "(LoDTensor) input image information with shape [N, 3]. "
+             "(phi::DenseTensor) input image information with shape [N, 3]. "
              "N is the batch size, each image information includes height, "
              "width and scale.");
     AddAttr<float>(
@@ -994,17 +996,18 @@ class RetinanetTargetAssignKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
     auto* anchor = context.Input<phi::DenseTensor>("Anchor");  // (H*W*A) * 4
-    auto* gt_boxes = context.Input<LoDTensor>("GtBoxes");
-    auto* gt_labels = context.Input<LoDTensor>("GtLabels");
-    auto* is_crowd = context.Input<LoDTensor>("IsCrowd");
-    auto* im_info = context.Input<LoDTensor>("ImInfo");
-
-    auto* loc_index = context.Output<LoDTensor>("LocationIndex");
-    auto* score_index = context.Output<LoDTensor>("ScoreIndex");
-    auto* tgt_bbox = context.Output<LoDTensor>("TargetBBox");
-    auto* tgt_lbl = context.Output<LoDTensor>("TargetLabel");
-    auto* bbox_inside_weight = context.Output<LoDTensor>("BBoxInsideWeight");
-    auto* fg_num = context.Output<LoDTensor>("ForegroundNumber");
+    auto* gt_boxes = context.Input<phi::DenseTensor>("GtBoxes");
+    auto* gt_labels = context.Input<phi::DenseTensor>("GtLabels");
+    auto* is_crowd = context.Input<phi::DenseTensor>("IsCrowd");
+    auto* im_info = context.Input<phi::DenseTensor>("ImInfo");
+
+    auto* loc_index = context.Output<phi::DenseTensor>("LocationIndex");
+    auto* score_index = context.Output<phi::DenseTensor>("ScoreIndex");
+    auto* tgt_bbox = context.Output<phi::DenseTensor>("TargetBBox");
+    auto* tgt_lbl = context.Output<phi::DenseTensor>("TargetLabel");
+    auto* bbox_inside_weight =
+        context.Output<phi::DenseTensor>("BBoxInsideWeight");
+    auto* fg_num = context.Output<phi::DenseTensor>("ForegroundNumber");
 
     PADDLE_ENFORCE_EQ(
         gt_boxes->lod().size(),
diff --git a/paddle/fluid/operators/detection/target_assign_op.cc b/paddle/fluid/operators/detection/target_assign_op.cc
index 5b8e6739bfb..c3d79b05050 100644
--- a/paddle/fluid/operators/detection/target_assign_op.cc
+++ b/paddle/fluid/operators/detection/target_assign_op.cc
@@ -89,7 +89,8 @@ class TargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("X",
-             "(LoDTensor), This input is a 3D LoDTensor with shape [M, P, K]. "
+             "(phi::DenseTensor), This input is a 3D phi::DenseTensor with "
+             "shape [M, P, K]. "
              "Some elements in X will be assigned to Out based on the "
              "MatchIndices and NegIndices.");
     AddInput("MatchIndices",
@@ -97,7 +98,8 @@ class TargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
              "with shape [N, P], If MatchIndices[i][j] is -1, the j-th entity "
              "of column is not matched to any entity of row in i-th instance.");
     AddInput("NegIndices",
-             "(LoDTensor, default LoDTensor<int>), The input negative example "
+             "(phi::DenseTensor, default phi::DenseTensor<int>), The input "
+             "negative example "
              "indices are an optional input with shape [Neg, 1], where Neg is "
              "the total number of negative example indices.")
         .AsDispensable();
diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.cc b/paddle/fluid/operators/elementwise/elementwise_add_op.cc
index 0123df0006f..b4164846aa4 100644
--- a/paddle/fluid/operators/elementwise/elementwise_add_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_add_op.cc
@@ -31,15 +31,17 @@ class ElementwiseAddOpMaker : public ElementwiseOpMaker {
   std::string GetEquation() const override { return "Out = X + Y"; }
 
   void AddInputX() override {
-    AddInput("X",
-             "(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
-             "should be int32, int64, float32, float64.");
+    AddInput(
+        "X",
+        "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
+        "should be int32, int64, float32, float64.");
   }
 
   void AddInputY() override {
-    AddInput("Y",
-             "(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
-             "should be int32, int64, float32, float64.");
+    AddInput(
+        "Y",
+        "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
+        "should be int32, int64, float32, float64.");
   }
 
   std::string GetOpFuntionality() const override {
diff --git a/paddle/fluid/operators/elementwise/elementwise_div_op.cc b/paddle/fluid/operators/elementwise/elementwise_div_op.cc
index 506ba07c67c..f7a9b993c09 100644
--- a/paddle/fluid/operators/elementwise/elementwise_div_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_div_op.cc
@@ -29,15 +29,17 @@ class ElementwiseDivOpMaker : public ElementwiseOpMaker {
   std::string GetEquation() const override { return "Out = X / Y"; }
 
   void AddInputX() override {
-    AddInput("X",
-             "(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
-             "should be int32, int64, float32, float64.");
+    AddInput(
+        "X",
+        "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
+        "should be int32, int64, float32, float64.");
   }
 
   void AddInputY() override {
-    AddInput("Y",
-             "(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
-             "should be int32, int64, float32, float64.");
+    AddInput(
+        "Y",
+        "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
+        "should be int32, int64, float32, float64.");
   }
 
   std::string GetOpFuntionality() const override {
diff --git a/paddle/fluid/operators/elementwise/elementwise_floordiv_op.cc b/paddle/fluid/operators/elementwise/elementwise_floordiv_op.cc
index 6a8c986a53c..27c12c300bd 100644
--- a/paddle/fluid/operators/elementwise/elementwise_floordiv_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_floordiv_op.cc
@@ -35,15 +35,17 @@ class ElementwiseFloorDivOpMaker : public ElementwiseOpMaker {
   std::string GetEquation() const override { return "Out = X // Y"; }
 
   void AddInputX() override {
-    AddInput("X",
-             "(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
-             "should be int32, int64.");
+    AddInput(
+        "X",
+        "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
+        "should be int32, int64.");
   }
 
   void AddInputY() override {
-    AddInput("Y",
-             "(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
-             "should be int32, int64.");
+    AddInput(
+        "Y",
+        "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
+        "should be int32, int64.");
   }
 
   std::string GetOpFuntionality() const override {
diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op.cc b/paddle/fluid/operators/elementwise/elementwise_mul_op.cc
index a36d979df4a..5048a40ddde 100644
--- a/paddle/fluid/operators/elementwise/elementwise_mul_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_mul_op.cc
@@ -28,15 +28,17 @@ class ElementwiseMulOpMaker : public ElementwiseOpMaker {
   std::string GetEquation() const override { return "Out = X \\\\odot Y"; }
 
   void AddInputX() override {
-    AddInput("X",
-             "(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
-             "should be int32, int64, float32, float64.");
+    AddInput(
+        "X",
+        "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
+        "should be int32, int64, float32, float64.");
   }
 
   void AddInputY() override {
-    AddInput("Y",
-             "(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
-             "should be int32, int64, float32, float64.");
+    AddInput(
+        "Y",
+        "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
+        "should be int32, int64, float32, float64.");
   }
 
   std::string GetOpFuntionality() const override {
diff --git a/paddle/fluid/operators/elementwise/elementwise_op.h b/paddle/fluid/operators/elementwise/elementwise_op.h
index c1dacdcef71..6ab782e07fd 100644
--- a/paddle/fluid/operators/elementwise/elementwise_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_op.h
@@ -43,13 +43,14 @@ class ElementwiseOp : public framework::OperatorWithKernel {
     OP_INOUT_CHECK(ctx->HasInput("Y"), "Input", "Y", "ElementwiseOp");
     OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "ElementwiseOp");
 
-    PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("Y").front(),
-                      framework::proto::VarType::LOD_TENSOR,
-                      platform::errors::InvalidArgument(
-                          "The input var's type should be LoDTensor, but the "
-                          "received is %s [%s].",
-                          ctx->GetInputsVarType("Y").front(),
-                          ctx->Inputs("Y").front()));
+    PADDLE_ENFORCE_EQ(
+        ctx->GetInputsVarType("Y").front(),
+        framework::proto::VarType::LOD_TENSOR,
+        platform::errors::InvalidArgument(
+            "The input var's type should be phi::DenseTensor, but the "
+            "received is %s [%s].",
+            ctx->GetInputsVarType("Y").front(),
+            ctx->Inputs("Y").front()));
 
     if (ctx->GetInputsVarType("X").front() ==
         framework::proto::VarType::SELECTED_ROWS) {
diff --git a/paddle/fluid/operators/elementwise/elementwise_op_function.h b/paddle/fluid/operators/elementwise/elementwise_op_function.h
index 5f2097f3330..e1c91d43667 100644
--- a/paddle/fluid/operators/elementwise/elementwise_op_function.h
+++ b/paddle/fluid/operators/elementwise/elementwise_op_function.h
@@ -61,11 +61,11 @@ namespace operators {
 /*
  *  Pack input and output tensors into respective vectors with
  *  consideration of varible X`s class type.
- *  Input variable X is supported to be whether LoDTensor or
+ *  Input variable X may be either phi::DenseTensor or
  *  SelectedRows class type in this package function, once X
  *  was SelectedRows type, a valid pointer x_for_selectedrows
  *  is excepted to be passed in from op kernel for acquisition
- *  of the valid address of LoDTensor created ahead in the function.
+ *  of the valid address of phi::DenseTensor created ahead in the function.
  */
 template <typename OutT>
 int PackTensorsIntoVector(const framework::ExecutionContext &ctx,
@@ -112,7 +112,7 @@ int PackTensorsIntoVector(const framework::ExecutionContext &ctx,
   } else {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "X's type[%s] is not supported by elementwise_op. X's type should be "
-        "LoDTensor or SelectedRows.",
+        "phi::DenseTensor or SelectedRows.",
         framework::ToTypeName(x_var->Type())));
   }
   z->mutable_data<OutT>(ctx.GetPlace());
diff --git a/paddle/fluid/operators/elementwise/elementwise_sub_op.cc b/paddle/fluid/operators/elementwise/elementwise_sub_op.cc
index 24f0228025f..c73192ae792 100644
--- a/paddle/fluid/operators/elementwise/elementwise_sub_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_sub_op.cc
@@ -34,15 +34,17 @@ class ElementwiseSubOpMaker : public ElementwiseOpMaker {
   std::string GetEquation() const override { return "Out = X - Y"; }
 
   void AddInputX() override {
-    AddInput("X",
-             "(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
-             "should be int32, int64, float32, float64.");
+    AddInput(
+        "X",
+        "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
+        "should be int32, int64, float32, float64.");
   }
 
   void AddInputY() override {
-    AddInput("Y",
-             "(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
-             "should be int32, int64, float32, float64.");
+    AddInput(
+        "Y",
+        "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
+        "should be int32, int64, float32, float64.");
   }
 
   std::string GetOpFuntionality() const override {
diff --git a/paddle/fluid/operators/elementwise/elementwise_xpu.h b/paddle/fluid/operators/elementwise/elementwise_xpu.h
index d1d22d94780..c78c5cc6a4b 100644
--- a/paddle/fluid/operators/elementwise/elementwise_xpu.h
+++ b/paddle/fluid/operators/elementwise/elementwise_xpu.h
@@ -43,8 +43,8 @@ void XPUElementwise(const framework::ExecutionContext& ctx,
   PADDLE_ENFORCE_EQ(
       x_var->IsType<phi::DenseTensor>(),
       true,
-      platform::errors::InvalidArgument(
-          "XPU only support LoDTensor, Input(X) is not LoDTensor"));
+      platform::errors::InvalidArgument("XPU only support phi::DenseTensor, "
+                                        "Input(X) is not phi::DenseTensor"));
 
   auto x = x_var->Get<phi::DenseTensor>();
   auto* y = ctx.Input<phi::DenseTensor>("Y");
diff --git a/paddle/fluid/operators/fused/fused_bn_activation_op.cc b/paddle/fluid/operators/fused/fused_bn_activation_op.cc
index 1627f533492..faf4a1aae44 100644
--- a/paddle/fluid/operators/fused/fused_bn_activation_op.cc
+++ b/paddle/fluid/operators/fused/fused_bn_activation_op.cc
@@ -24,8 +24,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using LoDTensor = phi::DenseTensor;
-
 void FusedBatchNormActOp::InferShape(framework::InferShapeContext *ctx) const {
   PADDLE_ENFORCE_EQ(ctx->HasInput("X"),
                     true,
@@ -309,8 +307,8 @@ framework::OpKernelType FusedBatchNormActGradOp::GetExpectedKernelType(
   const Tensor *t = nullptr;
   if (var->IsType<Tensor>()) {
     t = &var->Get<Tensor>();
-  } else if (var->IsType<LoDTensor>()) {
-    t = &var->Get<LoDTensor>();
+  } else if (var->IsType<phi::DenseTensor>()) {
+    t = &var->Get<phi::DenseTensor>();
   }
   if (t == nullptr) {
     PADDLE_THROW(
diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc
index 2671e878697..2d51a3efaf6 100644
--- a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc
+++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc
@@ -23,8 +23,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using LoDTensor = phi::DenseTensor;
-
 void FusedBatchNormAddActOp::InferShape(
     framework::InferShapeContext *ctx) const {
   OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "FusedBatchNormAddActOp");
@@ -267,8 +265,8 @@ framework::OpKernelType FusedBatchNormAddActGradOp::GetExpectedKernelType(
   const Tensor *t = nullptr;
   if (var->IsType<Tensor>()) {
     t = &var->Get<Tensor>();
-  } else if (var->IsType<LoDTensor>()) {
-    t = &var->Get<LoDTensor>();
+  } else if (var->IsType<phi::DenseTensor>()) {
+    t = &var->Get<phi::DenseTensor>();
   }
   if (t == nullptr) {
     PADDLE_THROW(
diff --git a/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc b/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc
index af75fa6112e..9c58c690095 100644
--- a/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc
+++ b/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc
@@ -213,23 +213,25 @@ void FusedEmbeddingFCLSTMOpMaker::Make() {
            "input. This is a tensor with shape (N x D), where N is the "
            "batch size. `H0` and `C0` can be NULL but only at the same time.")
       .AsDispensable();
-  AddOutput("Hidden",
-            "(LoDTensor) (same as LSTMOp) the hidden state of LSTM operator. "
-            "The shape is (T x D), and lod is the same with the `Input`.");
-  AddOutput("Cell",
-            "(LoDTensor) (same as LSTMOp) the cell state of LSTM operator. "
-            "The shape is (T x D), and lod is the same with the `Input`.");
+  AddOutput(
+      "Hidden",
+      "(phi::DenseTensor) (same as LSTMOp) the hidden state of LSTM operator. "
+      "The shape is (T x D), and lod is the same with the `Input`.");
+  AddOutput(
+      "Cell",
+      "(phi::DenseTensor) (same as LSTMOp) the cell state of LSTM operator. "
+      "The shape is (T x D), and lod is the same with the `Input`.");
   AddOutput("XX",
-            "(LoDTensor) the result after X * WeightX (size is T x 4D)"
+            "(phi::DenseTensor) the result after X * WeightX (size is T x 4D)"
             " or batched_X (size is T x M), this will be automatically chosen,"
             " where T is the total time steps in this mini-batch,"
             " D is the hidden size, M is the dim size of x input.")
       .AsIntermediate();
-  AddOutput("BatchedInput", "(LoDTensor) (T x 4D).").AsIntermediate();
-  AddOutput("BatchedHidden", "(LoDTensor) (T x D).").AsIntermediate();
-  AddOutput("BatchedCell", "(LoDTensor) (T x D).").AsIntermediate();
-  AddOutput("ReorderedH0", "(LoDTensor) (N x D).").AsIntermediate();
-  AddOutput("ReorderedC0", "(LoDTensor) (N x D).").AsIntermediate();
+  AddOutput("BatchedInput", "(phi::DenseTensor) (T x 4D).").AsIntermediate();
+  AddOutput("BatchedHidden", "(phi::DenseTensor) (T x D).").AsIntermediate();
+  AddOutput("BatchedCell", "(phi::DenseTensor) (T x D).").AsIntermediate();
+  AddOutput("ReorderedH0", "(phi::DenseTensor) (N x D).").AsIntermediate();
+  AddOutput("ReorderedC0", "(phi::DenseTensor) (N x D).").AsIntermediate();
   AddAttr<bool>("use_peepholes",
                 "(bool, default: True) "
                 "whether to enable diagonal/peephole connections.")
@@ -286,15 +288,15 @@ class FusedEmbeddingFCLSTMKernel : public framework::OpKernel<T> {
   }
 
 #define INIT_BASE_INPUT_OUTPUT                                  \
-  auto* ids = ctx.Input<LoDTensor>("Ids");                      \
+  auto* ids = ctx.Input<phi::DenseTensor>("Ids");               \
   auto* h0 = ctx.Input<phi::DenseTensor>("H0");                 \
   auto* c0 = ctx.Input<phi::DenseTensor>("C0");                 \
   auto* embeddings = ctx.Input<phi::DenseTensor>("Embeddings"); \
   auto* wh = ctx.Input<phi::DenseTensor>("WeightH");            \
   auto* bias = ctx.Input<phi::DenseTensor>("Bias");             \
-  auto* xx = ctx.Output<LoDTensor>("XX");                       \
-  auto* hidden_out = ctx.Output<LoDTensor>("Hidden");           \
-  auto* cell_out = ctx.Output<LoDTensor>("Cell");               \
+  auto* xx = ctx.Output<phi::DenseTensor>("XX");                \
+  auto* hidden_out = ctx.Output<phi::DenseTensor>("Hidden");    \
+  auto* cell_out = ctx.Output<phi::DenseTensor>("Cell");        \
   bool is_reverse = ctx.Attr<bool>("is_reverse");               \
   bool use_peepholes = ctx.Attr<bool>("use_peepholes");
 
@@ -508,9 +510,9 @@ class FusedEmbeddingFCLSTMKernel : public framework::OpKernel<T> {
 
     auto* reordered_h0 = ctx.Output<phi::DenseTensor>("ReorderedH0");
     auto* reordered_c0 = ctx.Output<phi::DenseTensor>("ReorderedC0");
-    auto* batched_input = ctx.Output<LoDTensor>("BatchedInput");
-    auto* batched_c_out = ctx.Output<LoDTensor>("BatchedCell");
-    auto* batched_h_out = ctx.Output<LoDTensor>("BatchedHidden");
+    auto* batched_input = ctx.Output<phi::DenseTensor>("BatchedInput");
+    auto* batched_c_out = ctx.Output<phi::DenseTensor>("BatchedCell");
+    auto* batched_h_out = ctx.Output<phi::DenseTensor>("BatchedHidden");
     T* xx_data = xx->mutable_data<T>(place);
     T* batched_input_data = batched_input->mutable_data<T>(place);
     T* batched_c_out_data = batched_c_out->mutable_data<T>(place);
diff --git a/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.h b/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.h
index 7e0500b87b1..181fa06b020 100644
--- a/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.h
+++ b/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.h
@@ -18,7 +18,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using LoDTensor = phi::DenseTensor;
 using Tensor = phi::DenseTensor;
 
 class FusedEmbeddingFCLSTMOp : public framework::OperatorWithKernel {
diff --git a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
index d05fbeac00a..bbb5ce50c90 100644
--- a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
+++ b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
@@ -162,7 +162,7 @@ class FusedEmbeddingSeqPoolOpGradVarTypeInference
                          framework::proto::VarType::SELECTED_ROWS);
     } else {
       VLOG(3) << "fused_embedding_seq_pool_grad op "
-              << framework::GradVarName("W") << " is set to LoDTensor";
+              << framework::GradVarName("W") << " is set to phi::DenseTensor";
       ctx->SetOutputType(out_var_name, framework::proto::VarType::LOD_TENSOR);
     }
     ctx->SetOutputDataType(out_var_name, ctx->GetInputDataType("W"));
diff --git a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
index f069c064791..0e4134d4280 100644
--- a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
+++ b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
@@ -29,7 +29,6 @@ namespace paddle {
 namespace operators {
 
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 using SelectedRows = phi::SelectedRows;
 using DDim = framework::DDim;
 
@@ -83,9 +82,9 @@ void prepare_csr_data(const std::vector<uint64_t> &offset,
 template <typename T>
 struct EmbeddingVSumFunctor {
   void operator()(const framework::ExecutionContext &context,
-                  const LoDTensor *table_t,
-                  const LoDTensor *ids_t,
-                  LoDTensor *output_t) {
+                  const phi::DenseTensor *table_t,
+                  const phi::DenseTensor *ids_t,
+                  phi::DenseTensor *output_t) {
     auto *table = table_t->data<T>();
     int64_t table_height = table_t->dims()[0];
     int64_t table_width = table_t->dims()[1];
@@ -141,9 +140,11 @@ template <typename T>
 class FusedEmbeddingSeqPoolKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &context) const override {
-    const LoDTensor *ids_t = context.Input<LoDTensor>("Ids");  // int tensor
-    LoDTensor *output_t = context.Output<LoDTensor>("Out");    // float tensor
-    const LoDTensor *table_var = context.Input<LoDTensor>("W");
+    const phi::DenseTensor *ids_t =
+        context.Input<phi::DenseTensor>("Ids");  // int tensor
+    phi::DenseTensor *output_t =
+        context.Output<phi::DenseTensor>("Out");  // float tensor
+    const phi::DenseTensor *table_var = context.Input<phi::DenseTensor>("W");
     const std::string &combiner_type = context.Attr<std::string>("combiner");
 
     int64_t last_dim =
@@ -228,23 +229,24 @@ class FusedEmbeddingSeqPoolGradKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext &context) const override {
     auto *table_var = context.InputVar("W");
     DDim table_dim;
-    if (table_var->IsType<LoDTensor>()) {
-      table_dim = context.Input<LoDTensor>("W")->dims();
+    if (table_var->IsType<phi::DenseTensor>()) {
+      table_dim = context.Input<phi::DenseTensor>("W")->dims();
     } else if (table_var->IsType<phi::SelectedRows>()) {
       auto *table_t = context.Input<phi::SelectedRows>("W");
       table_dim = table_t->value().dims();
     } else {
       PADDLE_THROW(platform::errors::PermissionDenied(
           "The parameter W of a LookupTable "
-          "must be either LoDTensor or SelectedRows."));
+          "must be either phi::DenseTensor or SelectedRows."));
     }
 
     bool is_sparse = context.Attr<bool>("is_sparse");
     // Since paddings are not trainable and fixed in forward, the gradient of
     // paddings makes no sense and we don't deal with it in backward.
     if (is_sparse) {
-      auto *ids = context.Input<LoDTensor>("Ids");
-      auto *d_output = context.Input<LoDTensor>(framework::GradVarName("Out"));
+      auto *ids = context.Input<phi::DenseTensor>("Ids");
+      auto *d_output =
+          context.Input<phi::DenseTensor>(framework::GradVarName("Out"));
       auto *d_table =
           context.Output<phi::SelectedRows>(framework::GradVarName("W"));
       // runtime shape
@@ -276,9 +278,11 @@ class FusedEmbeddingSeqPoolGradKernel : public framework::OpKernel<T> {
     } else {
 #if defined(PADDLE_WITH_MKLML) && !defined(_WIN32) && !defined(__APPLE__) && \
     !defined(__OSX__)
-      auto *ids = context.Input<LoDTensor>("Ids");
-      auto *d_output = context.Input<LoDTensor>(framework::GradVarName("Out"));
-      auto *d_table = context.Output<LoDTensor>(framework::GradVarName("W"));
+      auto *ids = context.Input<phi::DenseTensor>("Ids");
+      auto *d_output =
+          context.Input<phi::DenseTensor>(framework::GradVarName("Out"));
+      auto *d_table =
+          context.Output<phi::DenseTensor>(framework::GradVarName("W"));
       int64_t padding_idx = context.Attr<int64_t>("padding_idx");
 
       d_table->Resize(table_dim);
diff --git a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc
index f370d15e5d2..95c82c72efd 100644
--- a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc
+++ b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc
@@ -95,7 +95,7 @@ class FusedSeqpoolCVMOp : public framework::OperatorWithKernel {
  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
-    auto inputs = ctx.MultiInput<LoDTensor>("X");
+    auto inputs = ctx.MultiInput<phi::DenseTensor>("X");
     auto input_data_type = framework::proto::VarType::Type(0);
     bool flag = 0;
     for (auto* input : inputs) {
@@ -121,7 +121,7 @@ class FusedSeqpoolCVMOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("X",
-             "(vector<LoDTensor>) The input tensors of"
+             "(vector<phi::DenseTensor>) The input tensors of"
              " operator.")
         .AsDuplicable();
     AddInput("CVM",
diff --git a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu
index a58a5ea01d0..98c2fc9896f 100644
--- a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu
+++ b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu
@@ -424,7 +424,7 @@ template <typename T>
 class FusedSeqpoolCVMCUDAKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &ctx) const override {
-    auto inputs = ctx.MultiInput<LoDTensor>("X");
+    auto inputs = ctx.MultiInput<phi::DenseTensor>("X");
     auto outputs = ctx.MultiOutput<phi::DenseTensor>("Out");
     auto &dev_ctx = ctx.template device_context<phi::GPUContext>();
     const auto slot_size = inputs.size();
@@ -432,7 +432,7 @@ class FusedSeqpoolCVMCUDAKernel : public framework::OpKernel<T> {
     std::vector<const size_t *> lods_data(slot_size);
     std::vector<T *> output_data(slot_size);
 
-    std::vector<LoDTensor> seqpool_outputs(slot_size);
+    std::vector<phi::DenseTensor> seqpool_outputs(slot_size);
     std::vector<T *> seqpool_output_data(slot_size);
 
     auto padding_value = ctx.Attr<float>("pad_value");
@@ -509,9 +509,11 @@ template <typename T>
 class FusedSeqpoolCVMGradCUDAKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &ctx) const override {
-    auto out_grads = ctx.MultiInput<LoDTensor>(framework::GradVarName("Out"));
-    auto in_grads = ctx.MultiOutput<LoDTensor>(framework::GradVarName("X"));
-    auto *cvm = ctx.Input<LoDTensor>("CVM");
+    auto out_grads =
+        ctx.MultiInput<phi::DenseTensor>(framework::GradVarName("Out"));
+    auto in_grads =
+        ctx.MultiOutput<phi::DenseTensor>(framework::GradVarName("X"));
+    auto *cvm = ctx.Input<phi::DenseTensor>("CVM");
     auto &dev_ctx = ctx.template device_context<phi::GPUContext>();
     std::string pooltype = ctx.Attr<std::string>("pooltype");
     auto use_cvm = ctx.Attr<bool>("use_cvm");
diff --git a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.h b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.h
index a454029050e..2c0b8efa8f1 100644
--- a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.h
+++ b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.h
@@ -23,8 +23,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using LoDTensor = phi::DenseTensor;
-
 template <typename T>
 class FusedSeqpoolCVMOpCPUKernel : public framework::OpKernel<T> {
  public:
diff --git a/paddle/fluid/operators/fused/fusion_group_op.cc b/paddle/fluid/operators/fused/fusion_group_op.cc
index eef2479341b..36b97ea7b12 100644
--- a/paddle/fluid/operators/fused/fusion_group_op.cc
+++ b/paddle/fluid/operators/fused/fusion_group_op.cc
@@ -87,10 +87,10 @@ class FusionGroupOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("Inputs",
-             "(std::vector<LoDTensor>) The inputs of fusion_group op.")
+             "(std::vector<phi::DenseTensor>) The inputs of fusion_group op.")
         .AsDuplicable();
     AddOutput("Outs",
-              "(std::vector<LoDTensor>) The outputs of fusion_group op.")
+              "(std::vector<phi::DenseTensor>) The outputs of fusion_group op.")
         .AsDuplicable();
     AddAttr<std::vector<int>>("outs_dtype",
                               "The data type of Outputs in fusion_group op.")
diff --git a/paddle/fluid/operators/fused/fusion_gru_op.cc b/paddle/fluid/operators/fused/fusion_gru_op.cc
index f44e7afddbd..814631bd87b 100644
--- a/paddle/fluid/operators/fused/fusion_gru_op.cc
+++ b/paddle/fluid/operators/fused/fusion_gru_op.cc
@@ -154,11 +154,12 @@ framework::OpKernelType FusionGRUOp::GetExpectedKernelType(
 }
 
 void FusionGRUOpMaker::Make() {
-  AddInput("X",
-           "(LoDTensor) the input is a LodTensor, which support "
-           "variable-time length input sequence. The underlying tensor in "
-           "this LoDTensor is a matrix with shape (T X M), where T is the "
-           "total time steps in this mini-batch, M is the dim size of x.");
+  AddInput(
+      "X",
+      "(phi::DenseTensor) the input is a LodTensor, which support "
+      "variable-time length input sequence. The underlying tensor in "
+      "this phi::DenseTensor is a matrix with shape (T X M), where T is the "
+      "total time steps in this mini-batch, M is the dim size of x.");
   AddInput("H0",
            "(Tensor, optional) The initial hidden state is an optional "
            "input. This is a tensor with shape (N x D), where N is the "
@@ -181,18 +182,18 @@ void FusionGRUOpMaker::Make() {
   AddOutput("ReorderedH0", "(Tensor) (N x D), which N is the min-batch size.")
       .AsIntermediate();
   AddOutput("XX",
-            "(LoDTensor) the result after X * WeightX (size is T x 3D)"
+            "(phi::DenseTensor) the result after X * WeightX (size is T x 3D)"
             " or batched_X (size is T x M), this will be automatically chosen,"
             " where T is the total time steps in this mini-batch,"
             " D is the hidden size, M is the dim size of x input.")
       .AsIntermediate();
   AddOutput("BatchedInput",
-            "(LoDTensor) This is the batched result of input X"
+            "(phi::DenseTensor) This is the batched result of input X"
             "or the batched result after fc, shape (T x 3D)")
       .AsIntermediate();
-  AddOutput("BatchedOut", "(LoDTensor) (T X D) save batched hidden.")
+  AddOutput("BatchedOut", "(phi::DenseTensor) (T X D) save batched hidden.")
       .AsIntermediate();
-  AddOutput("Hidden", "(LoDTensor) (T x D) Same as GRUOp");
+  AddOutput("Hidden", "(phi::DenseTensor) (T x D) Same as GRUOp");
   AddAttr<std::string>("activation",
                        "(string, default tanh) "
                        "The activation type used for output candidate {h}_t.")
@@ -257,9 +258,9 @@ class FusionGRUKernel : public framework::OpKernel<T> {
   }
 
 #define INIT_BASE_DEFINES                                  \
-  auto* x = ctx.Input<LoDTensor>("X");                     \
+  auto* x = ctx.Input<phi::DenseTensor>("X");              \
   auto* wh = ctx.Input<phi::DenseTensor>("WeightH");       \
-  auto* xx = ctx.Output<LoDTensor>("XX");                  \
+  auto* xx = ctx.Output<phi::DenseTensor>("XX");           \
   auto x_lod = x->lod();                                   \
   auto x_dims = x->dims(); /* T x M*/                      \
   auto x_mat_dims = (x_dims.size() == 3 && x_dims[1] == 1) \
@@ -273,7 +274,7 @@ class FusionGRUKernel : public framework::OpKernel<T> {
   auto* h0 = ctx.Input<phi::DenseTensor>("H0");                              \
   auto* wx = ctx.Input<phi::DenseTensor>("WeightX");                         \
   auto* bias = ctx.Input<phi::DenseTensor>("Bias");                          \
-  auto* hidden_out = ctx.Output<LoDTensor>("Hidden");                        \
+  auto* hidden_out = ctx.Output<phi::DenseTensor>("Hidden");                 \
   bool is_reverse = ctx.Attr<bool>("is_reverse");                            \
   const int M = x_mat_dims[1];                                               \
   const int D = wh_dims[0];                                                  \
@@ -398,8 +399,8 @@ class FusionGRUKernel : public framework::OpKernel<T> {
     }
     INIT_OTHER_DEFINES;
     auto* reordered_h0 = ctx.Output<phi::DenseTensor>("ReorderedH0");
-    auto* batched_input = ctx.Output<LoDTensor>("BatchedInput");
-    auto* batched_out = ctx.Output<LoDTensor>("BatchedOut");
+    auto* batched_input = ctx.Output<phi::DenseTensor>("BatchedInput");
+    auto* batched_out = ctx.Output<phi::DenseTensor>("BatchedOut");
     T* batched_input_data = batched_input->mutable_data<T>(place);
     T* batched_out_data = batched_out->mutable_data<T>(place);
     hidden_out->mutable_data<T>(place);
diff --git a/paddle/fluid/operators/fused/fusion_gru_op.h b/paddle/fluid/operators/fused/fusion_gru_op.h
index 1003d1e0c29..4df50420890 100644
--- a/paddle/fluid/operators/fused/fusion_gru_op.h
+++ b/paddle/fluid/operators/fused/fusion_gru_op.h
@@ -18,7 +18,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using LoDTensor = phi::DenseTensor;
 using Tensor = phi::DenseTensor;
 
 class FusionGRUOp : public framework::OperatorWithKernel {
diff --git a/paddle/fluid/operators/fused/fusion_lstm_op.cc b/paddle/fluid/operators/fused/fusion_lstm_op.cc
index 3da1cbd8beb..b612d590ea1 100644
--- a/paddle/fluid/operators/fused/fusion_lstm_op.cc
+++ b/paddle/fluid/operators/fused/fusion_lstm_op.cc
@@ -177,11 +177,12 @@ framework::OpKernelType FusionLSTMOp::GetExpectedKernelType(
 }
 
 void FusionLSTMOpMaker::Make() {
-  AddInput("X",
-           "(LoDTensor) the input is a LodTensor, which support "
-           "variable-time length input sequence. The underlying tensor in "
-           "this LoDTensor is a matrix with shape (T X M), where T is the "
-           "total time steps in this mini-batch, M is the dim size of x.");
+  AddInput(
+      "X",
+      "(phi::DenseTensor) the input is a LodTensor, which support "
+      "variable-time length input sequence. The underlying tensor in "
+      "this phi::DenseTensor is a matrix with shape (T X M), where T is the "
+      "total time steps in this mini-batch, M is the dim size of x.");
   AddInput("WeightX",
            "(Tensor) the learnable weights of X."
            " - The shape is (M x 4D), where M is the dim size of x, D is the "
@@ -214,23 +215,25 @@ void FusionLSTMOpMaker::Make() {
            "input. This is a tensor with shape (N x D), where N is the "
            "batch size. `H0` and `C0` can be NULL but only at the same time.")
       .AsDispensable();
-  AddOutput("Hidden",
-            "(LoDTensor) (same as LSTMOp) the hidden state of LSTM operator. "
-            "The shape is (T x D), and lod is the same with the `Input`.");
-  AddOutput("Cell",
-            "(LoDTensor) (same as LSTMOp) the cell state of LSTM operator. "
-            "The shape is (T x D), and lod is the same with the `Input`.");
+  AddOutput(
+      "Hidden",
+      "(phi::DenseTensor) (same as LSTMOp) the hidden state of LSTM operator. "
+      "The shape is (T x D), and lod is the same with the `Input`.");
+  AddOutput(
+      "Cell",
+      "(phi::DenseTensor) (same as LSTMOp) the cell state of LSTM operator. "
+      "The shape is (T x D), and lod is the same with the `Input`.");
   AddOutput("XX",
-            "(LoDTensor) the result after X * WeightX (size is T x 4D)"
+            "(phi::DenseTensor) the result after X * WeightX (size is T x 4D)"
             " or batched_X (size is T x M), this will be automatically chosen,"
             " where T is the total time steps in this mini-batch,"
             " D is the hidden size, M is the dim size of x input.")
       .AsIntermediate();
-  AddOutput("BatchedInput", "(LoDTensor) (T x 4D).").AsIntermediate();
-  AddOutput("BatchedHidden", "(LoDTensor) (T x D).").AsIntermediate();
-  AddOutput("BatchedCell", "(LoDTensor) (T x D).").AsIntermediate();
-  AddOutput("ReorderedH0", "(LoDTensor) (N x D).").AsIntermediate();
-  AddOutput("ReorderedC0", "(LoDTensor) (N x D).").AsIntermediate();
+  AddOutput("BatchedInput", "(phi::DenseTensor) (T x 4D).").AsIntermediate();
+  AddOutput("BatchedHidden", "(phi::DenseTensor) (T x D).").AsIntermediate();
+  AddOutput("BatchedCell", "(phi::DenseTensor) (T x D).").AsIntermediate();
+  AddOutput("ReorderedH0", "(phi::DenseTensor) (N x D).").AsIntermediate();
+  AddOutput("ReorderedC0", "(phi::DenseTensor) (N x D).").AsIntermediate();
   AddOutput("CheckedCell", "(Tensor) (2 x D) only for peephole.")
       .AsIntermediate();
   AddAttr<bool>("use_peepholes",
@@ -295,23 +298,23 @@ This operator fuse the X into LSTM, more details can refer to LSTM op.
 template <typename T>
 class FuisonLSTMKernel : public framework::OpKernel<T> {
  public:
-#define INIT_BASE_DEFINES                               \
-  using DeviceContext = phi::CPUContext;                \
-  auto* x = ctx.Input<LoDTensor>("X");                  \
-  auto* h0 = ctx.Input<phi::DenseTensor>("H0");         \
-  auto* c0 = ctx.Input<phi::DenseTensor>("C0");         \
-  auto* wx = ctx.Input<phi::DenseTensor>("WeightX");    \
-  auto* wh = ctx.Input<phi::DenseTensor>("WeightH");    \
-  auto* bias = ctx.Input<phi::DenseTensor>("Bias");     \
-  auto* xx = ctx.Output<LoDTensor>("XX");               \
-  auto* hidden_out = ctx.Output<LoDTensor>("Hidden");   \
-  auto* cell_out = ctx.Output<LoDTensor>("Cell");       \
-  bool is_reverse = ctx.Attr<bool>("is_reverse");       \
-  bool use_peepholes = ctx.Attr<bool>("use_peepholes"); \
-  auto x_dims = x->dims();   /* T x M*/                 \
-  auto wh_dims = wh->dims(); /* D x 4D*/                \
-  const int M = x_dims[1];                              \
-  const int D = wh_dims[0];                             \
+#define INIT_BASE_DEFINES                                    \
+  using DeviceContext = phi::CPUContext;                     \
+  auto* x = ctx.Input<phi::DenseTensor>("X");                \
+  auto* h0 = ctx.Input<phi::DenseTensor>("H0");              \
+  auto* c0 = ctx.Input<phi::DenseTensor>("C0");              \
+  auto* wx = ctx.Input<phi::DenseTensor>("WeightX");         \
+  auto* wh = ctx.Input<phi::DenseTensor>("WeightH");         \
+  auto* bias = ctx.Input<phi::DenseTensor>("Bias");          \
+  auto* xx = ctx.Output<phi::DenseTensor>("XX");             \
+  auto* hidden_out = ctx.Output<phi::DenseTensor>("Hidden"); \
+  auto* cell_out = ctx.Output<phi::DenseTensor>("Cell");     \
+  bool is_reverse = ctx.Attr<bool>("is_reverse");            \
+  bool use_peepholes = ctx.Attr<bool>("use_peepholes");      \
+  auto x_dims = x->dims();   /* T x M*/                      \
+  auto wh_dims = wh->dims(); /* D x 4D*/                     \
+  const int M = x_dims[1];                                   \
+  const int D = wh_dims[0];                                  \
   const int D4 = wh_dims[1]
 
 #define INIT_OTHER_DEFINES                                                     \
@@ -439,9 +442,9 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
 
     auto* reordered_h0 = ctx.Output<phi::DenseTensor>("ReorderedH0");
     auto* reordered_c0 = ctx.Output<phi::DenseTensor>("ReorderedC0");
-    auto* batched_input = ctx.Output<LoDTensor>("BatchedInput");
-    auto* batched_c_out = ctx.Output<LoDTensor>("BatchedCell");
-    auto* batched_h_out = ctx.Output<LoDTensor>("BatchedHidden");
+    auto* batched_input = ctx.Output<phi::DenseTensor>("BatchedInput");
+    auto* batched_c_out = ctx.Output<phi::DenseTensor>("BatchedCell");
+    auto* batched_h_out = ctx.Output<phi::DenseTensor>("BatchedHidden");
     T* xx_data = xx->mutable_data<T>(place);
     T* batched_input_data = batched_input->mutable_data<T>(place);
     T* batched_c_out_data = batched_c_out->mutable_data<T>(place);
diff --git a/paddle/fluid/operators/fused/fusion_lstm_op.h b/paddle/fluid/operators/fused/fusion_lstm_op.h
index ebcc9ec4877..590d4bd7c29 100644
--- a/paddle/fluid/operators/fused/fusion_lstm_op.h
+++ b/paddle/fluid/operators/fused/fusion_lstm_op.h
@@ -18,7 +18,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using LoDTensor = phi::DenseTensor;
 using Tensor = phi::DenseTensor;
 
 class FusionLSTMOp : public framework::OperatorWithKernel {
diff --git a/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc b/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc
index 50291ee6481..7bad7c78edc 100644
--- a/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc
+++ b/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc
@@ -106,14 +106,14 @@ framework::OpKernelType FusionRepeatedFCReluOp::GetExpectedKernelType(
 }
 
 void FusionRepeatedFCReluOpMaker::Make() {
-  AddInput("X", "(LoDTensor) Input tensors of this operator.");
+  AddInput("X", "(phi::DenseTensor) Input tensors of this operator.");
   AddInput("W", "(Tensor) The weight tensors of this operator.").AsDuplicable();
   AddInput("Bias", "(Tensor) The bias tensors of this operator.")
       .AsDuplicable();
   AddOutput("ReluOut", "(Tensor) The output tensor of each relu operator.")
       .AsDuplicable()
       .AsIntermediate();
-  AddOutput("Out", "(LoDTensor) Output tensor of this operator.");
+  AddOutput("Out", "(phi::DenseTensor) Output tensor of this operator.");
   AddComment(R"DOC(
   Fusion Repeated FC with Relu Operator.
 )DOC");
diff --git a/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.h b/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.h
index 86342a84b8d..2cfb404913c 100644
--- a/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.h
+++ b/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.h
@@ -18,7 +18,6 @@
 namespace paddle {
 namespace operators {
 
-using LoDTensor = phi::DenseTensor;
 using Tensor = phi::DenseTensor;
 
 class FusionRepeatedFCReluOp : public framework::OperatorWithKernel {
diff --git a/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc b/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc
index 64cc22224d3..cb08e4fbff2 100644
--- a/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc
+++ b/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc
@@ -95,11 +95,12 @@ framework::OpKernelType FusionSeqConvEltAddReluOp::GetExpectedKernelType(
 }
 
 void FusionSeqConvEltAddReluOpMaker::Make() {
-  AddInput("X",
-           "(LoDTensor) the input is a LodTensor, which support "
-           "variable-time length input sequence. The underlying tensor in "
-           "this LoDTensor is a matrix with shape (T X M), where T is the "
-           "total time steps in this mini-batch, M is the dim size of x.");
+  AddInput(
+      "X",
+      "(phi::DenseTensor) the input is a LodTensor, which support "
+      "variable-time length input sequence. The underlying tensor in "
+      "this phi::DenseTensor is a matrix with shape (T X M), where T is the "
+      "total time steps in this mini-batch, M is the dim size of x.");
   // PaddingData only support false yet, should be ensured at pass.
   AddInput("Filter",
            "(Tensor) same as the input(Filter) of sequence conv op is an "
@@ -111,9 +112,9 @@ void FusionSeqConvEltAddReluOpMaker::Make() {
            "output feature size");
   AddOutput(
       "Out",
-      "(LoDTensor) the output(Out) is a LodTensor, which support "
+      "(phi::DenseTensor) the output(Out) is a LodTensor, which support "
       "variable-time length output sequence. The underlying tensor in "
-      "this LoDTensor is a matrix with shape (T, N), where, T is the "
+      "this phi::DenseTensor is a matrix with shape (T, N), where, T is the "
       "total time steps in this mini-batch, N is the output feature size.");
   AddOutput("ColMat",
             "(Tensor) (T, K), where T is where T is the "
@@ -150,10 +151,10 @@ class FusionSeqConvEltAddReluKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     using DeviceContext = phi::CPUContext;
-    auto* x = ctx.Input<LoDTensor>("X");
+    auto* x = ctx.Input<phi::DenseTensor>("X");
     auto* w = ctx.Input<phi::DenseTensor>("Filter");
     auto* b = ctx.Input<phi::DenseTensor>("Bias");
-    auto* y = ctx.Output<LoDTensor>("Out");
+    auto* y = ctx.Output<phi::DenseTensor>("Out");
     auto* col = ctx.Output<phi::DenseTensor>("ColMat");
 
     auto x_lod = x->lod();
diff --git a/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.h b/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.h
index e47726b8b85..d1b7ae83582 100644
--- a/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.h
+++ b/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.h
@@ -18,7 +18,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using LoDTensor = phi::DenseTensor;
 using Tensor = phi::DenseTensor;
 
 class FusionSeqConvEltAddReluOp : public framework::OperatorWithKernel {
diff --git a/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc b/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc
index 095a1c1deb1..bcc8ee89454 100644
--- a/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc
+++ b/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc
@@ -110,12 +110,13 @@ framework::OpKernelType FusionSeqExpandConcatFCOp::GetExpectedKernelType(
 
 void FusionSeqExpandConcatFCOpMaker::Make() {
   AddInput("X",
-           "(LoDTensor) input LodDTensors, the first one must be have ref lod "
+           "(phi::DenseTensor) input LodDTensors, the first one must be have "
+           "ref lod "
            "for sequence expand, and the rest input should have same lod.")
       .AsDuplicable();
   AddInput("FCWeight", "(Tensor) the weights of fc.");
   AddInput("FCBias", "(Tensor, optional) the bias of fc.").AsDispensable();
-  AddOutput("Out", "(LoDTensor) Output LodTensor.");
+  AddOutput("Out", "(phi::DenseTensor) Output LodTensor.");
   AddOutput(
       "FCOut",
       "(Tensor) the intermediate tensor to keep the result of fc."
@@ -150,10 +151,10 @@ class FusionSeqExpandConcatFCOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     using DeviceContext = phi::CPUContext;
-    auto ins = ctx.MultiInput<LoDTensor>("X");
+    auto ins = ctx.MultiInput<phi::DenseTensor>("X");
     auto* w = ctx.Input<phi::DenseTensor>("FCWeight");
     auto* b = ctx.Input<phi::DenseTensor>("FCBias");
-    auto* out = ctx.Output<LoDTensor>("Out");
+    auto* out = ctx.Output<phi::DenseTensor>("Out");
     auto* fc_out = ctx.Output<phi::DenseTensor>("FCOut");
 
     auto* ref_in = ins[0];
diff --git a/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.h b/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.h
index 064fdd7ceb8..9c611025351 100644
--- a/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.h
+++ b/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.h
@@ -18,7 +18,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using LoDTensor = phi::DenseTensor;
 using Tensor = phi::DenseTensor;
 
 class FusionSeqExpandConcatFCOp : public framework::OperatorWithKernel {
diff --git a/paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc b/paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc
index d33da7375cd..f2f7801d7c2 100644
--- a/paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc
+++ b/paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc
@@ -75,8 +75,9 @@ framework::OpKernelType FusionSeqPoolConcatOp::GetExpectedKernelType(
 }
 
 void FusionSeqPoolConcatOpMaker::Make() {
-  AddInput("X", "(LoDTensor) Input tensors of this operator.").AsDuplicable();
-  AddOutput("Out", "(LoDTensor) Output tensor of concat operator.");
+  AddInput("X", "(phi::DenseTensor) Input tensors of this operator.")
+      .AsDuplicable();
+  AddOutput("Out", "(phi::DenseTensor) Output tensor of concat operator.");
   AddAttr<std::string>("pooltype",
                        "(string, default 'SUM') some of the pooling "
                        "pooltype of SequencePoolOp.")
@@ -95,8 +96,8 @@ template <typename T>
 class FusionSeqPoolConcatKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    auto ins = ctx.MultiInput<LoDTensor>("X");
-    auto* out = ctx.Output<LoDTensor>("Out");
+    auto ins = ctx.MultiInput<phi::DenseTensor>("X");
+    auto* out = ctx.Output<phi::DenseTensor>("Out");
     std::string pooltype = ctx.Attr<std::string>("pooltype");
     auto x0_lod = ins[0]->lod();
     const auto& x0_dims = ins[0]->dims();
diff --git a/paddle/fluid/operators/fused/fusion_seqpool_concat_op.h b/paddle/fluid/operators/fused/fusion_seqpool_concat_op.h
index e076cfa51bf..6dc29b23cbb 100644
--- a/paddle/fluid/operators/fused/fusion_seqpool_concat_op.h
+++ b/paddle/fluid/operators/fused/fusion_seqpool_concat_op.h
@@ -18,7 +18,6 @@
 namespace paddle {
 namespace operators {
 
-using LoDTensor = phi::DenseTensor;
 using Tensor = phi::DenseTensor;
 
 class FusionSeqPoolConcatOp : public framework::OperatorWithKernel {
diff --git a/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc b/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc
index b97ceeecb59..41944f4bc09 100644
--- a/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc
+++ b/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc
@@ -74,11 +74,12 @@ framework::OpKernelType FusionSeqPoolCVMConcatOp::GetExpectedKernelType(
 }
 
 void FusionSeqPoolCVMConcatOpMaker::Make() {
-  AddInput("X", "(LoDTensor) Input tensors of this operator.").AsDuplicable();
+  AddInput("X", "(phi::DenseTensor) Input tensors of this operator.")
+      .AsDuplicable();
   AddInput("CVM",
            "(Tensor),  a 2-D Tensor with shape [N x 2], where N is the batch "
            "size, 2 is show and click.");
-  AddOutput("Out", "(LoDTensor) Output tensor of concat operator.");
+  AddOutput("Out", "(phi::DenseTensor) Output tensor of concat operator.");
   AddAttr<std::string>("pooltype",
                        "(string, default 'SUM') some of the pooling "
                        "pooltype of SequencePoolOp.")
@@ -98,8 +99,8 @@ template <typename T>
 class FusionSeqPoolCVMConcatKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    auto ins = ctx.MultiInput<LoDTensor>("X");
-    auto* out = ctx.Output<LoDTensor>("Out");
+    auto ins = ctx.MultiInput<phi::DenseTensor>("X");
+    auto* out = ctx.Output<phi::DenseTensor>("Out");
     std::string pooltype = ctx.Attr<std::string>("pooltype");
     auto x0_lod = ins[0]->lod();
     const auto& x0_dims = ins[0]->dims();
diff --git a/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.h b/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.h
index 841ea11e33a..24a02553044 100644
--- a/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.h
+++ b/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.h
@@ -18,7 +18,6 @@
 namespace paddle {
 namespace operators {
 
-using LoDTensor = phi::DenseTensor;
 using Tensor = phi::DenseTensor;
 
 class FusionSeqPoolCVMConcatOp : public framework::OperatorWithKernel {
diff --git a/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.h b/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.h
index 8a26358c992..7707bb14fce 100644
--- a/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.h
+++ b/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.h
@@ -18,7 +18,6 @@
 namespace paddle {
 namespace operators {
 
-using LoDTensor = phi::DenseTensor;
 using Tensor = phi::DenseTensor;
 
 // ( (A.^2 * B.^2) - (A * B).^2 ) .* scalar
diff --git a/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc b/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
index 84ee7c0fb9b..c7acc1cf730 100644
--- a/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
+++ b/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
@@ -59,11 +59,11 @@ class MultiGRUHandler {
         origin_mode_(ctx.Attr<bool>("origin_mode")),
         layers_(ctx.Attr<int>("layers")),
         concat_pds_(layers_, std::shared_ptr<dnnl::concat::primitive_desc>()),
-        x_(ctx.Input<LoDTensor>("X")),
+        x_(ctx.Input<phi::DenseTensor>("X")),
         weights_x_(ctx.MultiInput<phi::DenseTensor>("WeightX")),
         weights_h_(ctx.MultiInput<phi::DenseTensor>("WeightH")),
         biases_(ctx.MultiInput<phi::DenseTensor>("Bias")),
-        hidden_(ctx.Output<LoDTensor>("Hidden")),
+        hidden_(ctx.Output<phi::DenseTensor>("Hidden")),
         x_lod_(x_->lod()[0]) {
     PADDLE_ENFORCE_EQ(
         weights_x_.size(),
@@ -127,7 +127,8 @@ class MultiGRUHandler {
 
     if (is_int8) {
       // Add int8 attributes
-      const auto scale_weights = ctx.MultiInput<LoDTensor>("Scale_weights");
+      const auto scale_weights =
+          ctx.MultiInput<phi::DenseTensor>("Scale_weights");
       PADDLE_ENFORCE_EQ(
           scale_weights.size(),
           layers_ * 2,
@@ -669,11 +670,11 @@ class MultiGRUHandler {
   // on Ti size, thus we need another key to cache them
   std::string memory_key_;
 
-  const LoDTensor* x_;
+  const phi::DenseTensor* x_;
   const std::vector<const phi::DenseTensor*> weights_x_;
   const std::vector<const phi::DenseTensor*> weights_h_;
   const std::vector<const phi::DenseTensor*> biases_;
-  LoDTensor* hidden_;
+  phi::DenseTensor* hidden_;
   std::vector<dnnl::primitive_attr> attrs_;
   const paddle::framework::Vector<size_t>& x_lod_;
 };
diff --git a/paddle/fluid/operators/fused/multi_gru_op.cc b/paddle/fluid/operators/fused/multi_gru_op.cc
index 9fb260aee19..0552c3ce9b5 100644
--- a/paddle/fluid/operators/fused/multi_gru_op.cc
+++ b/paddle/fluid/operators/fused/multi_gru_op.cc
@@ -148,11 +148,12 @@ framework::OpKernelType MultiGRUOp::GetExpectedKernelType(
 }
 
 void MultiGRUOpMaker::Make() {
-  AddInput("X",
-           "(LoDTensor) the input is an LodTensor, which support "
-           "variable-time length input sequence. The underlying tensor in "
-           "this LoDTensor is a matrix with shape (T X M), where T is the "
-           "total time steps in this mini-batch, M is the dim size of x.");
+  AddInput(
+      "X",
+      "(phi::DenseTensor) the input is an LodTensor, which support "
+      "variable-time length input sequence. The underlying tensor in "
+      "this phi::DenseTensor is a matrix with shape (T X M), where T is the "
+      "total time steps in this mini-batch, M is the dim size of x.");
   AddInput("WeightX",
            "(MultiTensor) The FC weight with shape (M x 3D),"
            "where M is the dim size of x, D is the hidden size. ")
@@ -176,7 +177,7 @@ void MultiGRUOpMaker::Make() {
       "Only used with MKL-DNN INT8.")
       .AsDuplicable()
       .AsDispensable();
-  AddOutput("Hidden", "(LoDTensor) (T x D) Same as GRUOp");
+  AddOutput("Hidden", "(phi::DenseTensor) (T x D) Same as GRUOp");
   AddAttr<std::string>("activation",
                        "(string, default tanh) "
                        "The activation type used for output candidate {h}_t.")
diff --git a/paddle/fluid/operators/fused/multi_gru_op.h b/paddle/fluid/operators/fused/multi_gru_op.h
index 004ca350a4e..1846d819600 100644
--- a/paddle/fluid/operators/fused/multi_gru_op.h
+++ b/paddle/fluid/operators/fused/multi_gru_op.h
@@ -20,7 +20,6 @@ namespace paddle {
 namespace operators {
 
 using framework::ExecutionContext;
-using LoDTensor = phi::DenseTensor;
 
 class MultiGRUOp : public framework::OperatorWithKernel {
  public:
diff --git a/paddle/fluid/operators/math/context_project.h b/paddle/fluid/operators/math/context_project.h
index 832be9b0efd..0038b25fb42 100644
--- a/paddle/fluid/operators/math/context_project.h
+++ b/paddle/fluid/operators/math/context_project.h
@@ -27,7 +27,6 @@ namespace operators {
 namespace math {
 
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 
 /*
  * \brief Context projection concatenates features in adjacent time-steps in
@@ -51,9 +50,8 @@ using LoDTensor = phi::DenseTensor;
  * For a mini-batch of 2 variable lengths sentences, containing 3, and 1
  * time-steps:
  *
- * Assumed input (X) is a [4, M, N] float LoDTensor, and X->lod()[0] = [0, 3,
- * 4].
- * Besides, for the sake of simplicity, we assume M=1 and N=2.
+ * Assumed input (X) is a [4, M, N] float phi::DenseTensor, and X->lod()[0] =
+ * [0, 3, 4]. Besides, for the sake of simplicity, we assume M=1 and N=2.
  *
  * X = [[a1, a2;
  *       b1, b2;
@@ -89,7 +87,7 @@ template <typename DeviceContext, typename T>
 class ContextProjectFunctor {
  public:
   void operator()(const DeviceContext& context,
-                  const LoDTensor& in,
+                  const phi::DenseTensor& in,
                   const phi::DenseTensor* padding_data,
                   bool padding_trainable,
                   const int context_start,
@@ -218,7 +216,7 @@ template <typename DeviceContext, typename T>
 class ContextProjectGradFunctor {
  public:
   void operator()(const DeviceContext& context,
-                  const LoDTensor& in,
+                  const phi::DenseTensor& in,
                   bool padding_trainable,
                   const int context_start,
                   const int context_length,
diff --git a/paddle/fluid/operators/math/sequence_padding.h b/paddle/fluid/operators/math/sequence_padding.h
index 8b76f85d7a8..9e7db9f03ed 100644
--- a/paddle/fluid/operators/math/sequence_padding.h
+++ b/paddle/fluid/operators/math/sequence_padding.h
@@ -82,8 +82,8 @@ inline static void CheckDims(const framework::DDim& seq_tensor_dims,
 }
 
 /*
- * \brief   Padding/Unpadding LoDTensor to/from normal Tensor of the shape
- *          [max_sequence_length, num_sequences, sequence_width].
+ * \brief   Padding/Unpadding phi::DenseTensor to/from normal Tensor of the
+ *          shape [max_sequence_length, num_sequences, sequence_width].
  *
  *  Padding sequence:
  *        padding[i] = seq[lod[level][i]]
@@ -97,13 +97,13 @@ inline static void CheckDims(const framework::DDim& seq_tensor_dims,
  *    padding (s0, s1, s2, s3; s0, s1, s2, 0; s0, 0, s2, 0; s0, 0, 0, 0)
  *
  * \param context       device context of this functor.
- * \param seq           LoDTensor which is stored in sequence format, the shape
- *                      is [total_sequence_length, sequence_width] where
- *                      total_sequence_length is the sum of all sequences'
- *                      length.
- * \param padding       Tensor which is padded to the same length, the shape is
- *                      [max_sequence_length, num_sequences, sequence_width].
- * \param norm_by_times whether dividing sequence's length.
+ * \param seq           phi::DenseTensor which is stored in sequence format,
+ *                      the shape is [total_sequence_length, sequence_width]
+ *                      where total_sequence_length is the sum of all
+ *                      sequences' length.
+ * \param padding       Tensor which is padded to the same length, the shape is
+ *                      [max_sequence_length, num_sequences, sequence_width].
+ * \param norm_by_times whether dividing sequence's length.
  *
  * \note  transposition is also done in this functor.
  */
diff --git a/paddle/fluid/operators/math/sequence_pooling.cc b/paddle/fluid/operators/math/sequence_pooling.cc
index 9008111e42b..65d4a479a49 100644
--- a/paddle/fluid/operators/math/sequence_pooling.cc
+++ b/paddle/fluid/operators/math/sequence_pooling.cc
@@ -25,7 +25,6 @@ namespace operators {
 namespace math {
 
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 template <typename T,
           int MajorType = Eigen::RowMajor,
           typename IndexType = Eigen::DenseIndex>
diff --git a/paddle/fluid/operators/math/sequence_scale.h b/paddle/fluid/operators/math/sequence_scale.h
index e9c1c9832f5..7d7becbd51a 100644
--- a/paddle/fluid/operators/math/sequence_scale.h
+++ b/paddle/fluid/operators/math/sequence_scale.h
@@ -35,7 +35,8 @@ namespace math {
 
  *
  * \param context       Device context of this functor.
- * \param seq           LoDTensor which is stored in sequence format, the shape
+ * \param seq           phi::DenseTensor which is stored in sequence format,
+ *                      the shape
  *                      is [total_sequence_length, sequence_width] where
  *                      total_sequence_length is the sum of all sequences'
  *                      length.
diff --git a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
index 6a6704c0945..b4ced90db80 100644
--- a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
@@ -28,7 +28,6 @@ using dnnl::prop_kind;
 using dnnl::stream;
 using framework::DDim;
 using framework::ExecutionContext;
-using LoDTensor = phi::DenseTensor;
 using phi::funcs::OneDNNGetDataType;
 using phi::funcs::to_void_cast;
 using platform::MKLDNNDeviceContext;
@@ -382,7 +381,7 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
 
   void PrepareSrcMem(const std::shared_ptr<inner_product_forward>& fc_p,
                      const std::shared_ptr<dnnl::memory>& src_mem,
-                     const LoDTensor* x,
+                     const phi::DenseTensor* x,
                      const dnnl::engine& engine) const {
     auto x_md = x->mem_desc().reshape(src_mem->get_desc().dims());
     if (x_md != src_mem->get_desc()) {
@@ -403,10 +402,10 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
         ctx.template device_context<platform::MKLDNNDeviceContext>();
     const auto& mkldnn_engine = dev_ctx.GetEngine();
 
-    const auto* x = ctx.Input<LoDTensor>("Input");
+    const auto* x = ctx.Input<phi::DenseTensor>("Input");
     const auto* weights = ctx.Input<phi::DenseTensor>("W");
     const auto* bias = ctx.Input<phi::DenseTensor>("Bias");
-    auto out = ctx.Output<LoDTensor>("Out");
+    auto out = ctx.Output<phi::DenseTensor>("Out");
 
     const auto& scale_weights = ctx.Attr<std::vector<float>>("Scale_weights");
 
@@ -513,9 +512,9 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
   }
 
   void RecomputeOutputDims(const ExecutionContext& ctx,
-                           const LoDTensor* x,
+                           const phi::DenseTensor* x,
                            const phi::DenseTensor* weights,
-                           LoDTensor* out) const {
+                           phi::DenseTensor* out) const {
     int in_num_col_dims = ctx.Attr<int>("in_num_col_dims");
     bool padding_weights = ctx.Attr<bool>("padding_weights");
     PADDLE_ENFORCE_EQ(padding_weights,
diff --git a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc
index 86395b0465d..a0bc835a4cc 100644
--- a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc
@@ -26,7 +26,6 @@ namespace operators {
 
 using framework::DDim;
 using framework::ExecutionContext;
-using LoDTensor = phi::DenseTensor;
 
 using platform::MatMulV2MKLDNNHandler;
 using platform::MKLDNNDeviceContext;
diff --git a/paddle/fluid/operators/nccl/nccl_op.cu.cc b/paddle/fluid/operators/nccl/nccl_op.cu.cc
index 780e12c1241..d328329e1c2 100644
--- a/paddle/fluid/operators/nccl/nccl_op.cu.cc
+++ b/paddle/fluid/operators/nccl/nccl_op.cu.cc
@@ -19,8 +19,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using LoDTensor = phi::DenseTensor;
-
 using platform::Communicator;
 
 template <typename Type>
@@ -62,8 +60,8 @@ class NCCLAllReduceKernel : public framework::OpKernel<T> {
                       true,
                       platform::errors::PreconditionNotMet(
                           "This kernel only runs on GPU device."));
-    auto* x = ctx.Input<LoDTensor>("X");
-    auto* out = ctx.Output<LoDTensor>("Out");
+    auto* x = ctx.Input<phi::DenseTensor>("X");
+    auto* out = ctx.Output<phi::DenseTensor>("Out");
     auto* comm = ctx.Input<Communicator>("Communicator");
     std::string reduction = ctx.Attr<std::string>("reduction");
 
@@ -97,8 +95,8 @@ class NCCLReduceKernel : public framework::OpKernel<T> {
                       true,
                       platform::errors::InvalidArgument(
                           "This kernel only runs on GPU device."));
-    auto x = ctx.Input<LoDTensor>("X");  // x0, x1, x2
-    auto out = ctx.Output<LoDTensor>("Out");
+    auto x = ctx.Input<phi::DenseTensor>("X");  // x0, x1, x2
+    auto out = ctx.Output<phi::DenseTensor>("Out");
     auto* comm = ctx.Input<Communicator>("Communicator");
     int root = ctx.Attr<int>("root");
     std::string reduction = ctx.Attr<std::string>("reduction");
@@ -144,7 +142,7 @@ class NCCLBcastKernel : public framework::OpKernel<T> {
     int gpu_id = ctx.GetPlace().GetDeviceId();
     int idx = comm->GetCommId(gpu_id);
     if (idx == root) {
-      auto* x = ctx.Input<LoDTensor>("X");
+      auto* x = ctx.Input<phi::DenseTensor>("X");
       VLOG(3) << "gpu : " << gpu_id << " invoke Bcast. send " << x->numel();
       PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclBcast(
           reinterpret_cast<void*>(const_cast<T*>(x->data<T>())),
@@ -155,7 +153,7 @@ class NCCLBcastKernel : public framework::OpKernel<T> {
           ctx.cuda_device_context().stream()));
       VLOG(3) << "gpu : " << gpu_id << " finished Bcast.";
     } else {
-      auto* out = ctx.Output<LoDTensor>("Out");
+      auto* out = ctx.Output<phi::DenseTensor>("Out");
       VLOG(3) << "gpu : " << gpu_id << " invoke Bcast. recv buffer "
               << phi::product(out->dims());
       PADDLE_ENFORCE_GPU_SUCCESS(
diff --git a/paddle/fluid/operators/optimizers/adam_op_mlu.cc b/paddle/fluid/operators/optimizers/adam_op_mlu.cc
index 80743606c7c..c9c33643d1e 100644
--- a/paddle/fluid/operators/optimizers/adam_op_mlu.cc
+++ b/paddle/fluid/operators/optimizers/adam_op_mlu.cc
@@ -20,7 +20,6 @@ namespace paddle {
 namespace operators {
 
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 
 template <typename T>
 class AdamMLUKernel : public framework::OpKernel<T> {
@@ -30,32 +29,32 @@ class AdamMLUKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE_EQ(param_var->IsType<phi::DenseTensor>(),
                       true,
                       platform::errors::InvalidArgument(
-                          "The Var(%s)'s type should be LoDTensor, "
+                          "The Var(%s)'s type should be phi::DenseTensor, "
                           "but the received is %s",
                           ctx.InputNames("Param").front(),
                           framework::ToTypeName(param_var->Type())));
-    auto* param = ctx.Input<LoDTensor>("Param");
+    auto* param = ctx.Input<phi::DenseTensor>("Param");
     auto* grad_var = ctx.InputVar("Grad");
     PADDLE_ENFORCE_EQ(grad_var->IsType<phi::DenseTensor>(),
                       true,
                       platform::errors::InvalidArgument(
-                          "The Grad(%s)'s type should be LoDTensor, "
+                          "The Grad(%s)'s type should be phi::DenseTensor, "
                           "but the received is %s",
                           ctx.InputNames("Grad").front(),
                           framework::ToTypeName(param_var->Type())));
-    auto* grad = ctx.Input<LoDTensor>("Grad");
-    auto* mom1 = ctx.Input<LoDTensor>("Moment1");
-    auto* mom2 = ctx.Input<LoDTensor>("Moment2");
-    auto* lr = ctx.Input<LoDTensor>("LearningRate");
+    auto* grad = ctx.Input<phi::DenseTensor>("Grad");
+    auto* mom1 = ctx.Input<phi::DenseTensor>("Moment1");
+    auto* mom2 = ctx.Input<phi::DenseTensor>("Moment2");
+    auto* lr = ctx.Input<phi::DenseTensor>("LearningRate");
 
     auto* beta1_pow = ctx.Input<phi::DenseTensor>("Beta1Pow");
     auto* beta2_pow = ctx.Input<phi::DenseTensor>("Beta2Pow");
 
-    auto* param_out = ctx.Output<LoDTensor>("ParamOut");
-    auto* mom1_out = ctx.Output<LoDTensor>("Moment1Out");
-    auto* mom2_out = ctx.Output<LoDTensor>("Moment2Out");
-    auto* beta1_pow_out = ctx.Output<LoDTensor>("Beta1PowOut");
-    auto* beta2_pow_out = ctx.Output<LoDTensor>("Beta2PowOut");
+    auto* param_out = ctx.Output<phi::DenseTensor>("ParamOut");
+    auto* mom1_out = ctx.Output<phi::DenseTensor>("Moment1Out");
+    auto* mom2_out = ctx.Output<phi::DenseTensor>("Moment2Out");
+    auto* beta1_pow_out = ctx.Output<phi::DenseTensor>("Beta1PowOut");
+    auto* beta2_pow_out = ctx.Output<phi::DenseTensor>("Beta2PowOut");
 
     bool skip_update = false;
     if (ctx.HasInput("SkipUpdate")) {
@@ -110,8 +109,8 @@ class AdamMLUKernel : public framework::OpKernel<T> {
     mom1_out->ShareDataWith(*mom1);
     mom2_out->ShareDataWith(*mom2);
 
-    LoDTensor beta1_pow_tmp;
-    LoDTensor beta2_pow_tmp;
+    phi::DenseTensor beta1_pow_tmp;
+    phi::DenseTensor beta2_pow_tmp;
     if (beta1_pow->place() == platform::CPUPlace()) {
       T beta1 = *beta1_pow->data<T>();
       beta1_pow_tmp.mutable_data<T>({1}, ctx.GetPlace());
@@ -292,13 +291,13 @@ class AdamWMLUKernel : public AdamMLUKernel<T> {
     }
     bool with_decay = ctx.Attr<bool>("with_decay");
     const bool multi_precision = ctx.Attr<bool>("multi_precision");
-    auto* param_out = ctx.Output<LoDTensor>("ParamOut");
-    auto* master_param_out = ctx.Output<LoDTensor>("MasterParamOut");
-    const auto* master_param = ctx.Input<LoDTensor>("MasterParam");
+    auto* param_out = ctx.Output<phi::DenseTensor>("ParamOut");
+    auto* master_param_out = ctx.Output<phi::DenseTensor>("MasterParamOut");
+    const auto* master_param = ctx.Input<phi::DenseTensor>("MasterParam");
 
     VLOG(3) << "Skip update: " << skip_update << ", With decay: " << with_decay;
     if (!skip_update && with_decay) {
-      auto* param = ctx.Input<LoDTensor>("Param");
+      auto* param = ctx.Input<phi::DenseTensor>("Param");
       MLUCnnlTensorDesc param_desc(*param);
       if (multi_precision) {
         VLOG(3) << "[adamw] multi_precision, cast masterparam to param.";
@@ -328,12 +327,12 @@ class AdamWMLUKernel : public AdamMLUKernel<T> {
         PADDLE_ENFORCE_EQ(param_var->IsType<phi::DenseTensor>(),
                           true,
                           platform::errors::InvalidArgument(
-                              "The Var(%s)'s type should be LoDTensor, "
+                              "The Var(%s)'s type should be phi::DenseTensor, "
                               "but the received is %s",
                               ctx.InputNames("Param").front(),
                               framework::ToTypeName(param_var->Type())));
 
-        auto* lr = ctx.Input<LoDTensor>("LearningRate");
+        auto* lr = ctx.Input<phi::DenseTensor>("LearningRate");
         float coeff = ctx.Attr<float>("coeff");
 
         // update param with decay coeff: mul(-1 * lr, coeff * param) + param
@@ -502,8 +501,8 @@ class MergedAdamMLUKernel : public framework::OpKernel<T> {
       mom1_outs[i]->ShareDataWith(*mom1s[i]);
       mom2_outs[i]->ShareDataWith(*mom2s[i]);
 
-      LoDTensor beta1_pow_tmp;
-      LoDTensor beta2_pow_tmp;
+      phi::DenseTensor beta1_pow_tmp;
+      phi::DenseTensor beta2_pow_tmp;
       if (beta1_pows[i]->place() == platform::CPUPlace()) {
         T beta1 = *beta1_pows[i]->data<T>();
         beta1_pow_tmp.mutable_data<T>({1}, ctx.GetPlace());
diff --git a/paddle/fluid/operators/optimizers/adam_op_npu.cc b/paddle/fluid/operators/optimizers/adam_op_npu.cc
index f53a6b75072..f94b32413a0 100644
--- a/paddle/fluid/operators/optimizers/adam_op_npu.cc
+++ b/paddle/fluid/operators/optimizers/adam_op_npu.cc
@@ -23,7 +23,6 @@ namespace paddle {
 namespace operators {
 
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 
 template <typename DeviceContext, typename T>
 class AdamNPUKernel : public framework::OpKernel<T> {
@@ -33,32 +32,32 @@ class AdamNPUKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE_EQ(param_var->IsType<phi::DenseTensor>(),
                       true,
                       platform::errors::InvalidArgument(
-                          "The Var(%s)'s type should be LoDTensor, "
+                          "The Var(%s)'s type should be phi::DenseTensor, "
                           "but the received is %s",
                           ctx.InputNames("Param").front(),
                           framework::ToTypeName(param_var->Type())));
-    auto* param = ctx.Input<LoDTensor>("Param");
+    auto* param = ctx.Input<phi::DenseTensor>("Param");
     auto* grad_var = ctx.InputVar("Grad");
     PADDLE_ENFORCE_EQ(grad_var->IsType<phi::DenseTensor>(),
                       true,
                       platform::errors::InvalidArgument(
-                          "The Grad(%s)'s type should be LoDTensor, "
+                          "The Grad(%s)'s type should be phi::DenseTensor, "
                           "but the received is %s",
                           ctx.InputNames("Grad").front(),
                           framework::ToTypeName(param_var->Type())));
-    auto* grad = ctx.Input<LoDTensor>("Grad");
-    auto* mom1 = ctx.Input<LoDTensor>("Moment1");
-    auto* mom2 = ctx.Input<LoDTensor>("Moment2");
-    auto* lr = ctx.Input<LoDTensor>("LearningRate");
+    auto* grad = ctx.Input<phi::DenseTensor>("Grad");
+    auto* mom1 = ctx.Input<phi::DenseTensor>("Moment1");
+    auto* mom2 = ctx.Input<phi::DenseTensor>("Moment2");
+    auto* lr = ctx.Input<phi::DenseTensor>("LearningRate");
 
     auto* beta1_pow = ctx.Input<phi::DenseTensor>("Beta1Pow");
     auto* beta2_pow = ctx.Input<phi::DenseTensor>("Beta2Pow");
 
-    auto* param_out = ctx.Output<LoDTensor>("ParamOut");
-    auto* mom1_out = ctx.Output<LoDTensor>("Moment1Out");
-    auto* mom2_out = ctx.Output<LoDTensor>("Moment2Out");
-    auto* beta1_pow_out = ctx.Output<LoDTensor>("Beta1PowOut");
-    auto* beta2_pow_out = ctx.Output<LoDTensor>("Beta2PowOut");
+    auto* param_out = ctx.Output<phi::DenseTensor>("ParamOut");
+    auto* mom1_out = ctx.Output<phi::DenseTensor>("Moment1Out");
+    auto* mom2_out = ctx.Output<phi::DenseTensor>("Moment2Out");
+    auto* beta1_pow_out = ctx.Output<phi::DenseTensor>("Beta1PowOut");
+    auto* beta2_pow_out = ctx.Output<phi::DenseTensor>("Beta2PowOut");
 
     bool skip_update = false;
     if (ctx.HasInput("SkipUpdate")) {
@@ -114,8 +113,8 @@ class AdamNPUKernel : public framework::OpKernel<T> {
 
     // NOTE(zhiqiu): beta1_pow and beta2_pow may on CPU and not transform
     // place.
-    LoDTensor beta1_pow_tmp;
-    LoDTensor beta2_pow_tmp;
+    phi::DenseTensor beta1_pow_tmp;
+    phi::DenseTensor beta2_pow_tmp;
     if (beta1_pow->place() == platform::CPUPlace()) {
       T beta1 = *beta1_pow->data<T>();
       beta1_pow_tmp.mutable_data<T>({1}, ctx.GetPlace());
@@ -279,7 +278,7 @@ class AdamWNPUKernel : public AdamNPUKernel<platform::NPUDeviceContext, T> {
     bool with_decay = ctx.Attr<bool>("with_decay");
     if (!skip_update && with_decay) {
       float coeff = ctx.Attr<float>("coeff");
-      auto* lr = ctx.Input<LoDTensor>("LearningRate");
+      auto* lr = ctx.Input<phi::DenseTensor>("LearningRate");
 
       auto place = ctx.GetPlace();
 
@@ -308,18 +307,18 @@ class AdamWNPUKernel : public AdamNPUKernel<platform::NPUDeviceContext, T> {
         PADDLE_THROW(platform::errors::Unimplemented(
             "Master Parma is not supported on npu"));
       } else {
-        auto* param_out = ctx.Output<LoDTensor>("ParamOut");
+        auto* param_out = ctx.Output<phi::DenseTensor>("ParamOut");
         param_out->mutable_data<T>(ctx.GetPlace());
 
         const auto* param_var = ctx.InputVar("Param");
         PADDLE_ENFORCE_EQ(param_var->IsType<phi::DenseTensor>(),
                           true,
                           platform::errors::InvalidArgument(
-                              "The Var(%s)'s type should be LoDTensor, "
+                              "The Var(%s)'s type should be phi::DenseTensor, "
                               "but the received is %s",
                               ctx.InputNames("Param").front(),
                               framework::ToTypeName(param_var->Type())));
-        auto* param = ctx.Input<LoDTensor>("Param");
+        auto* param = ctx.Input<phi::DenseTensor>("Param");
 
         const auto& runner =
             NpuOpRunner("Mul",
diff --git a/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc b/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc
index 94a52d9765b..5ab3ef3b2e6 100644
--- a/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc
+++ b/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc
@@ -32,20 +32,20 @@ class DecayedAdagradOp : public framework::OperatorWithKernel {
                    "Input",
                    "LearningRate",
                    "DecayedAdagradOp");
-    PADDLE_ENFORCE_EQ(
-        ctx->GetInputsVarType("Param").front(),
-        framework::proto::VarType::LOD_TENSOR,
-        platform::errors::InvalidArgument(
-            "The input var's type should be LoDTensor, but the received is %s",
-            ctx->Inputs("Param").front(),
-            ctx->GetInputsVarType("Param").front()));
-    PADDLE_ENFORCE_EQ(
-        ctx->GetInputsVarType("Grad").front(),
-        framework::proto::VarType::LOD_TENSOR,
-        platform::errors::InvalidArgument(
-            "The input var's type should be LoDTensor, but the received is %s",
-            ctx->Inputs("Grad").front(),
-            ctx->GetInputsVarType("Grad").front()));
+    PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("Param").front(),
+                      framework::proto::VarType::LOD_TENSOR,
+                      platform::errors::InvalidArgument(
+                          "The input var's type should be phi::DenseTensor, "
+                          "but the received is %s",
+                          ctx->Inputs("Param").front(),
+                          ctx->GetInputsVarType("Param").front()));
+    PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("Grad").front(),
+                      framework::proto::VarType::LOD_TENSOR,
+                      platform::errors::InvalidArgument(
+                          "The input var's type should be phi::DenseTensor, "
+                          "but the received is %s",
+                          ctx->Inputs("Grad").front(),
+                          ctx->GetInputsVarType("Grad").front()));
 
     OP_INOUT_CHECK(
         ctx->HasOutput("ParamOut"), "Output", "ParamOut", "DecayedAdagradOp");
diff --git a/paddle/fluid/operators/optimizers/decayed_adagrad_op.h b/paddle/fluid/operators/optimizers/decayed_adagrad_op.h
index 8c5c6a3f445..e5925866b83 100644
--- a/paddle/fluid/operators/optimizers/decayed_adagrad_op.h
+++ b/paddle/fluid/operators/optimizers/decayed_adagrad_op.h
@@ -27,7 +27,7 @@ class DecayedAdagradOpKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE_EQ(param_var->IsType<phi::DenseTensor>(),
                       true,
                       platform::errors::InvalidArgument(
-                          "The Var(%s)'s type should be LoDTensor, "
+                          "The Var(%s)'s type should be phi::DenseTensor, "
                           "but the received is %s",
                           ctx.InputNames("Param").front(),
                           framework::ToTypeName(param_var->Type())));
@@ -35,7 +35,7 @@ class DecayedAdagradOpKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE_EQ(grad_var->IsType<phi::DenseTensor>(),
                       true,
                       platform::errors::InvalidArgument(
-                          "The Var(%s)'s type should be LoDTensor, "
+                          "The Var(%s)'s type should be phi::DenseTensor, "
                           "but the received is %s",
                           ctx.InputNames("Grad").front(),
                           framework::ToTypeName(grad_var->Type())));
diff --git a/paddle/fluid/operators/optimizers/dpsgd_op.cc b/paddle/fluid/operators/optimizers/dpsgd_op.cc
index 9d522031acf..e866a97f1dd 100644
--- a/paddle/fluid/operators/optimizers/dpsgd_op.cc
+++ b/paddle/fluid/operators/optimizers/dpsgd_op.cc
@@ -36,18 +36,18 @@ class DpsgdOp : public framework::OperatorWithKernel {
         true,
         platform::errors::NotFound(
             "Input(LearningRate) of DpsgdOp should not be null."));
-    PADDLE_ENFORCE_EQ(
-        ctx->GetInputsVarType("Param").front(),
-        framework::proto::VarType::LOD_TENSOR,
-        platform::errors::InvalidArgument(
-            "The input var's type should be LoDTensor, but the received is %s",
-            ctx->GetInputsVarType("Param").front()));
-    PADDLE_ENFORCE_EQ(
-        ctx->GetInputsVarType("Grad").front(),
-        framework::proto::VarType::LOD_TENSOR,
-        platform::errors::InvalidArgument(
-            "The input var's type should be LoDTensor, but the received is %s",
-            ctx->GetInputsVarType("Grad").front()));
+    PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("Param").front(),
+                      framework::proto::VarType::LOD_TENSOR,
+                      platform::errors::InvalidArgument(
+                          "The input var's type should be phi::DenseTensor, "
+                          "but the received is %s",
+                          ctx->GetInputsVarType("Param").front()));
+    PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("Grad").front(),
+                      framework::proto::VarType::LOD_TENSOR,
+                      platform::errors::InvalidArgument(
+                          "The input var's type should be phi::DenseTensor, "
+                          "but the received is %s",
+                          ctx->GetInputsVarType("Grad").front()));
 
     PADDLE_ENFORCE_EQ(ctx->HasOutput("ParamOut"),
                       true,
diff --git a/paddle/fluid/operators/optimizers/dpsgd_op.h b/paddle/fluid/operators/optimizers/dpsgd_op.h
index 0d89ca2b6a7..d6ee84f4b7b 100644
--- a/paddle/fluid/operators/optimizers/dpsgd_op.h
+++ b/paddle/fluid/operators/optimizers/dpsgd_op.h
@@ -32,7 +32,7 @@ class DpsgdOpKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE_EQ(param_var->IsType<phi::DenseTensor>(),
                       true,
                       platform::errors::InvalidArgument(
-                          "The Var(%s)'s type should be LoDTensor, "
+                          "The Var(%s)'s type should be phi::DenseTensor, "
                           "but the received is %s",
                           ctx.InputNames("Param").front(),
                           framework::ToTypeName(param_var->Type())));
@@ -41,7 +41,7 @@ class DpsgdOpKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE_EQ(grad_var->IsType<phi::DenseTensor>(),
                       true,
                       platform::errors::InvalidArgument(
-                          "The Var(%s)'s type should be LoDTensor, "
+                          "The Var(%s)'s type should be phi::DenseTensor, "
                           "but the received is %s",
                           ctx.InputNames("Grad").front(),
                           framework::ToTypeName(grad_var->Type())));
diff --git a/paddle/fluid/operators/optimizers/lamb_op.cc b/paddle/fluid/operators/optimizers/lamb_op.cc
index 3e2ee495b05..df55ffa116a 100644
--- a/paddle/fluid/operators/optimizers/lamb_op.cc
+++ b/paddle/fluid/operators/optimizers/lamb_op.cc
@@ -52,10 +52,10 @@ class LambOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("Param",
-             "(LoDTensor, default LoDTensor<float>) "
+             "(phi::DenseTensor, default phi::DenseTensor<float>) "
              "Input parameter that has to be updated.");
     AddInput("Grad",
-             "(LoDTensor, default LoDTensor<float>) "
+             "(phi::DenseTensor, default phi::DenseTensor<float>) "
              "Input gradient of the parameter.");
     AddInput("LearningRate", "(Tensor) Learning rate.");
     AddInput("Moment1", "(Tensor) Input first moment.");
@@ -63,7 +63,7 @@ class LambOpMaker : public framework::OpProtoAndCheckerMaker {
     AddInput("Beta1Pow", "(Tensor) Input beta1 power accumulator.");
     AddInput("Beta2Pow", "(Tensor) Input beta2 power accumulator.");
     AddInput("MasterParam",
-             "(LoDTensor, default LoDTensor<float>) "
+             "(phi::DenseTensor, default phi::DenseTensor<float>) "
              "Input master parameter that has to be updated.")
         .AsDispensable();
     AddInput(
diff --git a/paddle/fluid/operators/optimizers/lars_momentum_op.cc b/paddle/fluid/operators/optimizers/lars_momentum_op.cc
index 30e08779f51..a5c641cc70a 100644
--- a/paddle/fluid/operators/optimizers/lars_momentum_op.cc
+++ b/paddle/fluid/operators/optimizers/lars_momentum_op.cc
@@ -37,12 +37,12 @@ class LarsMomentumOp : public framework::OperatorWithKernel {
                    "Output",
                    "VelocityOut",
                    "LarsMomentum");
-    PADDLE_ENFORCE_EQ(
-        ctx->GetInputsVarType("Param").front(),
-        framework::proto::VarType::LOD_TENSOR,
-        platform::errors::InvalidArgument(
-            "The input var's type should be LoDTensor, but the received is %s",
-            ctx->GetInputsVarType("Param").front()));
+    PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("Param").front(),
+                      framework::proto::VarType::LOD_TENSOR,
+                      platform::errors::InvalidArgument(
+                          "The input var's type should be phi::DenseTensor, "
+                          "but the received is %s",
+                          ctx->GetInputsVarType("Param").front()));
 
     auto lr_dims = ctx->GetInputsDim("LearningRate");
     auto grad_dim = ctx->GetInputsDim("Grad");
@@ -102,7 +102,7 @@ class LarsMomentumOp : public framework::OperatorWithKernel {
       PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("Grad")[i],
                         framework::proto::VarType::LOD_TENSOR,
                         platform::errors::InvalidArgument(
-                            "The Var(%s)'s type should be LoDTensor, "
+                            "The Var(%s)'s type should be phi::DenseTensor, "
                             "but the received is %s",
                             ctx->Inputs("Grad")[i].front(),
                             ctx->GetInputsVarType("Grad")[i]));
@@ -145,31 +145,31 @@ class LarsMomentumOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("Param",
-             "(LoDTensor, default LoDTensor<float>) "
+             "(phi::DenseTensor, default phi::DenseTensor<float>) "
              "Input parameter that has to be updated")
         .AsDuplicable();
     AddInput("Grad",
-             "(LoDTensor, default LoDTensor<float>) "
+             "(phi::DenseTensor, default phi::DenseTensor<float>) "
              "Input gradient of the parameter")
         .AsDuplicable();
     AddInput("Velocity",
-             "(LoDTensor, default LoDTensor<float>) "
+             "(phi::DenseTensor, default phi::DenseTensor<float>) "
              "Input velocity (corresponding to the parameter) "
              "that has to be updated")
         .AsDuplicable();
     AddInput("LearningRate",
-             "(LoDTensor, default LoDTensor<float>) "
+             "(phi::DenseTensor, default phi::DenseTensor<float>) "
              "Input learning rate")
         .AsDuplicable();
     AddInput("MasterParam", "FP32 master weight for AMP.")
         .AsDuplicable()
         .AsDispensable();
     AddOutput("ParamOut",
-              "(LoDTensor) This output is updated parameter. "
+              "(phi::DenseTensor) This output is updated parameter. "
               "It shared memory with Input(Param).")
         .AsDuplicable();
     AddOutput("VelocityOut",
-              "(LoDTensor) This output is updated velocity. "
+              "(phi::DenseTensor) This output is updated velocity. "
               "It shared memory with Input(Velocity).")
         .AsDuplicable();
     AddOutput("MasterParamOut",
diff --git a/paddle/fluid/operators/optimizers/momentum_op.h b/paddle/fluid/operators/optimizers/momentum_op.h
index 847601777eb..ad1ae550745 100644
--- a/paddle/fluid/operators/optimizers/momentum_op.h
+++ b/paddle/fluid/operators/optimizers/momentum_op.h
@@ -54,12 +54,12 @@ class MomentumOp : public framework::OperatorWithKernel {
         true,
         platform::errors::NotFound(
             "Input(LearningRate) of Momentum should not be null."));
-    PADDLE_ENFORCE_EQ(
-        ctx->GetInputsVarType("Param").front(),
-        framework::proto::VarType::LOD_TENSOR,
-        platform::errors::InvalidArgument(
-            "The input var's type should be LoDTensor, but the received is %s",
-            ctx->GetInputsVarType("Param").front()));
+    PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("Param").front(),
+                      framework::proto::VarType::LOD_TENSOR,
+                      platform::errors::InvalidArgument(
+                          "The input var's type should be phi::DenseTensor, "
+                          "but the received is %s",
+                          ctx->GetInputsVarType("Param").front()));
 
     PADDLE_ENFORCE_EQ(ctx->HasOutput("ParamOut"),
                       true,
diff --git a/paddle/fluid/operators/optimizers/rmsprop_op_npu.cc b/paddle/fluid/operators/optimizers/rmsprop_op_npu.cc
index e285dc6d202..579bc76be5f 100644
--- a/paddle/fluid/operators/optimizers/rmsprop_op_npu.cc
+++ b/paddle/fluid/operators/optimizers/rmsprop_op_npu.cc
@@ -16,16 +16,15 @@ namespace paddle {
 namespace operators {
 
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 
 template <typename DeviceContext, typename T>
 class RMSPROPNPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &ctx) const override {
     auto *grad_var = ctx.InputVar("Grad");
-    auto *param_out = ctx.Output<LoDTensor>("ParamOut");
-    auto *moment_out = ctx.Output<LoDTensor>("MomentOut");
-    auto *mean_square_out = ctx.Output<LoDTensor>("MeanSquareOut");
+    auto *param_out = ctx.Output<phi::DenseTensor>("ParamOut");
+    auto *moment_out = ctx.Output<phi::DenseTensor>("MomentOut");
+    auto *mean_square_out = ctx.Output<phi::DenseTensor>("MeanSquareOut");
 
     param_out->mutable_data<T>(ctx.GetPlace());
     moment_out->mutable_data<T>(ctx.GetPlace());
@@ -34,17 +33,17 @@ class RMSPROPNPUKernel : public framework::OpKernel<T> {
     auto epsilon = static_cast<T>(ctx.Attr<float>("epsilon"));
     auto rho = static_cast<T>(ctx.Attr<float>("decay"));
     auto momentum = static_cast<T>(ctx.Attr<float>("momentum"));
-    auto *p_tensor = ctx.Input<LoDTensor>("Param");
-    auto *ms_tensor = ctx.Input<LoDTensor>("MeanSquare");
-    auto *lr_tensor = ctx.Input<LoDTensor>("LearningRate");
-    auto *mom_tensor = ctx.Input<LoDTensor>("Moment");
+    auto *p_tensor = ctx.Input<phi::DenseTensor>("Param");
+    auto *ms_tensor = ctx.Input<phi::DenseTensor>("MeanSquare");
+    auto *lr_tensor = ctx.Input<phi::DenseTensor>("LearningRate");
+    auto *mom_tensor = ctx.Input<phi::DenseTensor>("Moment");
     bool centered = ctx.Attr<bool>("centered");
 
     auto stream =
         ctx.template device_context<paddle::platform::NPUDeviceContext>()
             .stream();
-    if (grad_var->IsType<LoDTensor>()) {
-      auto *grad_tensor = ctx.Input<LoDTensor>("Grad");
+    if (grad_var->IsType<phi::DenseTensor>()) {
+      auto *grad_tensor = ctx.Input<phi::DenseTensor>("Grad");
       if (centered) {
         framework::NPUAttributeMap attr_input = {{"use_locking", false}};
         const Tensor *rho_tensor = nullptr;
diff --git a/paddle/fluid/operators/optimizers/sgd_op.cu b/paddle/fluid/operators/optimizers/sgd_op.cu
index 385e9a70e54..79cce5abdb0 100644
--- a/paddle/fluid/operators/optimizers/sgd_op.cu
+++ b/paddle/fluid/operators/optimizers/sgd_op.cu
@@ -72,7 +72,7 @@ class SGDOpKernel<phi::GPUContext, T> : public framework::OpKernel<T> {
     PADDLE_ENFORCE_EQ(param_var->IsType<phi::DenseTensor>(),
                       true,
                       platform::errors::InvalidArgument(
-                          "The Var(%s)'s type should be LoDTensor, "
+                          "The Var(%s)'s type should be phi::DenseTensor, "
                           "but the received is %s",
                           ctx.InputNames("Param").front(),
                           paddle::framework::ToTypeName(param_var->Type())));
@@ -107,7 +107,7 @@ class SGDOpKernel<phi::GPUContext, T> : public framework::OpKernel<T> {
             ? master_param_out->mutable_data<MPDType>(ctx.GetPlace())
             : nullptr;
 
-    // Actually, all tensors are LoDTensor except SelectedRows.
+    // Actually, all tensors are phi::DenseTensor except SelectedRows.
     if (grad_var->IsType<phi::DenseTensor>()) {
       auto* grad = ctx.Input<phi::DenseTensor>("Grad");
 
diff --git a/paddle/fluid/operators/optimizers/sgd_op.h b/paddle/fluid/operators/optimizers/sgd_op.h
index e3a2e6e17e9..ffdf1898f58 100644
--- a/paddle/fluid/operators/optimizers/sgd_op.h
+++ b/paddle/fluid/operators/optimizers/sgd_op.h
@@ -36,7 +36,7 @@ template <typename T>
 struct sgd_dense_param_kernel<T,
                               framework::VarTypeTrait<phi::DenseTensor>::kId> {
   void operator()(const framework::ExecutionContext &ctx) const {
-    VLOG(4) << "[CPU]: sgd_dense_param_kernel<T, LoDTensor>";
+    VLOG(4) << "[CPU]: sgd_dense_param_kernel<T, phi::DenseTensor>";
     const auto *learning_rate = ctx.Input<phi::DenseTensor>("LearningRate");
     const auto *param = ctx.Input<phi::DenseTensor>("Param");
     auto *param_out = ctx.Output<phi::DenseTensor>("ParamOut");
@@ -95,7 +95,7 @@ template <>
 struct sgd_dense_param_kernel<platform::bfloat16,
                               framework::VarTypeTrait<phi::DenseTensor>::kId> {
   void operator()(const framework::ExecutionContext &ctx) const {
-    VLOG(4) << "[CPU]: sgd_dense_param_kernel<bfloat16, LoDTensor>";
+    VLOG(4) << "[CPU]: sgd_dense_param_kernel<bfloat16, phi::DenseTensor>";
     const auto *learning_rate = ctx.Input<phi::DenseTensor>("LearningRate");
     const auto *param = ctx.Input<phi::DenseTensor>("Param");
     auto *param_out = ctx.Output<phi::DenseTensor>("ParamOut");
diff --git a/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc b/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc
index dbdf5863758..046269a396e 100644
--- a/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc
+++ b/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc
@@ -90,7 +90,7 @@ class DistributedLookupTableOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("Ids",
-             "(LoDTensor) Ids's type should be LoDTensor"
+             "(phi::DenseTensor) Ids's type should be phi::DenseTensor. "
              "THe ids to be looked up in W.")
         .AsDuplicable();
 
@@ -98,8 +98,9 @@ class DistributedLookupTableOpMaker : public framework::OpProtoAndCheckerMaker {
              "(Tensor) The input represents embedding tensors, "
              "which is a learnable parameter.");
 
-    AddOutput("Outputs",
-              "(LoDTensor) The lookup results, which have the same type as W.")
+    AddOutput(
+        "Outputs",
+        "(phi::DenseTensor) The lookup results, which have the same type as W.")
         .AsDuplicable();
 
     AddAttr<int>("table_id", "sparse table id").SetDefault(0);
diff --git a/paddle/fluid/operators/pscore/distributed_push_sparse_op.cc b/paddle/fluid/operators/pscore/distributed_push_sparse_op.cc
index a2bf63da10b..97391bc0e8b 100644
--- a/paddle/fluid/operators/pscore/distributed_push_sparse_op.cc
+++ b/paddle/fluid/operators/pscore/distributed_push_sparse_op.cc
@@ -63,22 +63,23 @@ class DistributedPushSparseOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("Ids",
-             "(LoDTensor) Ids's type should be LoDTensor"
+             "(phi::DenseTensor) Ids's type should be phi::DenseTensor. "
              "THe ids to be looked up in W.")
         .AsDuplicable();
 
     AddInput("Shows",
-             "(LoDTensor) Shows's type should be LoDTensor"
+             "(phi::DenseTensor) Shows's type should be phi::DenseTensor. "
              "THe shows default to be 1.")
         .AsDuplicable();
 
     AddInput("Clicks",
-             "(LoDTensor) Clicks's type should be LoDTensor"
+             "(phi::DenseTensor) Clicks's type should be phi::DenseTensor. "
              "THe clicks usually equal to label.")
         .AsDuplicable();
 
-    AddOutput("Outputs",
-              "(LoDTensor) The lookup results, which have the same type as W.")
+    AddOutput(
+        "Outputs",
+        "(phi::DenseTensor) The lookup results, which have the same type as W.")
         .AsDuplicable();
 
     AddAttr<int>("table_id", "sparse table id").SetDefault(0);
diff --git a/paddle/fluid/operators/pscore/fake_init_op.cc b/paddle/fluid/operators/pscore/fake_init_op.cc
index b59f5fd0306..cefd0ee5855 100644
--- a/paddle/fluid/operators/pscore/fake_init_op.cc
+++ b/paddle/fluid/operators/pscore/fake_init_op.cc
@@ -45,7 +45,7 @@ class FakeInitOp : public framework::OperatorBase {
     } else {
       PADDLE_THROW(platform::errors::InvalidArgument(
           "fake init op's output only"
-          "supports SelectedRows and LoDTensor"));
+          " supports SelectedRows and phi::DenseTensor"));
     }
   }
 };
diff --git a/paddle/fluid/operators/reader/create_py_reader_op.cc b/paddle/fluid/operators/reader/create_py_reader_op.cc
index 718298112fc..1e6ef050ae1 100644
--- a/paddle/fluid/operators/reader/create_py_reader_op.cc
+++ b/paddle/fluid/operators/reader/create_py_reader_op.cc
@@ -109,7 +109,7 @@ class CreatePyReaderOpMaker : public FileReaderMakerBase {
         .SetDefault(1);
 
     AddComment(R"DOC(
-      Create PyReader to support LoDTensor data feeding in Python side.
+      Create PyReader to support phi::DenseTensor data feeding on the Python side.
       )DOC");
   }
 };
diff --git a/paddle/fluid/operators/reader/read_op.cc b/paddle/fluid/operators/reader/read_op.cc
index ac8c4362f1a..1c65669adc3 100644
--- a/paddle/fluid/operators/reader/read_op.cc
+++ b/paddle/fluid/operators/reader/read_op.cc
@@ -171,7 +171,7 @@ class ReadOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("Reader", "(ReaderHolder) The executed reader.");
-    AddOutput("Out", "(LoDTensor) The output data.").AsDuplicable();
+    AddOutput("Out", "(phi::DenseTensor) The output data.").AsDuplicable();
     AddAttr<bool>(
         "throw_eof_exp",
         "If set true, an exception will be thrown when the Reader "
-- 
GitLab