replace LoDTensor with phi::DenseTensor in fluid\operators\*\ except sequence_ops (#48418)

30a31a53 · 张春乔 · GitHub · 8424cf28 · 30a31a53 · 30a31a53
106 changed files
--- a/paddle/fluid/operators/cinn/cinn_instruction_run_op.cc
+++ b/paddle/fluid/operators/cinn/cinn_instruction_run_op.cc
@@ -68,11 +68,11 @@ class CinnInstructionRunOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput(kX,
-             "(vector<LoDTensor>)"
+             "(vector<phi::DenseTensor>)"
             "which are the input arguments of this cinn instruction")
        .AsDuplicable();
    AddOutput(kOutputs,
-              "(vector<LoDTensor>)"
+              "(vector<phi::DenseTensor>)"
              "which are the output arguments of this cinn instruction")
        .AsDuplicable();
    AddAttr<int64_t>(

--- a/paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc
+++ b/paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc
@@ -74,7 +74,7 @@ class TestCinnInstructionRunOp : public ::testing::Test {
    // so a cinn_instruction_run_op will throw an error
    framework::Scope scope;
    InitVariablesWithRandomValue<float>({"x", "y"}, {10, 20}, place, &scope);
-    scope.Var(test_op_out_name)->GetMutable<LoDTensor>();
+    scope.Var(test_op_out_name)->GetMutable<phi::DenseTensor>();
    ASSERT_THROW(cinn_instruction_run_op->Run(scope, place),
                 paddle::platform::EnforceNotMet);
@@ -83,7 +83,7 @@ class TestCinnInstructionRunOp : public ::testing::Test {
    // of both type float and int
    cinn_launch_op->Run(scope, place);
    scope.EraseVars({"x", "y", test_op_out_name});
-    scope.Var(test_op_out_name)->GetMutable<LoDTensor>();
+    scope.Var(test_op_out_name)->GetMutable<phi::DenseTensor>();
    InitVariablesWithRandomValue<int>({"x", "y"}, {30, 40}, place, &scope);
    cinn_launch_op->Run(scope, place);
  }
@@ -92,8 +92,8 @@ class TestCinnInstructionRunOp : public ::testing::Test {
    // Run ops and check the computation results
    framework::Scope scope;
    InitVariablesWithRandomValue<float>({"x", "y"}, {10, 20}, place, &scope);
-    scope.Var(test_op_out_name)->GetMutable<LoDTensor>();
+    scope.Var(test_op_out_name)->GetMutable<phi::DenseTensor>();
-    scope.Var(add_op_out_name)->GetMutable<LoDTensor>();
+    scope.Var(add_op_out_name)->GetMutable<phi::DenseTensor>();
    elementwise_add_op->Run(scope, place);
    cinn_launch_op->Run(scope, place);
    CompareOpResult<float>(scope.GetVar(test_op_out_name),

--- a/paddle/fluid/operators/cinn/cinn_launch_context.cc
+++ b/paddle/fluid/operators/cinn/cinn_launch_context.cc
@@ -45,7 +45,6 @@
 namespace paddle {
 namespace operators::details {
-using LoDTensor = phi::DenseTensor;
 using framework::ParallelExecutor;
 using framework::Scope;
 using CinnInstruction = ::cinn::hlir::framework::Instruction;
@@ -268,7 +267,8 @@ void CinnLaunchContext::AssignExternalVariable(const std::string& var_name) {
  // assign external malloc/free callbacks of cinn_buffer_t
  cinn_buffer->external_malloc = new std::function<int(void*, cinn_buffer_t*)>(
      [this, var_name](void* ctx, cinn_buffer_t* buffer) {
-        auto* tensor = cached_scope_->GetVar(var_name)->GetMutable<LoDTensor>();
+        auto* tensor =
+            cached_scope_->GetVar(var_name)->GetMutable<phi::DenseTensor>();
        tensor->Resize(framework::DDim(buffer->dims, buffer->dimensions));
        buffer->memory = reinterpret_cast<uint8_t*>(tensor->mutable_data(
            *cached_place_,
@@ -294,7 +294,7 @@ void CinnLaunchContext::AssignInternalVariable(const std::string& var_name) {
  cinn_buffer->external_malloc = new std::function<int(void*, cinn_buffer_t*)>(
      [this, var_name](void* ctx, cinn_buffer_t* buffer) {
        auto* tensor =
-            cached_temp_scope_->Var(var_name)->GetMutable<LoDTensor>();
+            cached_temp_scope_->Var(var_name)->GetMutable<phi::DenseTensor>();
        tensor->Resize(framework::DDim(buffer->dims, buffer->dimensions));
        buffer->memory = reinterpret_cast<uint8_t*>(tensor->mutable_data(
            *cached_place_,
@@ -306,8 +306,8 @@ void CinnLaunchContext::AssignInternalVariable(const std::string& var_name) {
  // if no instruction use it
  cinn_buffer->external_free = new std::function<int(void*, cinn_buffer_t*)>(
      [this, var_name](void* ctx, cinn_buffer_t* buffer) {
-        auto* tensor =
+        auto* tensor = cached_temp_scope_->GetVar(var_name)
-            cached_temp_scope_->GetVar(var_name)->GetMutable<LoDTensor>();
+                           ->GetMutable<phi::DenseTensor>();
        tensor->clear();
        return 0;
      });
@@ -438,8 +438,8 @@ ParallelExecutor* CinnLaunchContext::InitializePE(const platform::Place& place,
    auto* var = scope->GetVar(var_name);
    auto* buffer = GetCinnBufferOfVar(var_name);
    auto dim = framework::DDim(buffer->dims, buffer->dimensions);
-    var->GetMutable<LoDTensor>()->Resize(dim);
+    var->GetMutable<phi::DenseTensor>()->Resize(dim);
-    var->GetMutable<LoDTensor>()->mutable_data(
+    var->GetMutable<phi::DenseTensor>()->mutable_data(
        place, framework::paddle2cinn::TransToPaddleDataType(buffer->type));
  }
  return parallel_executor_.get();

--- a/paddle/fluid/operators/cinn/cinn_launch_context_test.cc
+++ b/paddle/fluid/operators/cinn/cinn_launch_context_test.cc
@@ -40,7 +40,6 @@ USE_OP(cinn_instruction_run);
 namespace paddle {
 namespace operators::details {
-using LoDTensor = phi::DenseTensor;
 using framework::OpDesc;
 using framework::ParallelExecutor;
 using framework::ProgramDesc;
@@ -203,8 +202,8 @@ TEST_F(CinnLaunchContextTest, TestConstructResult) {
 TEST_F(CinnLaunchContextTest, TestCheckTensorEquivalent) {
  platform::CPUPlace place;
  framework::Scope scope;
-  auto* tensor1 = scope.Var("var1")->GetMutable<LoDTensor>();
+  auto* tensor1 = scope.Var("var1")->GetMutable<phi::DenseTensor>();
-  auto* tensor2 = scope.Var("var2")->GetMutable<LoDTensor>();
+  auto* tensor2 = scope.Var("var2")->GetMutable<phi::DenseTensor>();
  // dimension not equivalent
  tensor1->mutable_data<float>(phi::make_ddim({3, 5}), place);
@@ -264,7 +263,7 @@ TEST_F(CinnLaunchContextTest, TestCallbackAssignment) {
  launch_context->UpdateCapturedEnv(scope, place);
  // assign external variables
-  auto* tensor1 = scope.Var("var1")->GetMutable<LoDTensor>();
+  auto* tensor1 = scope.Var("var1")->GetMutable<phi::DenseTensor>();
  float* data1 = tensor1->mutable_data<float>(phi::make_ddim({3, 4}), place);
  data1[0] = 9.99f;
  data1[10] = 19.99f;

--- a/paddle/fluid/operators/cinn/cinn_launch_op.cc
+++ b/paddle/fluid/operators/cinn/cinn_launch_op.cc
@@ -128,18 +128,18 @@ class CinnLaunchOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput(kX,
-             "(vector<LoDTensor>)"
+             "(vector<phi::DenseTensor>)"
             "which are the input of graph inside the CinnLaunchOp"
             "excluding kNoNeedBufferX.")
        .AsDuplicable();
    AddInput(kNoNeedBufferX,
-             "(vector<LoDTensor>)"
+             "(vector<phi::DenseTensor>)"
             "which are the input of graph inside the CinnLaunchOp but"
             "their buffer are not needed.")
        .AsDuplicable()
        .AsDispensable();
    AddOutput(kOutputs,
-              "(vector<LoDTensor>)"
+              "(vector<phi::DenseTensor>)"
              "which are the output of graph inside the CinnLaunchOp.")
        .AsDuplicable();
    AddAttr<int64_t>(

--- a/paddle/fluid/operators/cinn/cinn_launch_op.h
+++ b/paddle/fluid/operators/cinn/cinn_launch_op.h
@@ -34,7 +34,6 @@ DECLARE_bool(enable_pe_launch_cinn);
 namespace paddle {
 namespace operators {
-using LoDTensor = phi::DenseTensor;
 using CinnCompiler = framework::paddle2cinn::CinnCompiler;
 using CinnCompiledObject = framework::paddle2cinn::CinnCompiledObject;
@@ -76,29 +75,30 @@ class CinnLaunchOpKernel : public framework::OpKernel<T> {
            << "value:\n"
            << CinnCompiler::GetInstance()->ReadableKey(compilation_key);
-    std::map<std::string, const LoDTensor*> inputs_name2tensor;
+    std::map<std::string, const phi::DenseTensor*> inputs_name2tensor;
    std::vector<std::string> input_x_variable_names;
    std::vector<std::string> input_no_need_buffer_variable_names;
    auto add_name2tensor_fn =
-        [&inputs_name2tensor](const std::vector<std::string>& variable_names,
+        [&inputs_name2tensor](
-                              const std::vector<const LoDTensor*>& tensors) {
+            const std::vector<std::string>& variable_names,
+            const std::vector<const phi::DenseTensor*>& tensors) {
          std::transform(
              variable_names.begin(),
              variable_names.end(),
              tensors.begin(),
              std::inserter(inputs_name2tensor, inputs_name2tensor.end()),
-              [](const std::string& name, const LoDTensor* tensor) {
+              [](const std::string& name, const phi::DenseTensor* tensor) {
                return std::make_pair(name, tensor);
              });
        };
-    auto input_x_tensors = ctx.MultiInput<LoDTensor>(kX);
+    auto input_x_tensors = ctx.MultiInput<phi::DenseTensor>(kX);
    if (!input_x_tensors.empty()) {
      input_x_variable_names = std::move(ctx.InputNames(kX));
      add_name2tensor_fn(input_x_variable_names, input_x_tensors);
    }
    auto input_no_need_buffer_tensors =
-        ctx.MultiInput<LoDTensor>(kNoNeedBufferX);
+        ctx.MultiInput<phi::DenseTensor>(kNoNeedBufferX);
    if (!input_no_need_buffer_tensors.empty()) {
      input_no_need_buffer_variable_names =
          std::move(ctx.InputNames(kNoNeedBufferX));

--- a/paddle/fluid/operators/cinn/cinn_launch_op_test.cc
+++ b/paddle/fluid/operators/cinn/cinn_launch_op_test.cc
@@ -78,8 +78,8 @@ class TestCinnLaunchOp : public ::testing::Test {
    // Run ops and check the computation results
    framework::Scope scope;
    InitVariablesWithRandomValue<float>({"x", "y"}, {10, 20}, place, &scope);
-    scope.Var(test_op_out_name)->GetMutable<LoDTensor>();
+    scope.Var(test_op_out_name)->GetMutable<phi::DenseTensor>();
-    scope.Var(add_op_out_name)->GetMutable<LoDTensor>();
+    scope.Var(add_op_out_name)->GetMutable<phi::DenseTensor>();
    elementwise_add_op->Run(scope, place);
    cinn_launch_op->Run(scope, place);
    CompareOpResult<float>(scope.GetVar(test_op_out_name),

--- a/paddle/fluid/operators/cinn/test_helper.h
+++ b/paddle/fluid/operators/cinn/test_helper.h
@@ -29,7 +29,6 @@ limitations under the License. */
 namespace paddle::operators {
-using LoDTensor = phi::DenseTensor;
 using Variable = framework::Variable;
 using Graph = framework::ir::Graph;
 using Node = framework::ir::Node;
@@ -97,11 +96,11 @@ void InitVariablesWithRandomValue(const std::vector<std::string>& var_names,
  std::default_random_engine engine(seed());
  std::uniform_real_distribution<float> dist(0, 100);
-  LoDTensor tmp_tensor;
+  phi::DenseTensor tmp_tensor;
  auto* tmp_data =
      tmp_tensor.mutable_data<DataType>(common_ddim, platform::CPUPlace());
  for (const auto& var_name : var_names) {
-    auto* tensor = scope->Var(var_name)->GetMutable<LoDTensor>();
+    auto* tensor = scope->Var(var_name)->GetMutable<phi::DenseTensor>();
    tensor->mutable_data<DataType>(common_ddim, place);
    for (auto i = 0; i < tensor->numel(); ++i) {
      tmp_data[i] = static_cast<DataType>(dist(engine));
@@ -112,11 +111,12 @@ void InitVariablesWithRandomValue(const std::vector<std::string>& var_names,
 template <typename DataType>
 void CompareOpResult(Variable* test_out, Variable* expected_out) {
-  LoDTensor test_tensor, expected_tensor;
+  phi::DenseTensor test_tensor, expected_tensor;
  paddle::framework::TensorCopySync(
-      test_out->Get<LoDTensor>(), platform::CPUPlace(), &test_tensor);
+      test_out->Get<phi::DenseTensor>(), platform::CPUPlace(), &test_tensor);
-  paddle::framework::TensorCopySync(
+  paddle::framework::TensorCopySync(expected_out->Get<phi::DenseTensor>(),
-      expected_out->Get<LoDTensor>(), platform::CPUPlace(), &expected_tensor);
+                                    platform::CPUPlace(),
+                                    &expected_tensor);
  ASSERT_TRUE(test_tensor.IsInitialized());
  ASSERT_TRUE(expected_tensor.IsInitialized());

--- a/paddle/fluid/operators/collective/c_embedding_op.cc
+++ b/paddle/fluid/operators/collective/c_embedding_op.cc
@@ -162,7 +162,7 @@ class CEmbeddingOpGradVarTypeInference : public framework::VarTypeInference {
  void operator()(framework::InferVarTypeContext* ctx) const override {
    auto out_var_name = framework::GradVarName("W");
    VLOG(3) << "c_embedding_grad op " << framework::GradVarName("W")
-            << " is set to LoDTensor";
+            << " is set to phi::DenseTensor";
    ctx->SetOutputType(out_var_name, framework::proto::VarType::LOD_TENSOR);
    ctx->SetOutputDataType(out_var_name, ctx->GetInputDataType("W"));
  }

--- a/paddle/fluid/operators/collective/c_embedding_op.cu
+++ b/paddle/fluid/operators/collective/c_embedding_op.cu
@@ -86,9 +86,9 @@ template <typename T>
 class CEmbeddingCUDAKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &context) const override {
-    auto *table_t = context.Input<LoDTensor>("W");
+    auto *table_t = context.Input<phi::DenseTensor>("W");
-    auto *ids_t = context.Input<LoDTensor>("Ids");
+    auto *ids_t = context.Input<phi::DenseTensor>("Ids");
-    auto *output_t = context.Output<LoDTensor>("Out");
+    auto *output_t = context.Output<phi::DenseTensor>("Out");
    const auto &dev_ctx = context.template device_context<phi::GPUContext>();
    const int64_t start_idx = context.Attr<int64_t>("start_index");
@@ -142,9 +142,11 @@ class CEmbeddingGradCUDAKernel : public framework::OpKernel<T> {
  void Compute(const framework::ExecutionContext &context) const override {
    const auto &dev_ctx = context.template device_context<phi::GPUContext>();
    const int64_t start_idx = context.Attr<int64_t>("start_index");
-    auto ids_t = context.Input<LoDTensor>("Ids");
+    auto ids_t = context.Input<phi::DenseTensor>("Ids");
-    auto d_output_t = context.Input<LoDTensor>(framework::GradVarName("Out"));
+    auto d_output_t =
-    auto d_table_t = context.Output<LoDTensor>(framework::GradVarName("W"));
+        context.Input<phi::DenseTensor>(framework::GradVarName("Out"));
+    auto d_table_t =
+        context.Output<phi::DenseTensor>(framework::GradVarName("W"));
    int N = d_table_t->dims()[0];
    int D = d_table_t->dims()[1];

--- a/paddle/fluid/operators/collective/c_embedding_op.h
+++ b/paddle/fluid/operators/collective/c_embedding_op.h
@@ -25,8 +25,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-using LoDTensor = phi::DenseTensor;
 inline void CheckTableValid() {}
 template <typename TIds, typename TData>
@@ -57,9 +55,9 @@ template <typename T>
 class CEmbeddingOpCPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* table_t = ctx.Input<LoDTensor>("W");
+    auto* table_t = ctx.Input<phi::DenseTensor>("W");
-    auto* ids_t = ctx.Input<LoDTensor>("Ids");
+    auto* ids_t = ctx.Input<phi::DenseTensor>("Ids");
-    auto* output_t = ctx.Output<LoDTensor>("Out");
+    auto* output_t = ctx.Output<phi::DenseTensor>("Out");
    const int64_t start_idx = ctx.Attr<int64_t>("start_index");
    VLOG(10) << "table_dims:" << table_t->dims();
@@ -119,10 +117,12 @@ class CEmbeddingGradOpCPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    const int64_t start_idx = context.Attr<int64_t>("start_index");
-    auto ids_t = context.Input<LoDTensor>("Ids");
+    auto ids_t = context.Input<phi::DenseTensor>("Ids");
-    auto d_output_t = context.Input<LoDTensor>(framework::GradVarName("Out"));
+    auto d_output_t =
-    auto table_t = context.Input<LoDTensor>("W");
+        context.Input<phi::DenseTensor>(framework::GradVarName("Out"));
-    auto table_grad_t = context.Output<LoDTensor>(framework::GradVarName("W"));
+    auto table_t = context.Input<phi::DenseTensor>("W");
+    auto table_grad_t =
+        context.Output<phi::DenseTensor>(framework::GradVarName("W"));
    T* table_grad_data =
        table_grad_t->mutable_data<T>(table_t->dims(), context.GetPlace());

--- a/paddle/fluid/operators/collective/c_embedding_op_npu.cc
+++ b/paddle/fluid/operators/collective/c_embedding_op_npu.cc
@@ -111,9 +111,9 @@ void shard_index(const Tensor &table_t,
 template <typename TIds, typename T>
 void NPUGetIdsEmbedding(const framework::ExecutionContext &context) {
-  auto *table_t = context.Input<LoDTensor>("W");
+  auto *table_t = context.Input<phi::DenseTensor>("W");
-  auto *ids_t = context.Input<LoDTensor>("Ids");
+  auto *ids_t = context.Input<phi::DenseTensor>("Ids");
-  auto *output_t = context.Output<LoDTensor>("Out");
+  auto *output_t = context.Output<phi::DenseTensor>("Out");
  const int64_t start_idx = context.Attr<int64_t>("start_index");
  auto stream =
@@ -165,7 +165,7 @@ template <typename T>
 class CEmbeddingNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &context) const override {
-    auto *ids_t = context.Input<LoDTensor>("Ids");
+    auto *ids_t = context.Input<phi::DenseTensor>("Ids");
    const auto &index_type = framework::TransToProtoVarType(ids_t->dtype());
    if (index_type == framework::proto::VarType::INT32) {
@@ -181,10 +181,12 @@ template <typename TIds, typename T>
 void NPUUpdateEmbedding(const framework::ExecutionContext &context) {
  // get inputs
  const int64_t start_idx = context.Attr<int64_t>("start_index");
-  auto ids_t = context.Input<LoDTensor>("Ids");
+  auto ids_t = context.Input<phi::DenseTensor>("Ids");
-  auto d_output_t = context.Input<LoDTensor>(framework::GradVarName("Out"));
+  auto d_output_t =
+      context.Input<phi::DenseTensor>(framework::GradVarName("Out"));
  auto table_t = context.Input<phi::DenseTensor>("W");
-  auto table_grad_t = context.Output<LoDTensor>(framework::GradVarName("W"));
+  auto table_grad_t =
+      context.Output<phi::DenseTensor>(framework::GradVarName("W"));
  VLOG(10) << "ids_t:" << ids_t << ", d_output_t:" << d_output_t
           << ", table_t:" << table_t << ", table_grad_t" << table_grad_t;
@@ -243,7 +245,7 @@ template <typename T>
 class CEmbeddingGradNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &context) const override {
-    auto *ids_t = context.Input<LoDTensor>("Ids");
+    auto *ids_t = context.Input<phi::DenseTensor>("Ids");
    const auto &index_type = framework::TransToProtoVarType(ids_t->dtype());
    if (index_type == framework::proto::VarType::INT32) {

--- a/paddle/fluid/operators/collective/c_embedding_op_xpu.cc
+++ b/paddle/fluid/operators/collective/c_embedding_op_xpu.cc
@@ -18,15 +18,13 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-using LoDTensor = phi::DenseTensor;
 template <typename DeviceContext, typename T>
 class CEmbeddingOpXPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* table_t = ctx.Input<LoDTensor>("W");
+    auto* table_t = ctx.Input<phi::DenseTensor>("W");
-    auto* ids_t = ctx.Input<LoDTensor>("Ids");
+    auto* ids_t = ctx.Input<phi::DenseTensor>("Ids");
-    auto* output_t = ctx.Output<LoDTensor>("Out");
+    auto* output_t = ctx.Output<phi::DenseTensor>("Out");
    const int64_t start_index = ctx.Attr<int64_t>("start_index");
    const T* table_data = table_t->data<T>();
    T* output_data = output_t->mutable_data<T>(ctx.GetPlace());

--- a/paddle/fluid/operators/controlflow/conditional_block_op.cc
+++ b/paddle/fluid/operators/controlflow/conditional_block_op.cc
@@ -363,13 +363,14 @@ class ConditionalBlockGradOp : public ConditionalOp {
      }
      if (input_var->IsType<phi::DenseTensor>()) {
-        PADDLE_ENFORCE_EQ(outside_var->IsType<phi::DenseTensor>(),
+        PADDLE_ENFORCE_EQ(
-                          true,
+            outside_var->IsType<phi::DenseTensor>(),
-                          platform::errors::InvalidArgument(
+            true,
-                              "Type of outside_var %s is NOT LoDTensor, which "
+            platform::errors::InvalidArgument(
-                              "doesn't match input_var %s.",
+                "Type of outside_var %s is NOT phi::DenseTensor, which "
-                              outside_grad_name,
+                "doesn't match input_var %s.",
-                              input_name));
+                outside_grad_name,
+                input_name));
        AssignZeroToOutsideTensor(place,
                                  scope,
                                  input_var->Get<phi::DenseTensor>(),
@@ -402,7 +403,8 @@ class ConditionalBlockGradOp : public ConditionalOp {
      } else {
        // TODO(huihuangzheng): add support for SelectedRows
        PADDLE_THROW(platform::errors::InvalidArgument(
-            "Conditional block grad op doesn't support non-LoDTensor output "
+            "Conditional block grad op doesn't support non-phi::DenseTensor "
+            "output "
            "now."));
      }
    }
@@ -475,9 +477,9 @@ class ConditionalBlockGradInferShape : public framework::InferShapeBase {
 class ConditionalBlockGradInferVarType : public framework::VarTypeInference {
 public:
  void operator()(framework::InferVarTypeContext *ctx) const override {
-    // NOTE(Aurelius84): VarType of Output is LoDTensor by default. In case of
+    // NOTE(Aurelius84): VarType of Output is phi::DenseTensor by default. In
-    // Input is {Tensor, LoDTensorArray}, we need synchronous the Input's
+    // case of Input is {Tensor, LoDTensorArray}, we need synchronous the
-    // VarType into Input@GRAD to avoid generating {Tensor, Tensor} as
+    // Input's VarType into Input@GRAD to avoid generating {Tensor, Tensor} as
    // Input@GRAD.
    auto input_size = ctx->InputSize(ConditionalOp::kInputs);
    auto output_size =

--- a/paddle/fluid/operators/controlflow/conditional_block_op_test.cc
+++ b/paddle/fluid/operators/controlflow/conditional_block_op_test.cc
@@ -21,7 +21,6 @@ limitations under the License. */
 USE_NO_KERNEL_OP(conditional_block);
 USE_NO_KERNEL_OP(conditional_block_grad);
-using LoDTensor = phi::DenseTensor;
 using LoDTensorArray = paddle::framework::LoDTensorArray;
 using Scope = paddle::framework::Scope;
 using Variable = paddle::framework::Variable;
@@ -32,7 +31,7 @@ TEST(ConditionalBlockGrad, NoNeedRunLoDTensorArray) {
  Scope scope;
  Variable* cond_var = scope.Var("condition");
-  LoDTensor* cond_tensor = cond_var->GetMutable<LoDTensor>();
+  phi::DenseTensor* cond_tensor = cond_var->GetMutable<phi::DenseTensor>();
  paddle::framework::DDim cond_dims = phi::make_ddim({1});
  bool* cond_data = cond_tensor->mutable_data<bool>(cond_dims, place);
  cond_data[0] = false;
@@ -41,7 +40,7 @@ TEST(ConditionalBlockGrad, NoNeedRunLoDTensorArray) {
  LoDTensorArray* input_tensors = input_var->GetMutable<LoDTensorArray>();
  for (int i = 0; i < 5; ++i) {
    paddle::framework::DDim in_dims = phi::make_ddim({i + 1, i + 2});
-    LoDTensor lod_tensor;
+    phi::DenseTensor lod_tensor;
    float* in_data = lod_tensor.mutable_data<float>(in_dims, place);
    for (int j = 0; j < (i + 1) * (i + 2); ++j) {
      in_data[j] = static_cast<float>(j);

--- a/paddle/fluid/operators/controlflow/feed_op.cc
+++ b/paddle/fluid/operators/controlflow/feed_op.cc
@@ -29,7 +29,7 @@ namespace paddle {
 namespace operators {
 // FeedVariableVisitor is to feed the variable data
-// according to data type (LoDTensor or  Strings).
+// according to data type (phi::DenseTensor or  Strings).
 class FeedVariableVisitor {
 public:
  explicit FeedVariableVisitor(framework::Variable *out_var,
@@ -146,11 +146,11 @@ class FeedOpInfoMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X",
-             "(vector<LoDTensor>) "
+             "(vector<phi::DenseTensor>) "
-             "A feeding list of LoDTensor, which may have "
+             "A feeding list of phi::DenseTensor, which may have "
             "different dimension and data type.");
    AddOutput("Out",
-              "(LoDTensor) The LoDTensor which is a copy "
+              "(phi::DenseTensor) The phi::DenseTensor which is a copy "
              "of the col-th feeding "
              "object.");
    AddAttr<int>("col", "(int) The column index of current feeding object.");

--- a/paddle/fluid/operators/controlflow/fetch_op.cc
+++ b/paddle/fluid/operators/controlflow/fetch_op.cc
@@ -143,12 +143,14 @@ class FetchOpInfoMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X",
-             "(LoDTensor) The resulted LoDTensor which is expected to return "
+             "(phi::DenseTensor) The resulted phi::DenseTensor which is "
+             "expected to return "
             "to users.");
    AddOutput(
        "Out",
-        "(vector<LoDTensor>|unordered_map<string, int32_t>) A fetching list"
+        "(vector<phi::DenseTensor>|unordered_map<string, int32_t>) A fetching "
-        " of LoDTensor|unordered_map<string, int32_t> which may have "
+        "list"
+        " of phi::DenseTensor|unordered_map<string, int32_t> which may have "
        "different dimension, shape and data type.");
    AddAttr<int>("col", "(int) The column index of fetching object.");
    AddComment(R"DOC(

--- a/paddle/fluid/operators/controlflow/fetch_v2_op.cc
+++ b/paddle/fluid/operators/controlflow/fetch_v2_op.cc
@@ -201,10 +201,12 @@ class FetchV2OpProtoMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X",
-             "(LoDTensor) The resulted LoDTensor which is expected to return "
+             "(phi::DenseTensor) The resulted phi::DenseTensor which is "
+             "expected to return "
             "to users.");
    AddOutput("Out",
-              "(vector<LoDTensor>) A fetching list of LoDTensor which may have "
+              "(vector<phi::DenseTensor>) A fetching list of phi::DenseTensor "
+              "which may have "
              "different dimension, shape and data type.");
    AddAttr<int>("col", "(int) The column index of fetching object.");
    AddAttr<bool>("deepcopy", "(bool) Whether deep copy is required.")

--- a/paddle/fluid/operators/controlflow/logical_op.cc
+++ b/paddle/fluid/operators/controlflow/logical_op.cc
@@ -35,7 +35,7 @@ class BinaryLogicalOpProtoMaker : public framework::OpProtoAndCheckerMaker {
                             comment.type));
    AddOutput("Out", string::Sprintf("n-dim bool Variable"));
    AddComment(string::Sprintf(R"DOC(%s Operator
-It operates element-wise on X and Y, and returns the Out. X, Y and Out are N-dim LoDTensor or Tensor.
+It operates element-wise on X and Y, and returns the Out. X, Y and Out are N-dim phi::DenseTensor or Tensor.
 Each element of Out is calculated by %s
 )DOC",
                               comment.type,
@@ -49,13 +49,14 @@ class UnaryLogicalOpProtoMaker : public framework::OpProtoAndCheckerMaker {
  void Make() override {
    OpComment comment;
    AddInput("X",
-             string::Sprintf("Operand of %s operator. Must be "
+             string::Sprintf(
-                             "a LoDTensor or Tensor of type being one of bool, "
+                 "Operand of %s operator. Must be "
-                             "int8, int16, int32, int64, float32, float64.",
+                 "a phi::DenseTensor or Tensor of type being one of bool, "
-                             comment.type));
+                 "int8, int16, int32, int64, float32, float64.",
-    AddOutput("Out", string::Sprintf("n-dim bool LoDTensor or Tensor."));
+                 comment.type));
+    AddOutput("Out", string::Sprintf("n-dim bool phi::DenseTensor or Tensor."));
    AddComment(string::Sprintf(R"DOC(%s Operator
-It operates element-wise on X, and returns the Out. X and Out are N-dim LoDTensor or Tensor.
+It operates element-wise on X, and returns the Out. X and Out are N-dim phi::DenseTensor or Tensor.
 Each element of Out is calculated by %s
 )DOC",
                               comment.type,

--- a/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc
+++ b/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc
@@ -67,7 +67,8 @@ class WriteToArrayOp : public ArrayOp {
 class WriteToArrayOpProtoMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
-    AddInput("X", "(LoDTensor) the tensor will be written to tensor array");
+    AddInput("X",
+             "(phi::DenseTensor) the tensor will be written to tensor array");
    AddInput(
        "I",
        "(Tensor) the subscript index in tensor array. The number of element "
@@ -76,9 +77,9 @@ class WriteToArrayOpProtoMaker : public framework::OpProtoAndCheckerMaker {
    AddComment(R"DOC(
 WriteToArray Operator.
-This operator writes a LoDTensor to a LoDTensor array.
+This operator writes a phi::DenseTensor to a phi::DenseTensor array.
-Assume $T$ is LoDTensor, $i$ is the subscript of the array, and $A$ is the array. The
+Assume $T$ is phi::DenseTensor, $i$ is the subscript of the array, and $A$ is the array. The
 equation is
 $$A[i] = T$$
@@ -196,13 +197,13 @@ class ReadFromArrayProtoMaker : public framework::OpProtoAndCheckerMaker {
             "(Tensor) the writed tensor when used as the grad op of "
             "write_to_array. We use this to fill zero gradient.")
        .AsDispensable();
-    AddOutput("Out", "(LoDTensor) the tensor will be read from.");
+    AddOutput("Out", "(phi::DenseTensor) the tensor will be read from.");
    AddComment(R"DOC(
 ReadFromArray Operator.
-Read a LoDTensor from a LoDTensor Array.
+Read a phi::DenseTensor from a phi::DenseTensor Array.
-Assume $T$ is LoDTensor, $i$ is the subscript of the array, and $A$ is the array. The
+Assume $T$ is phi::DenseTensor, $i$ is the subscript of the array, and $A$ is the array. The
 equation is
 $$T = A[i]$$

--- a/paddle/fluid/operators/controlflow/while_op.cc
+++ b/paddle/fluid/operators/controlflow/while_op.cc
@@ -32,7 +32,6 @@ namespace paddle {
 namespace operators {
 using StepScopeVar = std::vector<framework::Scope *>;
-using LoDTensor = phi::DenseTensor;
 namespace {  // NOLINT
 static std::string GetSkipEagerDeletionVarsDebugString(
@@ -62,7 +61,7 @@ class WhileOp : public framework::OperatorBase {
                            platform::errors::NotFound(
                                "Input(Condition) of WhileOp is not found."));
-    auto &cond = scope.FindVar(Input(kCondition))->Get<LoDTensor>();
+    auto &cond = scope.FindVar(Input(kCondition))->Get<phi::DenseTensor>();
    PADDLE_ENFORCE_EQ(
        cond.dims(),
        phi::make_ddim({1}),
@@ -149,9 +148,10 @@ class WhileOp : public framework::OperatorBase {
            framework::Variable *input_var = scope.FindVar(input_var_name);
            if (input_var->IsType<phi::DenseTensor>()) {
              rename_vars.push_back(input_var_rename);
-              auto input_var_tensor = input_var->Get<LoDTensor>();
+              auto input_var_tensor = input_var->Get<phi::DenseTensor>();
              auto *rename_input_var_tensor =
-                  current_scope.Var(input_var_rename)->GetMutable<LoDTensor>();
+                  current_scope.Var(input_var_rename)
+                      ->GetMutable<phi::DenseTensor>();
              framework::TensorCopy(
                  input_var_tensor, dev_place, rename_input_var_tensor);
              rename_input_var_tensor->set_lod(input_var_tensor.lod());
@@ -166,8 +166,8 @@ class WhileOp : public framework::OperatorBase {
              var_rename.substr(0, var_rename.size() - strlen(kSuffix));
          current_scope.Rename(var_rename, input_var_name);
        }
-        cond_data =
+        cond_data = GetCondData(
-            GetCondData(scope.FindVar(Input(kCondition))->Get<LoDTensor>());
+            scope.FindVar(Input(kCondition))->Get<phi::DenseTensor>());
      }
    } else {
      auto &current_scope = scope.NewScope();
@@ -188,8 +188,8 @@ class WhileOp : public framework::OperatorBase {
        }
        executor.RunPreparedContext(
            ctx.get(), &current_scope, false, false, false);
-        cond_data =
+        cond_data = GetCondData(
-            GetCondData(scope.FindVar(Input(kCondition))->Get<LoDTensor>());
+            scope.FindVar(Input(kCondition))->Get<phi::DenseTensor>());
      }
      scope.DeleteScope(&current_scope);
    }
@@ -325,7 +325,8 @@ class WhileGradOp : public framework::OperatorBase {
          }
        } else {
          PADDLE_THROW(platform::errors::Unimplemented(
-              "Currently only support LoDTensor and LoDTensorArray in "
+              "Currently only support phi::DenseTensor and "
+              "phi::DenseTensorArray in "
              "WhileGradOp."));
        }
      }
@@ -398,16 +399,16 @@ class WhileGradOp : public framework::OperatorBase {
                                         inside_grad_name));
          PADDLE_ENFORCE_EQ(
              var->IsType<framework::LoDTensorArray>() ||
-                  var->IsType<LoDTensor>(),
+                  var->IsType<phi::DenseTensor>(),
              true,
              platform::errors::InvalidArgument(
                  "Currently the type of var only can be LoDTensorArray, "
-                  "or LoDTensor, but the received var[%s] is %s.",
+                  "or phi::DenseTensor, but the received var[%s] is %s.",
                  inside_grad_name,
                  framework::ToTypeName(var->Type())));
          if ((var_iter == outside_og_names.end()) &&
-              var->IsType<LoDTensor>()) {
+              var->IsType<phi::DenseTensor>()) {
            auto &inside_tensor = var->Get<phi::DenseTensor>();
            framework::AttributeMap attrs;
            attrs["dtype"] =

--- a/paddle/fluid/operators/detection/bbox_util.cu.h
+++ b/paddle/fluid/operators/detection/bbox_util.cu.h
@@ -31,7 +31,6 @@ namespace paddle {
 namespace operators {
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))

--- a/paddle/fluid/operators/detection/bipartite_match_op.cc
+++ b/paddle/fluid/operators/detection/bipartite_match_op.cc
@@ -19,7 +19,6 @@ namespace paddle {
 namespace operators {
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 class BipartiteMatchOp : public framework::OperatorWithKernel {
 public:
@@ -196,7 +195,7 @@ class BipartiteMatchKernel : public framework::OpKernel<T> {
  }
  void Compute(const framework::ExecutionContext& context) const override {
-    auto* dist_mat = context.Input<LoDTensor>("DistMat");
+    auto* dist_mat = context.Input<phi::DenseTensor>("DistMat");
    auto* match_indices =
        context.Output<phi::DenseTensor>("ColToRowMatchIndices");
    auto* match_dist = context.Output<phi::DenseTensor>("ColToRowMatchDist");
@@ -251,7 +250,8 @@ class BipartiteMatchOpMaker : public framework::OpProtoAndCheckerMaker {
  void Make() override {
    AddInput(
        "DistMat",
-        "(LoDTensor or Tensor) this input is a 2-D LoDTensor with shape "
+        "(phi::DenseTensor or Tensor) this input is a 2-D phi::DenseTensor "
+        "with shape "
        "[K, M]. It is pair-wise distance matrix between the entities "
        "represented by each row and each column. For example, assumed one "
        "entity is A with shape [K], another entity is B with shape [M]. The "
@@ -302,8 +302,8 @@ row entity to the column entity and the matched indices are not duplicated
 in each row of ColToRowMatchIndices. If the column entity is not matched
 any row entity, set -1 in ColToRowMatchIndices.
-Please note that the input DistMat can be LoDTensor (with LoD) or Tensor.
+Please note that the input DistMat can be phi::DenseTensor (with LoD) or Tensor.
-If LoDTensor with LoD, the height of ColToRowMatchIndices is batch size.
+If phi::DenseTensor with LoD, the height of ColToRowMatchIndices is batch size.
 If Tensor, the height of ColToRowMatchIndices is 1.
 )DOC");

--- a/paddle/fluid/operators/detection/box_clip_op.cc
+++ b/paddle/fluid/operators/detection/box_clip_op.cc
@@ -66,15 +66,15 @@ class BoxClipOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("Input",
-             "(LoDTensor) "
+             "(phi::DenseTensor) "
-             "Input is a LoDTensor with shape [..., 4] holds 4 points"
+             "Input is a phi::DenseTensor with shape [..., 4] holds 4 points"
             "in last dimension in format [xmin, ymin, xmax, ymax]");
    AddInput("ImInfo",
             "(Tensor) Information for image reshape is in shape (N, 3), "
             "in format (height, width, im_scale)");
    AddOutput("Output",
-              "(LoDTensor) "
+              "(phi::DenseTensor) "
-              "Output is a LoDTensor with the same shape as Input"
+              "Output is a phi::DenseTensor with the same shape as Input"
              "and it is the result after clip");
    AddComment(R"DOC(
 This operator clips input boxes to original input images.

--- a/paddle/fluid/operators/detection/box_clip_op.cu
+++ b/paddle/fluid/operators/detection/box_clip_op.cu
@@ -49,9 +49,9 @@ template <typename DeviceContext, typename T>
 class GPUBoxClipKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &context) const override {
-    auto *input = context.Input<LoDTensor>("Input");
+    auto *input = context.Input<phi::DenseTensor>("Input");
    auto *im_info = context.Input<phi::DenseTensor>("ImInfo");
-    auto *output = context.Output<LoDTensor>("Output");
+    auto *output = context.Output<phi::DenseTensor>("Output");
    const int64_t num = input->dims()[0];
    const int64_t bbox_width = input->numel() / num;
    auto lod = input->lod();

--- a/paddle/fluid/operators/detection/box_clip_op.h
+++ b/paddle/fluid/operators/detection/box_clip_op.h
@@ -20,15 +20,14 @@ namespace paddle {
 namespace operators {
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 template <typename DeviceContext, typename T>
 class BoxClipKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
-    auto* input_box = context.Input<LoDTensor>("Input");
+    auto* input_box = context.Input<phi::DenseTensor>("Input");
-    auto* im_info = context.Input<LoDTensor>("ImInfo");
+    auto* im_info = context.Input<phi::DenseTensor>("ImInfo");
-    auto* output_box = context.Output<LoDTensor>("Output");
+    auto* output_box = context.Output<phi::DenseTensor>("Output");
    auto& dev_ctx = context.template device_context<phi::CPUContext>();
    output_box->mutable_data<T>(context.GetPlace());
    if (input_box->lod().size()) {

--- a/paddle/fluid/operators/detection/box_coder_op.cc
+++ b/paddle/fluid/operators/detection/box_coder_op.cc
@@ -44,7 +44,8 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker {
        .AsDispensable();
    AddInput(
        "TargetBox",
-        "(LoDTensor or Tensor) This input can be a 2-D LoDTensor with shape "
+        "(phi::DenseTensor or Tensor) This input can be a 2-D phi::DenseTensor "
+        "with shape "
        "[N, 4] when code_type is 'encode_center_size'. This input also can "
        "be a 3-D Tensor with shape [N, M, 4] when code_type is "
        "'decode_center_size'. [N, 4], each box is represented as "
@@ -79,7 +80,7 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker {
        "not be provided at the same time.")
        .SetDefault(std::vector<float>{});
    AddOutput("OutputBox",
-              "(LoDTensor or Tensor) "
+              "(phi::DenseTensor or Tensor) "
              "When code_type is 'encode_center_size', the output tensor of "
              "box_coder_op with shape [N, M, 4] representing the result of N "
              "target boxes encoded with M Prior boxes and variances. When "

--- a/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc
+++ b/paddle/fluid/operators/detection/box_decoder_and_assign_op.cc
@@ -14,8 +14,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-using LoDTensor = phi::DenseTensor;
 class BoxDecoderAndAssignOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;
@@ -157,12 +155,12 @@ class BoxDecoderAndAssignOpMaker : public framework::OpProtoAndCheckerMaker {
             "default.")
        .AsDispensable();
    AddInput("TargetBox",
-             "(LoDTensor or Tensor) "
+             "(phi::DenseTensor or Tensor) "
-             "This input can be a 2-D LoDTensor with shape "
+             "This input can be a 2-D phi::DenseTensor with shape "
             "[N, classnum*4]. It holds N targets for N boxes.");
    AddInput("BoxScore",
-             "(LoDTensor or Tensor) "
+             "(phi::DenseTensor or Tensor) "
-             "This input can be a 2-D LoDTensor with shape "
+             "This input can be a 2-D phi::DenseTensor with shape "
             "[N, classnum], each box is represented as [classnum] which is "
             "the classification probabilities.");
    AddAttr<float>("box_clip",
@@ -170,12 +168,12 @@ class BoxDecoderAndAssignOpMaker : public framework::OpProtoAndCheckerMaker {
                   "clip box to prevent overflowing")
        .SetDefault(4.135f);
    AddOutput("DecodeBox",
-              "(LoDTensor or Tensor) "
+              "(phi::DenseTensor or Tensor) "
              "the output tensor of op with shape [N, classnum * 4] "
              "representing the result of N target boxes decoded with "
              "M Prior boxes and variances for each class.");
    AddOutput("OutputAssignBox",
-              "(LoDTensor or Tensor) "
+              "(phi::DenseTensor or Tensor) "
              "the output tensor of op with shape [N, 4] "
              "representing the result of N target boxes decoded with "
              "M Prior boxes and variances with the best non-background class "

--- a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc
+++ b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc
@@ -17,7 +17,6 @@ namespace paddle {
 namespace operators {
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 class CollectFpnProposalsOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;
@@ -76,8 +75,8 @@ class CollectFpnProposalsOp : public framework::OperatorWithKernel {
            PADDLE_GET(framework::Variable *, roi_inputs[i]);
        framework::Variable *score_var =
            PADDLE_GET(framework::Variable *, score_inputs[i]);
-        auto &roi_lod = roi_var->Get<LoDTensor>().lod();
+        auto &roi_lod = roi_var->Get<phi::DenseTensor>().lod();
-        auto &score_lod = score_var->Get<LoDTensor>().lod();
+        auto &score_lod = score_var->Get<phi::DenseTensor>().lod();
        PADDLE_ENFORCE_EQ(
            roi_lod,
            score_lod,
@@ -101,11 +100,13 @@ class CollectFpnProposalsOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("MultiLevelRois",
-             "(LoDTensor) Multiple roi LoDTensors from each level in shape "
+             "(phi::DenseTensor) Multiple roi phi::DenseTensors from each "
+             "level in shape "
             "(N, 4), N is the number of RoIs")
        .AsDuplicable();
    AddInput("MultiLevelScores",
-             "(LoDTensor) Multiple score LoDTensors from each level in shape"
+             "(phi::DenseTensor) Multiple score phi::DenseTensors from each "
+             "level in shape"
             " (N, 1), N is the number of RoIs.")
        .AsDuplicable();
    AddInput(
@@ -115,7 +116,8 @@ class CollectFpnProposalsOpMaker : public framework::OpProtoAndCheckerMaker {
        "images.")
        .AsDuplicable()
        .AsDispensable();
-    AddOutput("FpnRois", "(LoDTensor) All selected RoIs with highest scores");
+    AddOutput("FpnRois",
+              "(phi::DenseTensor) All selected RoIs with highest scores");
    AddOutput("RoisNum", "(Tensor), Number of RoIs in each images.")
        .AsDispensable();
    AddAttr<int>("post_nms_topN",

--- a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu
+++ b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu
@@ -34,7 +34,6 @@ namespace paddle {
 namespace operators {
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 static constexpr int kNumCUDAThreads = 64;
 static constexpr int kNumMaxinumNumBlocks = 4096;
@@ -58,9 +57,9 @@ template <typename DeviceContext, typename T>
 class GPUCollectFpnProposalsOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
-    const auto roi_ins = ctx.MultiInput<LoDTensor>("MultiLevelRois");
+    const auto roi_ins = ctx.MultiInput<phi::DenseTensor>("MultiLevelRois");
-    const auto score_ins = ctx.MultiInput<LoDTensor>("MultiLevelScores");
+    const auto score_ins = ctx.MultiInput<phi::DenseTensor>("MultiLevelScores");
-    auto fpn_rois = ctx.Output<LoDTensor>("FpnRois");
+    auto fpn_rois = ctx.Output<phi::DenseTensor>("FpnRois");
    auto& dev_ctx = ctx.template device_context<DeviceContext>();
    const int post_nms_topN = ctx.Attr<int>("post_nms_topN");

--- a/paddle/fluid/operators/detection/collect_fpn_proposals_op.h
+++ b/paddle/fluid/operators/detection/collect_fpn_proposals_op.h
@@ -91,7 +91,7 @@ class CollectFpnProposalsOpKernel : public framework::OpKernel<T> {
            "is %d",
            multi_layer_rois.size(),
            multi_layer_scores.size()));
-    // Check if the lod information of two LoDTensor is same
+    // Check if the lod information of two phi::DenseTensor is same
    const int num_fpn_level = multi_layer_rois.size();
    std::vector<int> integral_of_all_rois(num_fpn_level + 1, 0);
    for (int i = 0; i < num_fpn_level; ++i) {

--- a/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cc
+++ b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cc
@@ -37,12 +37,14 @@ class DistributeFpnProposalsOp : public framework::OperatorWithKernel {
 class DistributeFpnProposalsOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
-    AddInput("FpnRois", "(LoDTensor) The RoIs at all levels in shape (-1, 4)");
+    AddInput("FpnRois",
+             "(phi::DenseTensor) The RoIs at all levels in shape (-1, 4)");
    AddInput("RoisNum",
             "(Tensor) The number of RoIs in shape (B),"
             "B is the number of images")
        .AsDispensable();
-    AddOutput("MultiFpnRois", "(LoDTensor) Output with distribute operator")
+    AddOutput("MultiFpnRois",
+              "(phi::DenseTensor) Output with distribute operator")
        .AsDuplicable();
    AddOutput("RestoreIndex",
              "(Tensor) An array of positive number which is "

--- a/paddle/fluid/operators/detection/generate_mask_labels_op.cc
+++ b/paddle/fluid/operators/detection/generate_mask_labels_op.cc
@@ -26,11 +26,12 @@ namespace paddle {
 namespace operators {
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 const int kBoxDim = 4;
 template <typename T>
-void AppendMask(LoDTensor* out, int64_t offset, phi::DenseTensor* to_add) {
+void AppendMask(phi::DenseTensor* out,
+                int64_t offset,
+                phi::DenseTensor* to_add) {
  auto* out_data = out->data<T>();
  auto* to_add_data = to_add->data<T>();
  memcpy(out_data + offset, to_add_data, to_add->numel() * sizeof(T));
@@ -331,16 +332,16 @@ template <typename T>
 class GenerateMaskLabelsKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* im_info = ctx.Input<LoDTensor>("ImInfo");
+    auto* im_info = ctx.Input<phi::DenseTensor>("ImInfo");
-    auto* gt_classes = ctx.Input<LoDTensor>("GtClasses");
+    auto* gt_classes = ctx.Input<phi::DenseTensor>("GtClasses");
-    auto* is_crowd = ctx.Input<LoDTensor>("IsCrowd");
+    auto* is_crowd = ctx.Input<phi::DenseTensor>("IsCrowd");
-    auto* gt_segms = ctx.Input<LoDTensor>("GtSegms");
+    auto* gt_segms = ctx.Input<phi::DenseTensor>("GtSegms");
-    auto* rois = ctx.Input<LoDTensor>("Rois");
+    auto* rois = ctx.Input<phi::DenseTensor>("Rois");
-    auto* label_int32 = ctx.Input<LoDTensor>("LabelsInt32");
+    auto* label_int32 = ctx.Input<phi::DenseTensor>("LabelsInt32");
-    auto* mask_rois = ctx.Output<LoDTensor>("MaskRois");
+    auto* mask_rois = ctx.Output<phi::DenseTensor>("MaskRois");
-    auto* roi_has_mask_int32 = ctx.Output<LoDTensor>("RoiHasMaskInt32");
+    auto* roi_has_mask_int32 = ctx.Output<phi::DenseTensor>("RoiHasMaskInt32");
-    auto* mask_int32 = ctx.Output<LoDTensor>("MaskInt32");
+    auto* mask_int32 = ctx.Output<phi::DenseTensor>("MaskInt32");
    int num_classes = ctx.Attr<int>("num_classes");
    int resolution = ctx.Attr<int>("resolution");
@@ -463,17 +464,20 @@ class GenerateMaskLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
             "B is the number of input images, "
             "each element consists of im_height, im_width, im_scale.");
    AddInput("GtClasses",
-             "(LoDTensor), This input is a 2D LoDTensor with shape [M, 1]. "
+             "(phi::DenseTensor), This input is a 2D phi::DenseTensor with "
+             "shape [M, 1]. "
             "M is the number of groundtruth, "
             "each element is a class label of groundtruth.");
    AddInput(
        "IsCrowd",
-        "(LoDTensor), This input is a 2D LoDTensor with shape [M, 1]. "
+        "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
+        "[M, 1]. "
        "M is the number of groundtruth, "
        "each element is a flag indicates whether a groundtruth is crowd.");
    AddInput(
        "GtSegms",
-        "(LoDTensor), This input is a 2D LoDTensor with shape [S, 2], it's LoD "
+        "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
+        "[S, 2], it's LoD "
        "level is 3. The LoD[0] represents the gt objects number of each "
        "instance. LoD[1] represents the segmentation counts of each objects. "
        "LoD[2] represents the polygons number of each segmentation. S the "
@@ -481,24 +485,29 @@ class GenerateMaskLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
        "coordinate points.");
    AddInput(
        "Rois",
-        "(LoDTensor), This input is a 2D LoDTensor with shape [R, 4]. "
+        "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
+        "[R, 4]. "
        "R is the number of rois which is the output of "
        "generate_proposal_labels, "
        "each element is a bounding box with (xmin, ymin, xmax, ymax) format.");
    AddInput("LabelsInt32",
-             "(LoDTensor), This intput is a 2D LoDTensor with shape [R, 1], "
+             "(phi::DenseTensor), This intput is a 2D phi::DenseTensor with "
+             "shape [R, 1], "
             "each element represents a class label of a roi");
    AddOutput(
        "MaskRois",
-        "(LoDTensor), This output is a 2D LoDTensor with shape [P, 4]. "
+        "(phi::DenseTensor), This output is a 2D phi::DenseTensor with shape "
+        "[P, 4]. "
        "P is the number of mask, "
        "each element is a bounding box with [xmin, ymin, xmax, ymax] format.");
    AddOutput("RoiHasMaskInt32",
-              "(LoDTensor), This output is a 2D LoDTensor with shape [P, 1], "
+              "(phi::DenseTensor), This output is a 2D phi::DenseTensor with "
+              "shape [P, 1], "
              "each element represents the output mask rois index with regard "
              "to input rois");
    AddOutput("MaskInt32",
-              "(LoDTensor), This output is a 4D LoDTensor with shape [P, Q], "
+              "(phi::DenseTensor), This output is a 4D phi::DenseTensor with "
+              "shape [P, Q], "
              "Q equal to num_classes * resolution * resolution");
    AddAttr<int>("num_classes", "Class number.");

--- a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc
+++ b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc
@@ -26,11 +26,12 @@ namespace paddle {
 namespace operators {
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 const int kBoxDim = 4;
 template <typename T>
-void AppendRois(LoDTensor* out, int64_t offset, phi::DenseTensor* to_add) {
+void AppendRois(phi::DenseTensor* out,
+                int64_t offset,
+                phi::DenseTensor* to_add) {
  auto* out_data = out->data<T>();
  auto* to_add_data = to_add->data<T>();
  memcpy(out_data + offset, to_add_data, to_add->numel() * sizeof(T));
@@ -513,19 +514,21 @@ template <typename T>
 class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
-    auto* rpn_rois = context.Input<LoDTensor>("RpnRois");
+    auto* rpn_rois = context.Input<phi::DenseTensor>("RpnRois");
-    auto* gt_classes = context.Input<LoDTensor>("GtClasses");
+    auto* gt_classes = context.Input<phi::DenseTensor>("GtClasses");
-    auto* is_crowd = context.Input<LoDTensor>("IsCrowd");
+    auto* is_crowd = context.Input<phi::DenseTensor>("IsCrowd");
-    auto* gt_boxes = context.Input<LoDTensor>("GtBoxes");
+    auto* gt_boxes = context.Input<phi::DenseTensor>("GtBoxes");
-    auto* im_info = context.Input<LoDTensor>("ImInfo");
+    auto* im_info = context.Input<phi::DenseTensor>("ImInfo");
-    auto* rois = context.Output<LoDTensor>("Rois");
+    auto* rois = context.Output<phi::DenseTensor>("Rois");
-    auto* labels_int32 = context.Output<LoDTensor>("LabelsInt32");
+    auto* labels_int32 = context.Output<phi::DenseTensor>("LabelsInt32");
-    auto* bbox_targets = context.Output<LoDTensor>("BboxTargets");
+    auto* bbox_targets = context.Output<phi::DenseTensor>("BboxTargets");
-    auto* bbox_inside_weights = context.Output<LoDTensor>("BboxInsideWeights");
+    auto* bbox_inside_weights =
+        context.Output<phi::DenseTensor>("BboxInsideWeights");
    auto* bbox_outside_weights =
-        context.Output<LoDTensor>("BboxOutsideWeights");
+        context.Output<phi::DenseTensor>("BboxOutsideWeights");
-    auto* max_overlap_with_gt = context.Output<LoDTensor>("MaxOverlapWithGT");
+    auto* max_overlap_with_gt =
+        context.Output<phi::DenseTensor>("MaxOverlapWithGT");
    int batch_size_per_im = context.Attr<int>("batch_size_per_im");
    float fg_fraction = context.Attr<float>("fg_fraction");
@@ -685,21 +688,25 @@ class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
  void Make() override {
    AddInput(
        "RpnRois",
-        "(LoDTensor), This input is a 2D LoDTensor with shape [N, 4]. "
+        "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
+        "[N, 4]. "
        "N is the number of the GenerateProposalOp's output, "
        "each element is a bounding box with [xmin, ymin, xmax, ymax] format.");
    AddInput("GtClasses",
-             "(LoDTensor), This input is a 2D LoDTensor with shape [M, 1]. "
+             "(phi::DenseTensor), This input is a 2D phi::DenseTensor with "
+             "shape [M, 1]. "
             "M is the number of groundtruth, "
             "each element is a class label of groundtruth.");
    AddInput(
        "IsCrowd",
-        "(LoDTensor), This input is a 2D LoDTensor with shape [M, 1]. "
+        "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
+        "[M, 1]. "
        "M is the number of groundtruth, "
        "each element is a flag indicates whether a groundtruth is crowd.");
    AddInput(
        "GtBoxes",
-        "(LoDTensor), This input is a 2D LoDTensor with shape [M, 4]. "
+        "(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
+        "[M, 4]. "
        "M is the number of groundtruth, "
        "each element is a bounding box with [xmin, ymin, xmax, ymax] format.");
    AddInput("ImInfo",
@@ -707,7 +714,8 @@ class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
             "B is the number of input images, "
             "each element consists of im_height, im_width, im_scale.");
    AddInput("MaxOverlap",
-             "(LoDTensor), This input is a 1D LoDTensor with shape [N]."
+             "(phi::DenseTensor), This input is a 1D phi::DenseTensor with "
+             "shape [N]."
             "N is the number of Input(RpnRois), "
             "each element is the maximum overlap between "
             "the proposal RoI and ground-truth.")
@@ -715,28 +723,34 @@ class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
    AddOutput(
        "Rois",
-        "(LoDTensor), This output is a 2D LoDTensor with shape [P, 4]. "
+        "(phi::DenseTensor), This output is a 2D phi::DenseTensor with shape "
+        "[P, 4]. "
        "P usuall equal to  batch_size_per_im * batch_size, "
        "each element is a bounding box with [xmin, ymin, xmax, ymax] format.");
    AddOutput("LabelsInt32",
-              "(LoDTensor), This output is a 2D LoDTensor with shape [P, 1], "
+              "(phi::DenseTensor), This output is a 2D phi::DenseTensor with "
+              "shape [P, 1], "
              "each element represents a class label of a roi");
    AddOutput("BboxTargets",
-              "(LoDTensor), This output is a 2D LoDTensor with shape [P, 4 * "
+              "(phi::DenseTensor), This output is a 2D phi::DenseTensor with "
+              "shape [P, 4 * "
              "class_nums], "
              "each element represents a box label of a roi");
    AddOutput(
        "BboxInsideWeights",
-        "(LoDTensor), This output is a 2D LoDTensor with shape [P, 4 * "
+        "(phi::DenseTensor), This output is a 2D phi::DenseTensor with shape "
+        "[P, 4 * "
        "class_nums], "
        "each element indicates whether a box should contribute to loss.");
    AddOutput(
        "BboxOutsideWeights",
-        "(LoDTensor), This output is a 2D LoDTensor with shape [P, 4 * "
+        "(phi::DenseTensor), This output is a 2D phi::DenseTensor with shape "
+        "[P, 4 * "
        "class_nums], "
        "each element indicates whether a box should contribute to loss.");
    AddOutput("MaxOverlapWithGT",
-              "(LoDTensor), This output is a 1D LoDTensor with shape [P], "
+              "(phi::DenseTensor), This output is a 1D phi::DenseTensor with "
+              "shape [P], "
              "each element indicates the maxoverlap "
              "between output RoIs and ground-truth. "
              "The output RoIs may include ground-truth "

--- a/paddle/fluid/operators/detection/generate_proposals_op.cc
+++ b/paddle/fluid/operators/detection/generate_proposals_op.cc
@@ -28,7 +28,6 @@ namespace paddle {
 namespace operators {
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 class GenerateProposalsOp : public framework::OperatorWithKernel {
 public:
@@ -90,8 +89,8 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
                        "Variances",
                        "GenerateProposals");
-    auto *rpn_rois = context.Output<LoDTensor>("RpnRois");
+    auto *rpn_rois = context.Output<phi::DenseTensor>("RpnRois");
-    auto *rpn_roi_probs = context.Output<LoDTensor>("RpnRoiProbs");
+    auto *rpn_roi_probs = context.Output<phi::DenseTensor>("RpnRoiProbs");
    int pre_nms_top_n = context.Attr<int>("pre_nms_topN");
    int post_nms_top_n = context.Attr<int>("post_nms_topN");
@@ -288,9 +287,10 @@ class GenerateProposalsOpMaker : public framework::OpProtoAndCheckerMaker {
             "(Tensor) Bounding box variances with same shape as `Anchors`.");
    AddOutput("RpnRois",
-              "(LoDTensor), Output proposals with shape (rois_num, 4).");
+              "(phi::DenseTensor), Output proposals with shape (rois_num, 4).");
-    AddOutput("RpnRoiProbs",
+    AddOutput(
-              "(LoDTensor) Scores of proposals with shape (rois_num, 1).");
+        "RpnRoiProbs",
+        "(phi::DenseTensor) Scores of proposals with shape (rois_num, 1).");
    AddOutput("RpnRoisNum", "(Tensor), The number of Rpn RoIs in each image")
        .AsDispensable();
    AddAttr<int>("pre_nms_topN",

--- a/paddle/fluid/operators/detection/generate_proposals_op.cu
+++ b/paddle/fluid/operators/detection/generate_proposals_op.cu
@@ -29,7 +29,6 @@ namespace paddle {
 namespace operators {
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 namespace {
 template <typename T>
@@ -144,8 +143,8 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel<T> {
                        "Variances",
                        "GenerateProposals");
-    auto *rpn_rois = context.Output<LoDTensor>("RpnRois");
+    auto *rpn_rois = context.Output<phi::DenseTensor>("RpnRois");
-    auto *rpn_roi_probs = context.Output<LoDTensor>("RpnRoiProbs");
+    auto *rpn_roi_probs = context.Output<phi::DenseTensor>("RpnRoiProbs");
    int pre_nms_top_n = context.Attr<int>("pre_nms_topN");
    int post_nms_top_n = context.Attr<int>("post_nms_topN");

--- a/paddle/fluid/operators/detection/generate_proposals_v2_op.cc
+++ b/paddle/fluid/operators/detection/generate_proposals_v2_op.cc
@@ -30,7 +30,6 @@ namespace paddle {
 namespace operators {
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 class GenerateProposalsV2Op : public framework::OperatorWithKernel {
 public:
@@ -65,9 +64,10 @@ class GenerateProposalsV2OpMaker : public framework::OpProtoAndCheckerMaker {
             "(Tensor) Bounding box variances with same shape as `Anchors`.");
    AddOutput("RpnRois",
-              "(LoDTensor), Output proposals with shape (rois_num, 4).");
+              "(phi::DenseTensor), Output proposals with shape (rois_num, 4).");
-    AddOutput("RpnRoiProbs",
+    AddOutput(
-              "(LoDTensor) Scores of proposals with shape (rois_num, 1).");
+        "RpnRoiProbs",
+        "(phi::DenseTensor) Scores of proposals with shape (rois_num, 1).");
    AddOutput("RpnRoisNum", "(Tensor), The number of Rpn RoIs in each image")
        .AsDispensable();
    AddAttr<int>("pre_nms_topN",

--- a/paddle/fluid/operators/detection/iou_similarity_op.cc
+++ b/paddle/fluid/operators/detection/iou_similarity_op.cc
@@ -59,17 +59,18 @@ class IOUSimilarityOp : public framework::OperatorWithKernel {
 class IOUSimilarityOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
-    AddInput("X",
+    AddInput(
-             "(LoDTensor, default LoDTensor<float>) "
+        "X",
-             "Box list X is a 2-D LoDTensor with shape [N, 4] holds N boxes, "
+        "(phi::DenseTensor, default phi::DenseTensor<float>) "
-             "each box is represented as [xmin, ymin, xmax, ymax], "
+        "Box list X is a 2-D phi::DenseTensor with shape [N, 4] holds N boxes, "
-             "the shape of X is [N, 4]. [xmin, ymin] is the left top "
+        "each box is represented as [xmin, ymin, xmax, ymax], "
-             "coordinate of the box if the input is image feature map, they "
+        "the shape of X is [N, 4]. [xmin, ymin] is the left top "
-             "are close to the origin of the coordinate system. "
+        "coordinate of the box if the input is image feature map, they "
-             "[xmax, ymax] is the right bottom coordinate of the box. "
+        "are close to the origin of the coordinate system. "
-             "This tensor can contain LoD information to represent a batch "
+        "[xmax, ymax] is the right bottom coordinate of the box. "
-             "of inputs. One instance of this batch can contain different "
+        "This tensor can contain LoD information to represent a batch "
-             "numbers of entities.");
+        "of inputs. One instance of this batch can contain different "
+        "numbers of entities.");
    AddInput("Y",
             "(Tensor, default Tensor<float>) "
             "Box list Y holds M boxes, each box is represented as "
@@ -82,7 +83,7 @@ class IOUSimilarityOpMaker : public framework::OpProtoAndCheckerMaker {
                  "whether treat the priorbox as a normalized box")
        .SetDefault(true);
    AddOutput("Out",
-              "(LoDTensor, the lod is same as input X) The output of "
+              "(phi::DenseTensor, the lod is same as input X) The output of "
              "iou_similarity op, a tensor with shape [N, M] "
              "representing pairwise iou scores.");
@@ -90,7 +91,7 @@ class IOUSimilarityOpMaker : public framework::OpProtoAndCheckerMaker {
 **IOU Similarity Operator**
 Computes intersection-over-union (IOU) between two box lists.
-Box list 'X' should be a LoDTensor and 'Y' is a common Tensor,
+Box list 'X' should be a phi::DenseTensor and 'Y' is a common Tensor,
 boxes in 'Y' are shared by all instance of the batched inputs of X.
 Given two boxes A and B, the calculation of IOU is as follows:

--- a/paddle/fluid/operators/detection/locality_aware_nms_op.cc
+++ b/paddle/fluid/operators/detection/locality_aware_nms_op.cc
@@ -20,7 +20,6 @@ namespace paddle {
 namespace operators {
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 class LocalityAwareNMSOp : public framework::OperatorWithKernel {
 public:
@@ -352,15 +351,15 @@ class LocalityAwareNMSKernel : public framework::OpKernel<T> {
  }
  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* boxes_input = ctx.Input<LoDTensor>("BBoxes");
+    auto* boxes_input = ctx.Input<phi::DenseTensor>("BBoxes");
-    auto* scores_input = ctx.Input<LoDTensor>("Scores");
+    auto* scores_input = ctx.Input<phi::DenseTensor>("Scores");
-    auto* outs = ctx.Output<LoDTensor>("Out");
+    auto* outs = ctx.Output<phi::DenseTensor>("Out");
    auto& score_dims = scores_input->dims();
    auto score_size = score_dims.size();
    auto& dev_ctx = ctx.template device_context<phi::CPUContext>();
-    LoDTensor scores;
+    phi::DenseTensor scores;
-    LoDTensor boxes;
+    phi::DenseTensor boxes;
    paddle::framework::TensorCopySync(
        *scores_input, platform::CPUPlace(), &scores);
    paddle::framework::TensorCopySync(
@@ -476,10 +475,12 @@ class LocalityAwareNMSOpMaker : public framework::OpProtoAndCheckerMaker {
                  "Whether detections are normalized.")
        .SetDefault(true);
    AddOutput("Out",
-              "(LoDTensor) A 2-D LoDTensor with shape [No, 6] represents the "
+              "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 6] "
+              "represents the "
              "detections. Each row has 6 values: "
              "[label, confidence, xmin, ymin, xmax, ymax] or "
-              "(LoDTensor) A 2-D LoDTensor with shape [No, 10] represents the "
+              "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 10] "
+              "represents the "
              "detections. Each row has 10 values: "
              "[label, confidence, x1, y1, x2, y2, x3, y3, x4, y4]. No is the "
              "total number of detections in this mini-batch."
@@ -501,7 +502,7 @@ Aftern NMS step, at most keep_top_k number of total bboxes are to be kept
 per image if keep_top_k is larger than -1.
 This operator support multi-class and batched inputs. It applying NMS
 independently for each class. The outputs is a 2-D LoDTenosr, for each
-image, the offsets in first dimension of LoDTensor are called LoD, the number
+image, the offsets in first dimension of phi::DenseTensor are called LoD, the number
 of offset is N + 1, where N is the batch size. If LoD[i + 1] - LoD[i] == 0,
 means there is no detected bbox for this image.

--- a/paddle/fluid/operators/detection/matrix_nms_op.cc
+++ b/paddle/fluid/operators/detection/matrix_nms_op.cc
@@ -21,7 +21,6 @@ namespace paddle {
 namespace operators {
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 class MatrixNMSOp : public framework::OperatorWithKernel {
 public:
@@ -89,14 +88,16 @@ class MatrixNMSOpMaker : public framework::OpProtoAndCheckerMaker {
                   "when 'use_gaussian' is enabled.")
        .SetDefault(2.);
    AddOutput("Out",
-              "(LoDTensor) A 2-D LoDTensor with shape [No, 6] represents the "
+              "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 6] "
+              "represents the "
              "detections. Each row has 6 values: "
              "[label, confidence, xmin, ymin, xmax, ymax]. "
              "the offsets in first dimension are called LoD, the number of "
              "offset is N + 1, if LoD[i + 1] - LoD[i] == 0, means there is "
              "no detected bbox.");
    AddOutput("Index",
-              "(LoDTensor) A 2-D LoDTensor with shape [No, 1] represents the "
+              "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 1] "
+              "represents the "
              "index of selected bbox. The index is the absolute index cross "
              "batches.");
    AddOutput("RoisNum", "(Tensor), Number of RoIs in each images.")
@@ -113,7 +114,7 @@ Aftern NMS step, at most keep_top_k number of total bboxes are to be kept
 per image if keep_top_k is larger than -1.
 This operator support multi-class and batched inputs. It applying NMS
 independently for each class. The outputs is a 2-D LoDTenosr, for each
-image, the offsets in first dimension of LoDTensor are called LoD, the number
+image, the offsets in first dimension of phi::DenseTensor are called LoD, the number
 of offset is N + 1, where N is the batch size. If LoD[i + 1] - LoD[i] == 0,
 means there is no detected bbox for this image. Now this operator has one more
 output, which is RoisNum. The size of RoisNum is N, RoisNum[i] means the number of

--- a/paddle/fluid/operators/detection/mine_hard_examples_op.cc
+++ b/paddle/fluid/operators/detection/mine_hard_examples_op.cc
@@ -363,15 +363,15 @@ class MineHardExamplesOpMaker : public framework::OpProtoAndCheckerMaker {
        .SetDefault("max_negative")
        .InEnum({"hard_example", "max_negative"});
-    AddOutput(
+    AddOutput("NegIndices",
-        "NegIndices",
+              "(phi::DenseTensor<int>) The output of negative example indices. "
-        "(LoDTensor<int>) The output of negative example indices. a LoDTensor "
+              "a phi::DenseTensor "
-        "with shape [Neg, 1]. The size of lod[0] minus 1 is batch size, "
+              "with shape [Neg, 1]. The size of lod[0] minus 1 is batch size, "
-        "and each element is the prior box index. "
+              "and each element is the prior box index. "
-        "For example, the batch size is 2, the lod is [[0, 1, 2]], "
+              "For example, the batch size is 2, the lod is [[0, 1, 2]], "
-        "the sample 0's box 1(MatchIndices[0][1]) is selected, "
+              "the sample 0's box 1(MatchIndices[0][1]) is selected, "
-        "and sample 1's box 0 is selected. The output NegIndices is "
+              "and sample 1's box 0 is selected. The output NegIndices is "
-        "[[1], [0]].");
+              "[[1], [0]].");
    AddOutput("UpdatedMatchIndices",
              "(Tensor<int>) The output of updated MatchIndices, a tensor with "

--- a/paddle/fluid/operators/detection/multiclass_nms_op.cc
+++ b/paddle/fluid/operators/detection/multiclass_nms_op.cc
@@ -22,7 +22,6 @@ namespace paddle {
 namespace operators {
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 inline std::vector<size_t> GetNmsLodFromRoisNum(
    const phi::DenseTensor* rois_num) {
@@ -357,11 +356,11 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
  }
  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* boxes = ctx.Input<LoDTensor>("BBoxes");
+    auto* boxes = ctx.Input<phi::DenseTensor>("BBoxes");
-    auto* scores = ctx.Input<LoDTensor>("Scores");
+    auto* scores = ctx.Input<phi::DenseTensor>("Scores");
-    auto* outs = ctx.Output<LoDTensor>("Out");
+    auto* outs = ctx.Output<phi::DenseTensor>("Out");
    bool return_index = ctx.HasOutput("Index") ? true : false;
-    auto index = ctx.Output<LoDTensor>("Index");
+    auto index = ctx.Output<phi::DenseTensor>("Index");
    bool has_roisnum = ctx.HasInput("RoisNum") ? true : false;
    auto rois_num = ctx.Input<phi::DenseTensor>("RoisNum");
    auto score_dims = scores->dims();
@@ -496,7 +495,7 @@ class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker {
             "predicted locations of M bounding bboxes, N is the batch size. "
             "Each bounding box has four coordinate values and the layout is "
             "[xmin, ymin, xmax, ymax], when box size equals to 4."
-             "2. (LoDTensor) A 3-D Tensor with shape [M, C, 4]"
+             "2. (phi::DenseTensor) A 3-D Tensor with shape [M, C, 4]"
             "M is the number of bounding boxes, C is the class number");
    AddInput("Scores",
             "Two types of scores are supported:"
@@ -505,7 +504,7 @@ class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker {
             "class number, M is number of bounding boxes. For each category "
             "there are total M scores which corresponding M bounding boxes. "
             " Please note, M is equal to the 2nd dimension of BBoxes. "
-             "2. (LoDTensor) A 2-D LoDTensor with shape [M, C]. "
+             "2. (phi::DenseTensor) A 2-D phi::DenseTensor with shape [M, C]. "
             "M is the number of bbox, C is the class number. In this case, "
             "Input BBoxes should be the second case with shape [M, C, 4].");
    AddAttr<int>(
@@ -540,10 +539,12 @@ class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker {
                  "Whether detections are normalized.")
        .SetDefault(true);
    AddOutput("Out",
-              "(LoDTensor) A 2-D LoDTensor with shape [No, 6] represents the "
+              "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 6] "
+              "represents the "
              "detections. Each row has 6 values: "
              "[label, confidence, xmin, ymin, xmax, ymax] or "
-              "(LoDTensor) A 2-D LoDTensor with shape [No, 10] represents the "
+              "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 10] "
+              "represents the "
              "detections. Each row has 10 values: "
              "[label, confidence, x1, y1, x2, y2, x3, y3, x4, y4]. No is the "
              "total number of detections in this mini-batch."
@@ -564,7 +565,7 @@ Aftern NMS step, at most keep_top_k number of total bboxes are to be kept
 per image if keep_top_k is larger than -1.
 This operator support multi-class and batched inputs. It applying NMS
 independently for each class. The outputs is a 2-D LoDTenosr, for each
-image, the offsets in first dimension of LoDTensor are called LoD, the number
+image, the offsets in first dimension of phi::DenseTensor are called LoD, the number
 of offset is N + 1, where N is the batch size. If LoD[i + 1] - LoD[i] == 0,
 means there is no detected bbox for this image.
 )DOC");
@@ -600,7 +601,8 @@ class MultiClassNMS2OpMaker : public MultiClassNMSOpMaker {
  void Make() override {
    MultiClassNMSOpMaker::Make();
    AddOutput("Index",
-              "(LoDTensor) A 2-D LoDTensor with shape [No, 1] represents the "
+              "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 1] "
+              "represents the "
              "index of selected bbox. The index is the absolute index cross "
              "batches.")
        .AsIntermediate();

--- a/paddle/fluid/operators/detection/retinanet_detection_output_op.cc
+++ b/paddle/fluid/operators/detection/retinanet_detection_output_op.cc
@@ -19,7 +19,6 @@ namespace paddle {
 namespace operators {
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 class RetinanetDetectionOutputOp : public framework::OperatorWithKernel {
 public:
@@ -490,8 +489,8 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel<T> {
    auto boxes = ctx.MultiInput<phi::DenseTensor>("BBoxes");
    auto scores = ctx.MultiInput<phi::DenseTensor>("Scores");
    auto anchors = ctx.MultiInput<phi::DenseTensor>("Anchors");
-    auto* im_info = ctx.Input<LoDTensor>("ImInfo");
+    auto* im_info = ctx.Input<phi::DenseTensor>("ImInfo");
-    auto* outs = ctx.Output<LoDTensor>("Out");
+    auto* outs = ctx.Output<phi::DenseTensor>("Out");
    std::vector<Tensor> boxes_list(boxes.size());
    std::vector<Tensor> scores_list(scores.size());
@@ -586,7 +585,8 @@ class RetinanetDetectionOutputOpMaker
             "[xmin, ymin, xmax, ymax].")
        .AsDuplicable();
    AddInput("ImInfo",
-             "(LoDTensor) A 2-D LoDTensor with shape [N, 3] represents the "
+             "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [N, 3] "
+             "represents the "
             "image information. N is the batch size, each image information "
             "includes height, width and scale.");
    AddAttr<float>("score_threshold",
@@ -609,7 +609,8 @@ class RetinanetDetectionOutputOpMaker
        "Number of total bounding boxes to be kept per image after NMS "
        "step.");
    AddOutput("Out",
-              "(LoDTensor) A 2-D LoDTensor with shape [No, 6] represents the "
+              "(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 6] "
+              "represents the "
              "detections. Each row has 6 values: "
              "[label, confidence, xmin, ymin, xmax, ymax]"
              "No is the total number of detections in this mini-batch."
@@ -650,7 +651,7 @@ After NMS step, at most keep_top_k number of total bounding boxes are to be kept
 per image if keep_top_k is larger than -1.
 This operator support multi-class and batched inputs. It applying NMS
 independently for each class. The outputs is a 2-D LoDTenosr, for each
-image, the offsets in first dimension of LoDTensor are called LoD, the number
+image, the offsets in first dimension of phi::DenseTensor are called LoD, the number
 of offset is N + 1, where N is the batch size. If LoD[i + 1] - LoD[i] == 0,
 means there is no detected bounding box for this image. If there is no detected boxes
 for all images, all the elements in LoD are set to 0, and the output tensor is

--- a/paddle/fluid/operators/detection/roi_perspective_transform_op.cc
+++ b/paddle/fluid/operators/detection/roi_perspective_transform_op.cc
@@ -23,7 +23,6 @@ namespace paddle {
 namespace operators {
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 template <typename T>
 bool GT_E(T a, T b) {
@@ -504,7 +503,7 @@ class ROIPerspectiveTransformOp : public framework::OperatorWithKernel {
        rois_dims.size(),
        2,
        platform::errors::InvalidArgument(
-            "ROIs should be a 2-D LoDTensor of shape (num_rois, 8)"
+            "ROIs should be a 2-D phi::DenseTensor of shape (num_rois, 8)"
            "given as [[x0, y0, x1, y1, x2, y2, x3, y3], ...]. But received "
            "rois dims is %d",
            rois_dims.size()));
@@ -512,7 +511,7 @@ class ROIPerspectiveTransformOp : public framework::OperatorWithKernel {
        rois_dims[1],
        8,
        platform::errors::InvalidArgument(
-            "ROIs should be a 2-D LoDTensor of shape (num_rois, 8)"
+            "ROIs should be a 2-D phi::DenseTensor of shape (num_rois, 8)"
            "given as [[x0, y0, x1, y1, x2, y2, x3, y3], ...]. But received %d",
            rois_dims[1]));
@@ -608,9 +607,9 @@ class ROIPerspectiveTransformOpMaker
             "H is the height of the feature, and "
             "W is the width of the feature.");
    AddInput("ROIs",
-             "(LoDTensor), "
+             "(phi::DenseTensor), "
             "ROIs (Regions of Interest) to be transformed. "
-             "should be a 2-D LoDTensor of shape (num_rois, 8)"
+             "should be a 2-D phi::DenseTensor of shape (num_rois, 8)"
             "given as [[x1, y1, x2, y2, x3, y3, x4, y4], ...]."
             "(x1, y1) is the top left coordinates, and "
             "(x2, y2) is the top right coordinates, and"

--- a/paddle/fluid/operators/detection/rpn_target_assign_op.cc
+++ b/paddle/fluid/operators/detection/rpn_target_assign_op.cc
@@ -22,7 +22,6 @@ namespace paddle {
 namespace operators {
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 template <typename T,
          int MajorType = Eigen::RowMajor,
          typename IndexType = Eigen::DenseIndex>
@@ -105,7 +104,9 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel {
 };
 template <typename T>
-void AppendRpns(LoDTensor* out, int64_t offset, phi::DenseTensor* to_add) {
+void AppendRpns(phi::DenseTensor* out,
+                int64_t offset,
+                phi::DenseTensor* to_add) {
  auto* out_data = out->data<T>();
  auto* to_add_data = to_add->data<T>();
  memcpy(out_data + offset, to_add_data, to_add->numel() * sizeof(T));
@@ -395,15 +396,16 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* anchor = context.Input<phi::DenseTensor>("Anchor");  // (H*W*A) * 4
-    auto* gt_boxes = context.Input<LoDTensor>("GtBoxes");
+    auto* gt_boxes = context.Input<phi::DenseTensor>("GtBoxes");
-    auto* is_crowd = context.Input<LoDTensor>("IsCrowd");
+    auto* is_crowd = context.Input<phi::DenseTensor>("IsCrowd");
-    auto* im_info = context.Input<LoDTensor>("ImInfo");
+    auto* im_info = context.Input<phi::DenseTensor>("ImInfo");
-    auto* loc_index = context.Output<LoDTensor>("LocationIndex");
+    auto* loc_index = context.Output<phi::DenseTensor>("LocationIndex");
-    auto* score_index = context.Output<LoDTensor>("ScoreIndex");
+    auto* score_index = context.Output<phi::DenseTensor>("ScoreIndex");
-    auto* tgt_bbox = context.Output<LoDTensor>("TargetBBox");
+    auto* tgt_bbox = context.Output<phi::DenseTensor>("TargetBBox");
-    auto* tgt_lbl = context.Output<LoDTensor>("TargetLabel");
+    auto* tgt_lbl = context.Output<phi::DenseTensor>("TargetLabel");
-    auto* bbox_inside_weight = context.Output<LoDTensor>("BBoxInsideWeight");
+    auto* bbox_inside_weight =
+        context.Output<phi::DenseTensor>("BBoxInsideWeight");
    PADDLE_ENFORCE_EQ(gt_boxes->lod().size(),
                      1UL,
@@ -598,11 +600,11 @@ class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
    AddInput("Anchor",
             "(Tensor) input anchor is a 2-D Tensor with shape [H*W*A, 4].");
    AddInput("GtBoxes",
-             "(LoDTensor) input ground-truth bbox with shape [K, 4].");
+             "(phi::DenseTensor) input ground-truth bbox with shape [K, 4].");
    AddInput("IsCrowd",
-             "(LoDTensor) input which indicates ground-truth is crowd.");
+             "(phi::DenseTensor) input which indicates ground-truth is crowd.");
    AddInput("ImInfo",
-             "(LoDTensor) input image information with shape [N, 3]. "
+             "(phi::DenseTensor) input image information with shape [N, 3]. "
             "N is the batch size, each image information includes height, "
             "width and scale.");
    AddAttr<int>("rpn_batch_size_per_im",
@@ -685,13 +687,13 @@ class RetinanetTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
    AddInput("Anchor",
             "(Tensor) input anchor is a 2-D Tensor with shape [H*W*A, 4].");
    AddInput("GtBoxes",
-             "(LoDTensor) input ground-truth bbox with shape [K, 4].");
+             "(phi::DenseTensor) input ground-truth bbox with shape [K, 4].");
    AddInput("GtLabels",
-             "(LoDTensor) input ground-truth label with shape [K, 1].");
+             "(phi::DenseTensor) input ground-truth label with shape [K, 1].");
    AddInput("IsCrowd",
-             "(LoDTensor) input which indicates ground-truth is crowd.");
+             "(phi::DenseTensor) input which indicates ground-truth is crowd.");
    AddInput("ImInfo",
-             "(LoDTensor) input image information with shape [N, 3]. "
+             "(phi::DenseTensor) input image information with shape [N, 3]. "
             "N is the batch size, each image information includes height, "
             "width and scale.");
    AddAttr<float>(
@@ -994,17 +996,18 @@ class RetinanetTargetAssignKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* anchor = context.Input<phi::DenseTensor>("Anchor");  // (H*W*A) * 4
-    auto* gt_boxes = context.Input<LoDTensor>("GtBoxes");
+    auto* gt_boxes = context.Input<phi::DenseTensor>("GtBoxes");
-    auto* gt_labels = context.Input<LoDTensor>("GtLabels");
+    auto* gt_labels = context.Input<phi::DenseTensor>("GtLabels");
-    auto* is_crowd = context.Input<LoDTensor>("IsCrowd");
+    auto* is_crowd = context.Input<phi::DenseTensor>("IsCrowd");
-    auto* im_info = context.Input<LoDTensor>("ImInfo");
+    auto* im_info = context.Input<phi::DenseTensor>("ImInfo");
-    auto* loc_index = context.Output<LoDTensor>("LocationIndex");
+    auto* loc_index = context.Output<phi::DenseTensor>("LocationIndex");
-    auto* score_index = context.Output<LoDTensor>("ScoreIndex");
+    auto* score_index = context.Output<phi::DenseTensor>("ScoreIndex");
-    auto* tgt_bbox = context.Output<LoDTensor>("TargetBBox");
+    auto* tgt_bbox = context.Output<phi::DenseTensor>("TargetBBox");
-    auto* tgt_lbl = context.Output<LoDTensor>("TargetLabel");
+    auto* tgt_lbl = context.Output<phi::DenseTensor>("TargetLabel");
-    auto* bbox_inside_weight = context.Output<LoDTensor>("BBoxInsideWeight");
+    auto* bbox_inside_weight =
-    auto* fg_num = context.Output<LoDTensor>("ForegroundNumber");
+        context.Output<phi::DenseTensor>("BBoxInsideWeight");
+    auto* fg_num = context.Output<phi::DenseTensor>("ForegroundNumber");
    PADDLE_ENFORCE_EQ(
        gt_boxes->lod().size(),

--- a/paddle/fluid/operators/detection/target_assign_op.cc
+++ b/paddle/fluid/operators/detection/target_assign_op.cc
@@ -89,7 +89,8 @@ class TargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X",
-             "(LoDTensor), This input is a 3D LoDTensor with shape [M, P, K]. "
+             "(phi::DenseTensor), This input is a 3D phi::DenseTensor with "
+             "shape [M, P, K]. "
             "Some elements in X will be assigned to Out based on the "
             "MatchIndices and NegIndices.");
    AddInput("MatchIndices",
@@ -97,7 +98,8 @@ class TargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
             "with shape [N, P], If MatchIndices[i][j] is -1, the j-th entity "
             "of column is not matched to any entity of row in i-th instance.");
    AddInput("NegIndices",
-             "(LoDTensor, default LoDTensor<int>), The input negative example "
+             "(phi::DenseTensor, default phi::DenseTensor<int>), The input "
+             "negative example "
             "indices are an optional input with shape [Neg, 1], where Neg is "
             "the total number of negative example indices.")
        .AsDispensable();

--- a/paddle/fluid/operators/elementwise/elementwise_add_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_add_op.cc
@@ -31,15 +31,17 @@ class ElementwiseAddOpMaker : public ElementwiseOpMaker {
  std::string GetEquation() const override { return "Out = X + Y"; }
  void AddInputX() override {
-    AddInput("X",
+    AddInput(
-             "(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
+        "X",
-             "should be int32, int64, float32, float64.");
+        "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
+        "should be int32, int64, float32, float64.");
  }
  void AddInputY() override {
-    AddInput("Y",
+    AddInput(
-             "(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
+        "Y",
-             "should be int32, int64, float32, float64.");
+        "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
+        "should be int32, int64, float32, float64.");
  }
  std::string GetOpFuntionality() const override {

--- a/paddle/fluid/operators/elementwise/elementwise_div_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_div_op.cc
@@ -29,15 +29,17 @@ class ElementwiseDivOpMaker : public ElementwiseOpMaker {
  std::string GetEquation() const override { return "Out = X / Y"; }
  void AddInputX() override {
-    AddInput("X",
+    AddInput(
-             "(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
+        "X",
-             "should be int32, int64, float32, float64.");
+        "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
+        "should be int32, int64, float32, float64.");
  }
  void AddInputY() override {
-    AddInput("Y",
+    AddInput(
-             "(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
+        "Y",
-             "should be int32, int64, float32, float64.");
+        "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
+        "should be int32, int64, float32, float64.");
  }
  std::string GetOpFuntionality() const override {

--- a/paddle/fluid/operators/elementwise/elementwise_floordiv_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_floordiv_op.cc
@@ -35,15 +35,17 @@ class ElementwiseFloorDivOpMaker : public ElementwiseOpMaker {
  std::string GetEquation() const override { return "Out = X // Y"; }
  void AddInputX() override {
-    AddInput("X",
+    AddInput(
-             "(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
+        "X",
-             "should be int32, int64.");
+        "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
+        "should be int32, int64.");
  }
  void AddInputY() override {
-    AddInput("Y",
+    AddInput(
-             "(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
+        "Y",
-             "should be int32, int64.");
+        "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
+        "should be int32, int64.");
  }
  std::string GetOpFuntionality() const override {

--- a/paddle/fluid/operators/elementwise/elementwise_mul_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_mul_op.cc
@@ -28,15 +28,17 @@ class ElementwiseMulOpMaker : public ElementwiseOpMaker {
  std::string GetEquation() const override { return "Out = X \\\\odot Y"; }
  void AddInputX() override {
-    AddInput("X",
+    AddInput(
-             "(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
+        "X",
-             "should be int32, int64, float32, float64.");
+        "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
+        "should be int32, int64, float32, float64.");
  }
  void AddInputY() override {
-    AddInput("Y",
+    AddInput(
-             "(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
+        "Y",
-             "should be int32, int64, float32, float64.");
+        "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
+        "should be int32, int64, float32, float64.");
  }
  std::string GetOpFuntionality() const override {

--- a/paddle/fluid/operators/elementwise/elementwise_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_op.h
@@ -43,13 +43,14 @@ class ElementwiseOp : public framework::OperatorWithKernel {
    OP_INOUT_CHECK(ctx->HasInput("Y"), "Input", "Y", "ElementwiseOp");
    OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "ElementwiseOp");
-    PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("Y").front(),
+    PADDLE_ENFORCE_EQ(
-                      framework::proto::VarType::LOD_TENSOR,
+        ctx->GetInputsVarType("Y").front(),
-                      platform::errors::InvalidArgument(
+        framework::proto::VarType::LOD_TENSOR,
-                          "The input var's type should be LoDTensor, but the "
+        platform::errors::InvalidArgument(
-                          "received is %s [%s].",
+            "The input var's type should be phi::DenseTensor, but the "
-                          ctx->GetInputsVarType("Y").front(),
+            "received is %s [%s].",
-                          ctx->Inputs("Y").front()));
+            ctx->GetInputsVarType("Y").front(),
+            ctx->Inputs("Y").front()));
    if (ctx->GetInputsVarType("X").front() ==
        framework::proto::VarType::SELECTED_ROWS) {

--- a/paddle/fluid/operators/elementwise/elementwise_op_function.h
+++ b/paddle/fluid/operators/elementwise/elementwise_op_function.h
@@ -61,11 +61,11 @@ namespace operators {
 /*
 *  Pack input and output tensors into respective vectors with
 *  consideration of varible X`s class type.
- *  Input variable X is supported to be whether LoDTensor or
+ *  Input variable X is supported to be whether phi::DenseTensor or
 *  SelectedRows class type in this package function, once X
 *  was SelectedRows type, a valid pointer x_for_selectedrows
 *  is excepted to be passed in from op kernel for acquisition
- *  of the valid address of LoDTensor created ahead in the function.
+ *  of the valid address of phi::DenseTensor created ahead in the function.
 */
 template <typename OutT>
 int PackTensorsIntoVector(const framework::ExecutionContext &ctx,
@@ -112,7 +112,7 @@ int PackTensorsIntoVector(const framework::ExecutionContext &ctx,
  } else {
    PADDLE_THROW(platform::errors::InvalidArgument(
        "X's type[%s] is not supported by elementwise_op. X's type should be "
-        "LoDTensor or SelectedRows.",
+        "phi::DenseTensor or SelectedRows.",
        framework::ToTypeName(x_var->Type())));
  }
  z->mutable_data<OutT>(ctx.GetPlace());

--- a/paddle/fluid/operators/elementwise/elementwise_sub_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_sub_op.cc
@@ -34,15 +34,17 @@ class ElementwiseSubOpMaker : public ElementwiseOpMaker {
  std::string GetEquation() const override { return "Out = X - Y"; }
  void AddInputX() override {
-    AddInput("X",
+    AddInput(
-             "(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
+        "X",
-             "should be int32, int64, float32, float64.");
+        "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
+        "should be int32, int64, float32, float64.");
  }
  void AddInputY() override {
-    AddInput("Y",
+    AddInput(
-             "(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
+        "Y",
-             "should be int32, int64, float32, float64.");
+        "(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
+        "should be int32, int64, float32, float64.");
  }
  std::string GetOpFuntionality() const override {

--- a/paddle/fluid/operators/elementwise/elementwise_xpu.h
+++ b/paddle/fluid/operators/elementwise/elementwise_xpu.h
@@ -43,8 +43,8 @@ void XPUElementwise(const framework::ExecutionContext& ctx,
  PADDLE_ENFORCE_EQ(
      x_var->IsType<phi::DenseTensor>(),
      true,
-      platform::errors::InvalidArgument(
+      platform::errors::InvalidArgument("XPU only support phi::DenseTensor, "
-          "XPU only support LoDTensor, Input(X) is not LoDTensor"));
+                                        "Input(X) is not phi::DenseTensor"));
  auto x = x_var->Get<phi::DenseTensor>();
  auto* y = ctx.Input<phi::DenseTensor>("Y");

--- a/paddle/fluid/operators/fused/fused_bn_activation_op.cc
+++ b/paddle/fluid/operators/fused/fused_bn_activation_op.cc
@@ -24,8 +24,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-using LoDTensor = phi::DenseTensor;
 void FusedBatchNormActOp::InferShape(framework::InferShapeContext *ctx) const {
  PADDLE_ENFORCE_EQ(ctx->HasInput("X"),
                    true,
@@ -309,8 +307,8 @@ framework::OpKernelType FusedBatchNormActGradOp::GetExpectedKernelType(
  const Tensor *t = nullptr;
  if (var->IsType<Tensor>()) {
    t = &var->Get<Tensor>();
-  } else if (var->IsType<LoDTensor>()) {
+  } else if (var->IsType<phi::DenseTensor>()) {
-    t = &var->Get<LoDTensor>();
+    t = &var->Get<phi::DenseTensor>();
  }
  if (t == nullptr) {
    PADDLE_THROW(

--- a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc
+++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc
@@ -23,8 +23,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-using LoDTensor = phi::DenseTensor;
 void FusedBatchNormAddActOp::InferShape(
    framework::InferShapeContext *ctx) const {
  OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "FusedBatchNormAddActOp");
@@ -267,8 +265,8 @@ framework::OpKernelType FusedBatchNormAddActGradOp::GetExpectedKernelType(
  const Tensor *t = nullptr;
  if (var->IsType<Tensor>()) {
    t = &var->Get<Tensor>();
-  } else if (var->IsType<LoDTensor>()) {
+  } else if (var->IsType<phi::DenseTensor>()) {
-    t = &var->Get<LoDTensor>();
+    t = &var->Get<phi::DenseTensor>();
  }
  if (t == nullptr) {
    PADDLE_THROW(

--- a/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc
+++ b/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc
@@ -213,23 +213,25 @@ void FusedEmbeddingFCLSTMOpMaker::Make() {
           "input. This is a tensor with shape (N x D), where N is the "
           "batch size. `H0` and `C0` can be NULL but only at the same time.")
      .AsDispensable();
-  AddOutput("Hidden",
+  AddOutput(
-            "(LoDTensor) (same as LSTMOp) the hidden state of LSTM operator. "
+      "Hidden",
-            "The shape is (T x D), and lod is the same with the `Input`.");
+      "(phi::DenseTensor) (same as LSTMOp) the hidden state of LSTM operator. "
-  AddOutput("Cell",
+      "The shape is (T x D), and lod is the same with the `Input`.");
-            "(LoDTensor) (same as LSTMOp) the cell state of LSTM operator. "
+  AddOutput(
-            "The shape is (T x D), and lod is the same with the `Input`.");
+      "Cell",
+      "(phi::DenseTensor) (same as LSTMOp) the cell state of LSTM operator. "
+      "The shape is (T x D), and lod is the same with the `Input`.");
  AddOutput("XX",
-            "(LoDTensor) the result after X * WeightX (size is T x 4D)"
+            "(phi::DenseTensor) the result after X * WeightX (size is T x 4D)"
            " or batched_X (size is T x M), this will be automatically chosen,"
            " where T is the total time steps in this mini-batch,"
            " D is the hidden size, M is the dim size of x input.")
      .AsIntermediate();
-  AddOutput("BatchedInput", "(LoDTensor) (T x 4D).").AsIntermediate();
+  AddOutput("BatchedInput", "(phi::DenseTensor) (T x 4D).").AsIntermediate();
-  AddOutput("BatchedHidden", "(LoDTensor) (T x D).").AsIntermediate();
+  AddOutput("BatchedHidden", "(phi::DenseTensor) (T x D).").AsIntermediate();
-  AddOutput("BatchedCell", "(LoDTensor) (T x D).").AsIntermediate();
+  AddOutput("BatchedCell", "(phi::DenseTensor) (T x D).").AsIntermediate();
-  AddOutput("ReorderedH0", "(LoDTensor) (N x D).").AsIntermediate();
+  AddOutput("ReorderedH0", "(phi::DenseTensor) (N x D).").AsIntermediate();
-  AddOutput("ReorderedC0", "(LoDTensor) (N x D).").AsIntermediate();
+  AddOutput("ReorderedC0", "(phi::DenseTensor) (N x D).").AsIntermediate();
  AddAttr<bool>("use_peepholes",
                "(bool, default: True) "
                "whether to enable diagonal/peephole connections.")
@@ -286,15 +288,15 @@ class FusedEmbeddingFCLSTMKernel : public framework::OpKernel<T> {
  }
 #define INIT_BASE_INPUT_OUTPUT                                  \
-  auto* ids = ctx.Input<LoDTensor>("Ids");                      \
+  auto* ids = ctx.Input<phi::DenseTensor>("Ids");               \
  auto* h0 = ctx.Input<phi::DenseTensor>("H0");                 \
  auto* c0 = ctx.Input<phi::DenseTensor>("C0");                 \
  auto* embeddings = ctx.Input<phi::DenseTensor>("Embeddings"); \
  auto* wh = ctx.Input<phi::DenseTensor>("WeightH");            \
  auto* bias = ctx.Input<phi::DenseTensor>("Bias");             \
-  auto* xx = ctx.Output<LoDTensor>("XX");                       \
+  auto* xx = ctx.Output<phi::DenseTensor>("XX");                \
-  auto* hidden_out = ctx.Output<LoDTensor>("Hidden");           \
+  auto* hidden_out = ctx.Output<phi::DenseTensor>("Hidden");    \
-  auto* cell_out = ctx.Output<LoDTensor>("Cell");               \
+  auto* cell_out = ctx.Output<phi::DenseTensor>("Cell");        \
  bool is_reverse = ctx.Attr<bool>("is_reverse");               \
  bool use_peepholes = ctx.Attr<bool>("use_peepholes");
@@ -508,9 +510,9 @@ class FusedEmbeddingFCLSTMKernel : public framework::OpKernel<T> {
    auto* reordered_h0 = ctx.Output<phi::DenseTensor>("ReorderedH0");
    auto* reordered_c0 = ctx.Output<phi::DenseTensor>("ReorderedC0");
-    auto* batched_input = ctx.Output<LoDTensor>("BatchedInput");
+    auto* batched_input = ctx.Output<phi::DenseTensor>("BatchedInput");
-    auto* batched_c_out = ctx.Output<LoDTensor>("BatchedCell");
+    auto* batched_c_out = ctx.Output<phi::DenseTensor>("BatchedCell");
-    auto* batched_h_out = ctx.Output<LoDTensor>("BatchedHidden");
+    auto* batched_h_out = ctx.Output<phi::DenseTensor>("BatchedHidden");
    T* xx_data = xx->mutable_data<T>(place);
    T* batched_input_data = batched_input->mutable_data<T>(place);
    T* batched_c_out_data = batched_c_out->mutable_data<T>(place);

--- a/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.h
+++ b/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.h
@@ -18,7 +18,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-using LoDTensor = phi::DenseTensor;
 using Tensor = phi::DenseTensor;
 class FusedEmbeddingFCLSTMOp : public framework::OperatorWithKernel {

--- a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
+++ b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
@@ -162,7 +162,7 @@ class FusedEmbeddingSeqPoolOpGradVarTypeInference
                         framework::proto::VarType::SELECTED_ROWS);
    } else {
      VLOG(3) << "fused_embedding_seq_pool_grad op "
-              << framework::GradVarName("W") << " is set to LoDTensor";
+              << framework::GradVarName("W") << " is set to phi::DenseTensor";
      ctx->SetOutputType(out_var_name, framework::proto::VarType::LOD_TENSOR);
    }
    ctx->SetOutputDataType(out_var_name, ctx->GetInputDataType("W"));

--- a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
+++ b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
@@ -29,7 +29,6 @@ namespace paddle {
 namespace operators {
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 using SelectedRows = phi::SelectedRows;
 using DDim = framework::DDim;
@@ -83,9 +82,9 @@ void prepare_csr_data(const std::vector<uint64_t> &offset,
 template <typename T>
 struct EmbeddingVSumFunctor {
  void operator()(const framework::ExecutionContext &context,
-                  const LoDTensor *table_t,
+                  const phi::DenseTensor *table_t,
-                  const LoDTensor *ids_t,
+                  const phi::DenseTensor *ids_t,
-                  LoDTensor *output_t) {
+                  phi::DenseTensor *output_t) {
    auto *table = table_t->data<T>();
    int64_t table_height = table_t->dims()[0];
    int64_t table_width = table_t->dims()[1];
@@ -141,9 +140,11 @@ template <typename T>
 class FusedEmbeddingSeqPoolKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &context) const override {
-    const LoDTensor *ids_t = context.Input<LoDTensor>("Ids");  // int tensor
+    const phi::DenseTensor *ids_t =
-    LoDTensor *output_t = context.Output<LoDTensor>("Out");    // float tensor
+        context.Input<phi::DenseTensor>("Ids");  // int tensor
-    const LoDTensor *table_var = context.Input<LoDTensor>("W");
+    phi::DenseTensor *output_t =
+        context.Output<phi::DenseTensor>("Out");  // float tensor
+    const phi::DenseTensor *table_var = context.Input<phi::DenseTensor>("W");
    const std::string &combiner_type = context.Attr<std::string>("combiner");
    int64_t last_dim =
@@ -228,23 +229,24 @@ class FusedEmbeddingSeqPoolGradKernel : public framework::OpKernel<T> {
  void Compute(const framework::ExecutionContext &context) const override {
    auto *table_var = context.InputVar("W");
    DDim table_dim;
-    if (table_var->IsType<LoDTensor>()) {
+    if (table_var->IsType<phi::DenseTensor>()) {
-      table_dim = context.Input<LoDTensor>("W")->dims();
+      table_dim = context.Input<phi::DenseTensor>("W")->dims();
    } else if (table_var->IsType<phi::SelectedRows>()) {
      auto *table_t = context.Input<phi::SelectedRows>("W");
      table_dim = table_t->value().dims();
    } else {
      PADDLE_THROW(platform::errors::PermissionDenied(
          "The parameter W of a LookupTable "
-          "must be either LoDTensor or SelectedRows."));
+          "must be either phi::DenseTensor or SelectedRows."));
    }
    bool is_sparse = context.Attr<bool>("is_sparse");
    // Since paddings are not trainable and fixed in forward, the gradient of
    // paddings makes no sense and we don't deal with it in backward.
    if (is_sparse) {
-      auto *ids = context.Input<LoDTensor>("Ids");
+      auto *ids = context.Input<phi::DenseTensor>("Ids");
-      auto *d_output = context.Input<LoDTensor>(framework::GradVarName("Out"));
+      auto *d_output =
+          context.Input<phi::DenseTensor>(framework::GradVarName("Out"));
      auto *d_table =
          context.Output<phi::SelectedRows>(framework::GradVarName("W"));
      // runtime shape
@@ -276,9 +278,11 @@ class FusedEmbeddingSeqPoolGradKernel : public framework::OpKernel<T> {
    } else {
 #if defined(PADDLE_WITH_MKLML) && !defined(_WIN32) && !defined(__APPLE__) && \
    !defined(__OSX__)
-      auto *ids = context.Input<LoDTensor>("Ids");
+      auto *ids = context.Input<phi::DenseTensor>("Ids");
-      auto *d_output = context.Input<LoDTensor>(framework::GradVarName("Out"));
+      auto *d_output =
-      auto *d_table = context.Output<LoDTensor>(framework::GradVarName("W"));
+          context.Input<phi::DenseTensor>(framework::GradVarName("Out"));
+      auto *d_table =
+          context.Output<phi::DenseTensor>(framework::GradVarName("W"));
      int64_t padding_idx = context.Attr<int64_t>("padding_idx");
      d_table->Resize(table_dim);

--- a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc
+++ b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc
@@ -95,7 +95,7 @@ class FusedSeqpoolCVMOp : public framework::OperatorWithKernel {
 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
-    auto inputs = ctx.MultiInput<LoDTensor>("X");
+    auto inputs = ctx.MultiInput<phi::DenseTensor>("X");
    auto input_data_type = framework::proto::VarType::Type(0);
    bool flag = 0;
    for (auto* input : inputs) {
@@ -121,7 +121,7 @@ class FusedSeqpoolCVMOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X",
-             "(vector<LoDTensor>) The input tensors of"
+             "(vector<phi::DenseTensor>) The input tensors of"
             " operator.")
        .AsDuplicable();
    AddInput("CVM",

--- a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu
+++ b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu
@@ -424,7 +424,7 @@ template <typename T>
 class FusedSeqpoolCVMCUDAKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
-    auto inputs = ctx.MultiInput<LoDTensor>("X");
+    auto inputs = ctx.MultiInput<phi::DenseTensor>("X");
    auto outputs = ctx.MultiOutput<phi::DenseTensor>("Out");
    auto &dev_ctx = ctx.template device_context<phi::GPUContext>();
    const auto slot_size = inputs.size();
@@ -432,7 +432,7 @@ class FusedSeqpoolCVMCUDAKernel : public framework::OpKernel<T> {
    std::vector<const size_t *> lods_data(slot_size);
    std::vector<T *> output_data(slot_size);
-    std::vector<LoDTensor> seqpool_outputs(slot_size);
+    std::vector<phi::DenseTensor> seqpool_outputs(slot_size);
    std::vector<T *> seqpool_output_data(slot_size);
    auto padding_value = ctx.Attr<float>("pad_value");
@@ -509,9 +509,11 @@ template <typename T>
 class FusedSeqpoolCVMGradCUDAKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
-    auto out_grads = ctx.MultiInput<LoDTensor>(framework::GradVarName("Out"));
+    auto out_grads =
-    auto in_grads = ctx.MultiOutput<LoDTensor>(framework::GradVarName("X"));
+        ctx.MultiInput<phi::DenseTensor>(framework::GradVarName("Out"));
-    auto *cvm = ctx.Input<LoDTensor>("CVM");
+    auto in_grads =
+        ctx.MultiOutput<phi::DenseTensor>(framework::GradVarName("X"));
+    auto *cvm = ctx.Input<phi::DenseTensor>("CVM");
    auto &dev_ctx = ctx.template device_context<phi::GPUContext>();
    std::string pooltype = ctx.Attr<std::string>("pooltype");
    auto use_cvm = ctx.Attr<bool>("use_cvm");

--- a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.h
+++ b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.h
@@ -23,8 +23,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-using LoDTensor = phi::DenseTensor;
 template <typename T>
 class FusedSeqpoolCVMOpCPUKernel : public framework::OpKernel<T> {
 public:

--- a/paddle/fluid/operators/fused/fusion_group_op.cc
+++ b/paddle/fluid/operators/fused/fusion_group_op.cc
@@ -87,10 +87,10 @@ class FusionGroupOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("Inputs",
-             "(std::vector<LoDTensor>) The inputs of fusion_group op.")
+             "(std::vector<phi::DenseTensor>) The inputs of fusion_group op.")
        .AsDuplicable();
    AddOutput("Outs",
-              "(std::vector<LoDTensor>) The outputs of fusion_group op.")
+              "(std::vector<phi::DenseTensor>) The outputs of fusion_group op.")
        .AsDuplicable();
    AddAttr<std::vector<int>>("outs_dtype",
                              "The data type of Outputs in fusion_group op.")

--- a/paddle/fluid/operators/fused/fusion_gru_op.cc
+++ b/paddle/fluid/operators/fused/fusion_gru_op.cc
@@ -154,11 +154,12 @@ framework::OpKernelType FusionGRUOp::GetExpectedKernelType(
 }
 void FusionGRUOpMaker::Make() {
-  AddInput("X",
+  AddInput(
-           "(LoDTensor) the input is a LodTensor, which support "
+      "X",
-           "variable-time length input sequence. The underlying tensor in "
+      "(phi::DenseTensor) the input is a LodTensor, which support "
-           "this LoDTensor is a matrix with shape (T X M), where T is the "
+      "variable-time length input sequence. The underlying tensor in "
-           "total time steps in this mini-batch, M is the dim size of x.");
+      "this phi::DenseTensor is a matrix with shape (T X M), where T is the "
+      "total time steps in this mini-batch, M is the dim size of x.");
  AddInput("H0",
           "(Tensor, optional) The initial hidden state is an optional "
           "input. This is a tensor with shape (N x D), where N is the "
@@ -181,18 +182,18 @@ void FusionGRUOpMaker::Make() {
  AddOutput("ReorderedH0", "(Tensor) (N x D), which N is the min-batch size.")
      .AsIntermediate();
  AddOutput("XX",
-            "(LoDTensor) the result after X * WeightX (size is T x 3D)"
+            "(phi::DenseTensor) the result after X * WeightX (size is T x 3D)"
            " or batched_X (size is T x M), this will be automatically chosen,"
            " where T is the total time steps in this mini-batch,"
            " D is the hidden size, M is the dim size of x input.")
      .AsIntermediate();
  AddOutput("BatchedInput",
-            "(LoDTensor) This is the batched result of input X"
+            "(phi::DenseTensor) This is the batched result of input X"
            "or the batched result after fc, shape (T x 3D)")
      .AsIntermediate();
-  AddOutput("BatchedOut", "(LoDTensor) (T X D) save batched hidden.")
+  AddOutput("BatchedOut", "(phi::DenseTensor) (T X D) save batched hidden.")
      .AsIntermediate();
-  AddOutput("Hidden", "(LoDTensor) (T x D) Same as GRUOp");
+  AddOutput("Hidden", "(phi::DenseTensor) (T x D) Same as GRUOp");
  AddAttr<std::string>("activation",
                       "(string, default tanh) "
                       "The activation type used for output candidate {h}_t.")
@@ -257,9 +258,9 @@ class FusionGRUKernel : public framework::OpKernel<T> {
  }
 #define INIT_BASE_DEFINES                                  \
-  auto* x = ctx.Input<LoDTensor>("X");                     \
+  auto* x = ctx.Input<phi::DenseTensor>("X");              \
  auto* wh = ctx.Input<phi::DenseTensor>("WeightH");       \
-  auto* xx = ctx.Output<LoDTensor>("XX");                  \
+  auto* xx = ctx.Output<phi::DenseTensor>("XX");           \
  auto x_lod = x->lod();                                   \
  auto x_dims = x->dims(); /* T x M*/                      \
  auto x_mat_dims = (x_dims.size() == 3 && x_dims[1] == 1) \
@@ -273,7 +274,7 @@ class FusionGRUKernel : public framework::OpKernel<T> {
  auto* h0 = ctx.Input<phi::DenseTensor>("H0");                              \
  auto* wx = ctx.Input<phi::DenseTensor>("WeightX");                         \
  auto* bias = ctx.Input<phi::DenseTensor>("Bias");                          \
-  auto* hidden_out = ctx.Output<LoDTensor>("Hidden");                        \
+  auto* hidden_out = ctx.Output<phi::DenseTensor>("Hidden");                 \
  bool is_reverse = ctx.Attr<bool>("is_reverse");                            \
  const int M = x_mat_dims[1];                                               \
  const int D = wh_dims[0];                                                  \
@@ -398,8 +399,8 @@ class FusionGRUKernel : public framework::OpKernel<T> {
    }
    INIT_OTHER_DEFINES;
    auto* reordered_h0 = ctx.Output<phi::DenseTensor>("ReorderedH0");
-    auto* batched_input = ctx.Output<LoDTensor>("BatchedInput");
+    auto* batched_input = ctx.Output<phi::DenseTensor>("BatchedInput");
-    auto* batched_out = ctx.Output<LoDTensor>("BatchedOut");
+    auto* batched_out = ctx.Output<phi::DenseTensor>("BatchedOut");
    T* batched_input_data = batched_input->mutable_data<T>(place);
    T* batched_out_data = batched_out->mutable_data<T>(place);
    hidden_out->mutable_data<T>(place);

--- a/paddle/fluid/operators/fused/fusion_gru_op.h
+++ b/paddle/fluid/operators/fused/fusion_gru_op.h
@@ -18,7 +18,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-using LoDTensor = phi::DenseTensor;
 using Tensor = phi::DenseTensor;
 class FusionGRUOp : public framework::OperatorWithKernel {

--- a/paddle/fluid/operators/fused/fusion_lstm_op.cc
+++ b/paddle/fluid/operators/fused/fusion_lstm_op.cc
@@ -177,11 +177,12 @@ framework::OpKernelType FusionLSTMOp::GetExpectedKernelType(
 }
 void FusionLSTMOpMaker::Make() {
-  AddInput("X",
+  AddInput(
-           "(LoDTensor) the input is a LodTensor, which support "
+      "X",
-           "variable-time length input sequence. The underlying tensor in "
+      "(phi::DenseTensor) the input is a LodTensor, which support "
-           "this LoDTensor is a matrix with shape (T X M), where T is the "
+      "variable-time length input sequence. The underlying tensor in "
-           "total time steps in this mini-batch, M is the dim size of x.");
+      "this phi::DenseTensor is a matrix with shape (T X M), where T is the "
+      "total time steps in this mini-batch, M is the dim size of x.");
  AddInput("WeightX",
           "(Tensor) the learnable weights of X."
           " - The shape is (M x 4D), where M is the dim size of x, D is the "
@@ -214,23 +215,25 @@ void FusionLSTMOpMaker::Make() {
           "input. This is a tensor with shape (N x D), where N is the "
           "batch size. `H0` and `C0` can be NULL but only at the same time.")
      .AsDispensable();
-  AddOutput("Hidden",
+  AddOutput(
-            "(LoDTensor) (same as LSTMOp) the hidden state of LSTM operator. "
+      "Hidden",
-            "The shape is (T x D), and lod is the same with the `Input`.");
+      "(phi::DenseTensor) (same as LSTMOp) the hidden state of LSTM operator. "
-  AddOutput("Cell",
+      "The shape is (T x D), and lod is the same with the `Input`.");
-            "(LoDTensor) (same as LSTMOp) the cell state of LSTM operator. "
+  AddOutput(
-            "The shape is (T x D), and lod is the same with the `Input`.");
+      "Cell",
+      "(phi::DenseTensor) (same as LSTMOp) the cell state of LSTM operator. "
+      "The shape is (T x D), and lod is the same with the `Input`.");
  AddOutput("XX",
-            "(LoDTensor) the result after X * WeightX (size is T x 4D)"
+            "(phi::DenseTensor) the result after X * WeightX (size is T x 4D)"
            " or batched_X (size is T x M), this will be automatically chosen,"
            " where T is the total time steps in this mini-batch,"
            " D is the hidden size, M is the dim size of x input.")
      .AsIntermediate();
-  AddOutput("BatchedInput", "(LoDTensor) (T x 4D).").AsIntermediate();
+  AddOutput("BatchedInput", "(phi::DenseTensor) (T x 4D).").AsIntermediate();
-  AddOutput("BatchedHidden", "(LoDTensor) (T x D).").AsIntermediate();
+  AddOutput("BatchedHidden", "(phi::DenseTensor) (T x D).").AsIntermediate();
-  AddOutput("BatchedCell", "(LoDTensor) (T x D).").AsIntermediate();
+  AddOutput("BatchedCell", "(phi::DenseTensor) (T x D).").AsIntermediate();
-  AddOutput("ReorderedH0", "(LoDTensor) (N x D).").AsIntermediate();
+  AddOutput("ReorderedH0", "(phi::DenseTensor) (N x D).").AsIntermediate();
-  AddOutput("ReorderedC0", "(LoDTensor) (N x D).").AsIntermediate();
+  AddOutput("ReorderedC0", "(phi::DenseTensor) (N x D).").AsIntermediate();
  AddOutput("CheckedCell", "(Tensor) (2 x D) only for peephole.")
      .AsIntermediate();
  AddAttr<bool>("use_peepholes",
@@ -295,23 +298,23 @@ This operator fuse the X into LSTM, more details can refer to LSTM op.
 template <typename T>
 class FuisonLSTMKernel : public framework::OpKernel<T> {
 public:
-#define INIT_BASE_DEFINES                               \
+#define INIT_BASE_DEFINES                                    \
-  using DeviceContext = phi::CPUContext;                \
+  using DeviceContext = phi::CPUContext;                     \
-  auto* x = ctx.Input<LoDTensor>("X");                  \
+  auto* x = ctx.Input<phi::DenseTensor>("X");                \
-  auto* h0 = ctx.Input<phi::DenseTensor>("H0");         \
+  auto* h0 = ctx.Input<phi::DenseTensor>("H0");              \
-  auto* c0 = ctx.Input<phi::DenseTensor>("C0");         \
+  auto* c0 = ctx.Input<phi::DenseTensor>("C0");              \
-  auto* wx = ctx.Input<phi::DenseTensor>("WeightX");    \
+  auto* wx = ctx.Input<phi::DenseTensor>("WeightX");         \
-  auto* wh = ctx.Input<phi::DenseTensor>("WeightH");    \
+  auto* wh = ctx.Input<phi::DenseTensor>("WeightH");         \
-  auto* bias = ctx.Input<phi::DenseTensor>("Bias");     \
+  auto* bias = ctx.Input<phi::DenseTensor>("Bias");          \
-  auto* xx = ctx.Output<LoDTensor>("XX");               \
+  auto* xx = ctx.Output<phi::DenseTensor>("XX");             \
-  auto* hidden_out = ctx.Output<LoDTensor>("Hidden");   \
+  auto* hidden_out = ctx.Output<phi::DenseTensor>("Hidden"); \
-  auto* cell_out = ctx.Output<LoDTensor>("Cell");       \
+  auto* cell_out = ctx.Output<phi::DenseTensor>("Cell");     \
-  bool is_reverse = ctx.Attr<bool>("is_reverse");       \
+  bool is_reverse = ctx.Attr<bool>("is_reverse");            \
-  bool use_peepholes = ctx.Attr<bool>("use_peepholes"); \
+  bool use_peepholes = ctx.Attr<bool>("use_peepholes");      \
-  auto x_dims = x->dims();   /* T x M*/                 \
+  auto x_dims = x->dims();   /* T x M*/                      \
-  auto wh_dims = wh->dims(); /* D x 4D*/                \
+  auto wh_dims = wh->dims(); /* D x 4D*/                     \
-  const int M = x_dims[1];                              \
+  const int M = x_dims[1];                                   \
-  const int D = wh_dims[0];                             \
+  const int D = wh_dims[0];                                  \
  const int D4 = wh_dims[1]
 #define INIT_OTHER_DEFINES                                                     \
@@ -439,9 +442,9 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
    auto* reordered_h0 = ctx.Output<phi::DenseTensor>("ReorderedH0");
    auto* reordered_c0 = ctx.Output<phi::DenseTensor>("ReorderedC0");
-    auto* batched_input = ctx.Output<LoDTensor>("BatchedInput");
+    auto* batched_input = ctx.Output<phi::DenseTensor>("BatchedInput");
-    auto* batched_c_out = ctx.Output<LoDTensor>("BatchedCell");
+    auto* batched_c_out = ctx.Output<phi::DenseTensor>("BatchedCell");
-    auto* batched_h_out = ctx.Output<LoDTensor>("BatchedHidden");
+    auto* batched_h_out = ctx.Output<phi::DenseTensor>("BatchedHidden");
    T* xx_data = xx->mutable_data<T>(place);
    T* batched_input_data = batched_input->mutable_data<T>(place);
    T* batched_c_out_data = batched_c_out->mutable_data<T>(place);

--- a/paddle/fluid/operators/fused/fusion_lstm_op.h
+++ b/paddle/fluid/operators/fused/fusion_lstm_op.h
@@ -18,7 +18,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-using LoDTensor = phi::DenseTensor;
 using Tensor = phi::DenseTensor;
 class FusionLSTMOp : public framework::OperatorWithKernel {

--- a/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc
+++ b/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc
@@ -106,14 +106,14 @@ framework::OpKernelType FusionRepeatedFCReluOp::GetExpectedKernelType(
 }
 void FusionRepeatedFCReluOpMaker::Make() {
-  AddInput("X", "(LoDTensor) Input tensors of this operator.");
+  AddInput("X", "(phi::DenseTensor) Input tensors of this operator.");
  AddInput("W", "(Tensor) The weight tensors of this operator.").AsDuplicable();
  AddInput("Bias", "(Tensor) The bias tensors of this operator.")
      .AsDuplicable();
  AddOutput("ReluOut", "(Tensor) The output tensor of each relu operator.")
      .AsDuplicable()
      .AsIntermediate();
-  AddOutput("Out", "(LoDTensor) Output tensor of this operator.");
+  AddOutput("Out", "(phi::DenseTensor) Output tensor of this operator.");
  AddComment(R"DOC(
  Fusion Repeated FC with Relu Operator.
 )DOC");

--- a/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.h
+++ b/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.h
@@ -18,7 +18,6 @@
 namespace paddle {
 namespace operators {
-using LoDTensor = phi::DenseTensor;
 using Tensor = phi::DenseTensor;
 class FusionRepeatedFCReluOp : public framework::OperatorWithKernel {

--- a/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc
+++ b/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc
@@ -95,11 +95,12 @@ framework::OpKernelType FusionSeqConvEltAddReluOp::GetExpectedKernelType(
 }
 void FusionSeqConvEltAddReluOpMaker::Make() {
-  AddInput("X",
+  AddInput(
-           "(LoDTensor) the input is a LodTensor, which support "
+      "X",
-           "variable-time length input sequence. The underlying tensor in "
+      "(phi::DenseTensor) the input is a LodTensor, which support "
-           "this LoDTensor is a matrix with shape (T X M), where T is the "
+      "variable-time length input sequence. The underlying tensor in "
-           "total time steps in this mini-batch, M is the dim size of x.");
+      "this phi::DenseTensor is a matrix with shape (T X M), where T is the "
+      "total time steps in this mini-batch, M is the dim size of x.");
  // PaddingData only support false yet, should be ensured at pass.
  AddInput("Filter",
           "(Tensor) same as the input(Filter) of sequence conv op is an "
@@ -111,9 +112,9 @@ void FusionSeqConvEltAddReluOpMaker::Make() {
           "output feature size");
  AddOutput(
      "Out",
-      "(LoDTensor) the output(Out) is a LodTensor, which support "
+      "(phi::DenseTensor) the output(Out) is a LodTensor, which support "
      "variable-time length output sequence. The underlying tensor in "
-      "this LoDTensor is a matrix with shape (T, N), where, T is the "
+      "this phi::DenseTensor is a matrix with shape (T, N), where, T is the "
      "total time steps in this mini-batch, N is the output feature size.");
  AddOutput("ColMat",
            "(Tensor) (T, K), where T is where T is the "
@@ -150,10 +151,10 @@ class FusionSeqConvEltAddReluKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    using DeviceContext = phi::CPUContext;
-    auto* x = ctx.Input<LoDTensor>("X");
+    auto* x = ctx.Input<phi::DenseTensor>("X");
    auto* w = ctx.Input<phi::DenseTensor>("Filter");
    auto* b = ctx.Input<phi::DenseTensor>("Bias");
-    auto* y = ctx.Output<LoDTensor>("Out");
+    auto* y = ctx.Output<phi::DenseTensor>("Out");
    auto* col = ctx.Output<phi::DenseTensor>("ColMat");
    auto x_lod = x->lod();

--- a/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.h
+++ b/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.h
@@ -18,7 +18,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-using LoDTensor = phi::DenseTensor;
 using Tensor = phi::DenseTensor;
 class FusionSeqConvEltAddReluOp : public framework::OperatorWithKernel {

--- a/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc
+++ b/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc
@@ -110,12 +110,13 @@ framework::OpKernelType FusionSeqExpandConcatFCOp::GetExpectedKernelType(
 void FusionSeqExpandConcatFCOpMaker::Make() {
  AddInput("X",
-           "(LoDTensor) input LodDTensors, the first one must be have ref lod "
+           "(phi::DenseTensor) input LodDTensors, the first one must be have "
+           "ref lod "
           "for sequence expand, and the rest input should have same lod.")
      .AsDuplicable();
  AddInput("FCWeight", "(Tensor) the weights of fc.");
  AddInput("FCBias", "(Tensor, optional) the bias of fc.").AsDispensable();
-  AddOutput("Out", "(LoDTensor) Output LodTensor.");
+  AddOutput("Out", "(phi::DenseTensor) Output LodTensor.");
  AddOutput(
      "FCOut",
      "(Tensor) the intermediate tensor to keep the result of fc."
@@ -150,10 +151,10 @@ class FusionSeqExpandConcatFCOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    using DeviceContext = phi::CPUContext;
-    auto ins = ctx.MultiInput<LoDTensor>("X");
+    auto ins = ctx.MultiInput<phi::DenseTensor>("X");
    auto* w = ctx.Input<phi::DenseTensor>("FCWeight");
    auto* b = ctx.Input<phi::DenseTensor>("FCBias");
-    auto* out = ctx.Output<LoDTensor>("Out");
+    auto* out = ctx.Output<phi::DenseTensor>("Out");
    auto* fc_out = ctx.Output<phi::DenseTensor>("FCOut");
    auto* ref_in = ins[0];

--- a/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.h
+++ b/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.h
@@ -18,7 +18,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-using LoDTensor = phi::DenseTensor;
 using Tensor = phi::DenseTensor;
 class FusionSeqExpandConcatFCOp : public framework::OperatorWithKernel {

--- a/paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc
+++ b/paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc
@@ -75,8 +75,9 @@ framework::OpKernelType FusionSeqPoolConcatOp::GetExpectedKernelType(
 }
 void FusionSeqPoolConcatOpMaker::Make() {
-  AddInput("X", "(LoDTensor) Input tensors of this operator.").AsDuplicable();
+  AddInput("X", "(phi::DenseTensor) Input tensors of this operator.")
-  AddOutput("Out", "(LoDTensor) Output tensor of concat operator.");
+      .AsDuplicable();
+  AddOutput("Out", "(phi::DenseTensor) Output tensor of concat operator.");
  AddAttr<std::string>("pooltype",
                       "(string, default 'SUM') some of the pooling "
                       "pooltype of SequencePoolOp.")
@@ -95,8 +96,8 @@ template <typename T>
 class FusionSeqPoolConcatKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto ins = ctx.MultiInput<LoDTensor>("X");
+    auto ins = ctx.MultiInput<phi::DenseTensor>("X");
-    auto* out = ctx.Output<LoDTensor>("Out");
+    auto* out = ctx.Output<phi::DenseTensor>("Out");
    std::string pooltype = ctx.Attr<std::string>("pooltype");
    auto x0_lod = ins[0]->lod();
    const auto& x0_dims = ins[0]->dims();

--- a/paddle/fluid/operators/fused/fusion_seqpool_concat_op.h
+++ b/paddle/fluid/operators/fused/fusion_seqpool_concat_op.h
@@ -18,7 +18,6 @@
 namespace paddle {
 namespace operators {
-using LoDTensor = phi::DenseTensor;
 using Tensor = phi::DenseTensor;
 class FusionSeqPoolConcatOp : public framework::OperatorWithKernel {

--- a/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc
+++ b/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc
@@ -74,11 +74,12 @@ framework::OpKernelType FusionSeqPoolCVMConcatOp::GetExpectedKernelType(
 }
 void FusionSeqPoolCVMConcatOpMaker::Make() {
-  AddInput("X", "(LoDTensor) Input tensors of this operator.").AsDuplicable();
+  AddInput("X", "(phi::DenseTensor) Input tensors of this operator.")
+      .AsDuplicable();
  AddInput("CVM",
           "(Tensor),  a 2-D Tensor with shape [N x 2], where N is the batch "
           "size, 2 is show and click.");
-  AddOutput("Out", "(LoDTensor) Output tensor of concat operator.");
+  AddOutput("Out", "(phi::DenseTensor) Output tensor of concat operator.");
  AddAttr<std::string>("pooltype",
                       "(string, default 'SUM') some of the pooling "
                       "pooltype of SequencePoolOp.")
@@ -98,8 +99,8 @@ template <typename T>
 class FusionSeqPoolCVMConcatKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto ins = ctx.MultiInput<LoDTensor>("X");
+    auto ins = ctx.MultiInput<phi::DenseTensor>("X");
-    auto* out = ctx.Output<LoDTensor>("Out");
+    auto* out = ctx.Output<phi::DenseTensor>("Out");
    std::string pooltype = ctx.Attr<std::string>("pooltype");
    auto x0_lod = ins[0]->lod();
    const auto& x0_dims = ins[0]->dims();

--- a/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.h
+++ b/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.h
@@ -18,7 +18,6 @@
 namespace paddle {
 namespace operators {
-using LoDTensor = phi::DenseTensor;
 using Tensor = phi::DenseTensor;
 class FusionSeqPoolCVMConcatOp : public framework::OperatorWithKernel {

--- a/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.h
+++ b/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.h
@@ -18,7 +18,6 @@
 namespace paddle {
 namespace operators {
-using LoDTensor = phi::DenseTensor;
 using Tensor = phi::DenseTensor;
 // ( (A.^2 * B.^2) - (A * B).^2 ) .* scalar

--- a/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
+++ b/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
@@ -59,11 +59,11 @@ class MultiGRUHandler {
        origin_mode_(ctx.Attr<bool>("origin_mode")),
        layers_(ctx.Attr<int>("layers")),
        concat_pds_(layers_, std::shared_ptr<dnnl::concat::primitive_desc>()),
-        x_(ctx.Input<LoDTensor>("X")),
+        x_(ctx.Input<phi::DenseTensor>("X")),
        weights_x_(ctx.MultiInput<phi::DenseTensor>("WeightX")),
        weights_h_(ctx.MultiInput<phi::DenseTensor>("WeightH")),
        biases_(ctx.MultiInput<phi::DenseTensor>("Bias")),
-        hidden_(ctx.Output<LoDTensor>("Hidden")),
+        hidden_(ctx.Output<phi::DenseTensor>("Hidden")),
        x_lod_(x_->lod()[0]) {
    PADDLE_ENFORCE_EQ(
        weights_x_.size(),
@@ -127,7 +127,8 @@ class MultiGRUHandler {
    if (is_int8) {
      // Add int8 attributes
-      const auto scale_weights = ctx.MultiInput<LoDTensor>("Scale_weights");
+      const auto scale_weights =
+          ctx.MultiInput<phi::DenseTensor>("Scale_weights");
      PADDLE_ENFORCE_EQ(
          scale_weights.size(),
          layers_ * 2,
@@ -669,11 +670,11 @@ class MultiGRUHandler {
  // on Ti size, thus we need another key to cache them
  std::string memory_key_;
-  const LoDTensor* x_;
+  const phi::DenseTensor* x_;
  const std::vector<const phi::DenseTensor*> weights_x_;
  const std::vector<const phi::DenseTensor*> weights_h_;
  const std::vector<const phi::DenseTensor*> biases_;
-  LoDTensor* hidden_;
+  phi::DenseTensor* hidden_;
  std::vector<dnnl::primitive_attr> attrs_;
  const paddle::framework::Vector<size_t>& x_lod_;
 };

--- a/paddle/fluid/operators/fused/multi_gru_op.cc
+++ b/paddle/fluid/operators/fused/multi_gru_op.cc
@@ -148,11 +148,12 @@ framework::OpKernelType MultiGRUOp::GetExpectedKernelType(
 }
 void MultiGRUOpMaker::Make() {
-  AddInput("X",
+  AddInput(
-           "(LoDTensor) the input is an LodTensor, which support "
+      "X",
-           "variable-time length input sequence. The underlying tensor in "
+      "(phi::DenseTensor) the input is an LodTensor, which support "
-           "this LoDTensor is a matrix with shape (T X M), where T is the "
+      "variable-time length input sequence. The underlying tensor in "
-           "total time steps in this mini-batch, M is the dim size of x.");
+      "this phi::DenseTensor is a matrix with shape (T X M), where T is the "
+      "total time steps in this mini-batch, M is the dim size of x.");
  AddInput("WeightX",
           "(MultiTensor) The FC weight with shape (M x 3D),"
           "where M is the dim size of x, D is the hidden size. ")
@@ -176,7 +177,7 @@ void MultiGRUOpMaker::Make() {
      "Only used with MKL-DNN INT8.")
      .AsDuplicable()
      .AsDispensable();
-  AddOutput("Hidden", "(LoDTensor) (T x D) Same as GRUOp");
+  AddOutput("Hidden", "(phi::DenseTensor) (T x D) Same as GRUOp");
  AddAttr<std::string>("activation",
                       "(string, default tanh) "
                       "The activation type used for output candidate {h}_t.")

--- a/paddle/fluid/operators/fused/multi_gru_op.h
+++ b/paddle/fluid/operators/fused/multi_gru_op.h
@@ -20,7 +20,6 @@ namespace paddle {
 namespace operators {
 using framework::ExecutionContext;
-using LoDTensor = phi::DenseTensor;
 class MultiGRUOp : public framework::OperatorWithKernel {
 public:

--- a/paddle/fluid/operators/math/context_project.h
+++ b/paddle/fluid/operators/math/context_project.h
@@ -27,7 +27,6 @@ namespace operators {
 namespace math {
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 /*
 * \brief Context projection concatenates features in adjacent time-steps in
@@ -51,9 +50,8 @@ using LoDTensor = phi::DenseTensor;
 * For a mini-batch of 2 variable lengths sentences, containing 3, and 1
 * time-steps:
 *
- * Assumed input (X) is a [4, M, N] float LoDTensor, and X->lod()[0] = [0, 3,
+ * Assumed input (X) is a [4, M, N] float phi::DenseTensor, and X->lod()[0] =
- * 4].
+ * [0, 3, 4]. Besides, for the sake of simplicity, we assume M=1 and N=2.
- * Besides, for the sake of simplicity, we assume M=1 and N=2.
 *
 * X = [[a1, a2;
 *       b1, b2;
@@ -89,7 +87,7 @@ template <typename DeviceContext, typename T>
 class ContextProjectFunctor {
 public:
  void operator()(const DeviceContext& context,
-                  const LoDTensor& in,
+                  const phi::DenseTensor& in,
                  const phi::DenseTensor* padding_data,
                  bool padding_trainable,
                  const int context_start,
@@ -218,7 +216,7 @@ template <typename DeviceContext, typename T>
 class ContextProjectGradFunctor {
 public:
  void operator()(const DeviceContext& context,
-                  const LoDTensor& in,
+                  const phi::DenseTensor& in,
                  bool padding_trainable,
                  const int context_start,
                  const int context_length,

--- a/paddle/fluid/operators/math/sequence_padding.h
+++ b/paddle/fluid/operators/math/sequence_padding.h
@@ -82,8 +82,8 @@ inline static void CheckDims(const framework::DDim& seq_tensor_dims,
 }
 /*
- * \brief   Padding/Unpadding LoDTensor to/from normal Tensor of the shape
+ * \brief   Padding/Unpadding phi::DenseTensor to/from normal Tensor of the
- *          [max_sequence_length, num_sequences, sequence_width].
+ * shape [max_sequence_length, num_sequences, sequence_width].
 *
 *  Padding sequence:
 *        padding[i] = seq[lod[level][i]]
@@ -97,13 +97,11 @@ inline static void CheckDims(const framework::DDim& seq_tensor_dims,
 *    padding (s0, s1, s2, s3; s0, s1, s2, 0; s0, 0, s2, 0; s0, 0, 0, 0)
 *
 * \param context       device context of this functor.
- * \param seq           LoDTensor which is stored in sequence format, the shape
+ * \param seq           phi::DenseTensor which is stored in sequence format, the
- *                      is [total_sequence_length, sequence_width] where
+ * shape is [total_sequence_length, sequence_width] where total_sequence_length
- *                      total_sequence_length is the sum of all sequences'
+ * is the sum of all sequences' length. \param padding       Tensor which is
- *                      length.
+ * padded to the same length, the shape is [max_sequence_length, num_sequences,
- * \param padding       Tensor which is padded to the same length, the shape is
+ * sequence_width]. \param norm_by_times whether dividing sequence's length.
- *                      [max_sequence_length, num_sequences, sequence_width].
- * \param norm_by_times whether dividing sequence's length.
 *
 * \note  transposition is also done in this functor.
 */

--- a/paddle/fluid/operators/math/sequence_pooling.cc
+++ b/paddle/fluid/operators/math/sequence_pooling.cc
@@ -25,7 +25,6 @@ namespace operators {
 namespace math {
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 template <typename T,
          int MajorType = Eigen::RowMajor,
          typename IndexType = Eigen::DenseIndex>

--- a/paddle/fluid/operators/math/sequence_scale.h
+++ b/paddle/fluid/operators/math/sequence_scale.h
@@ -35,7 +35,8 @@ namespace math {
 *
 * \param context       Device context of this functor.
- * \param seq           LoDTensor which is stored in sequence format, the shape
+ * \param seq           phi::DenseTensor which is stored in sequence format, the
+ shape
 *                      is [total_sequence_length, sequence_width] where
 *                      total_sequence_length is the sum of all sequences'
 *                      length.

--- a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
@@ -28,7 +28,6 @@ using dnnl::prop_kind;
 using dnnl::stream;
 using framework::DDim;
 using framework::ExecutionContext;
-using LoDTensor = phi::DenseTensor;
 using phi::funcs::OneDNNGetDataType;
 using phi::funcs::to_void_cast;
 using platform::MKLDNNDeviceContext;
@@ -382,7 +381,7 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
  void PrepareSrcMem(const std::shared_ptr<inner_product_forward>& fc_p,
                     const std::shared_ptr<dnnl::memory>& src_mem,
-                     const LoDTensor* x,
+                     const phi::DenseTensor* x,
                     const dnnl::engine& engine) const {
    auto x_md = x->mem_desc().reshape(src_mem->get_desc().dims());
    if (x_md != src_mem->get_desc()) {
@@ -403,10 +402,10 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
        ctx.template device_context<platform::MKLDNNDeviceContext>();
    const auto& mkldnn_engine = dev_ctx.GetEngine();
-    const auto* x = ctx.Input<LoDTensor>("Input");
+    const auto* x = ctx.Input<phi::DenseTensor>("Input");
    const auto* weights = ctx.Input<phi::DenseTensor>("W");
    const auto* bias = ctx.Input<phi::DenseTensor>("Bias");
-    auto out = ctx.Output<LoDTensor>("Out");
+    auto out = ctx.Output<phi::DenseTensor>("Out");
    const auto& scale_weights = ctx.Attr<std::vector<float>>("Scale_weights");
@@ -513,9 +512,9 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
  }
  void RecomputeOutputDims(const ExecutionContext& ctx,
-                           const LoDTensor* x,
+                           const phi::DenseTensor* x,
                           const phi::DenseTensor* weights,
-                           LoDTensor* out) const {
+                           phi::DenseTensor* out) const {
    int in_num_col_dims = ctx.Attr<int>("in_num_col_dims");
    bool padding_weights = ctx.Attr<bool>("padding_weights");
    PADDLE_ENFORCE_EQ(padding_weights,

--- a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc
@@ -26,7 +26,6 @@ namespace operators {
 using framework::DDim;
 using framework::ExecutionContext;
-using LoDTensor = phi::DenseTensor;
 using platform::MatMulV2MKLDNNHandler;
 using platform::MKLDNNDeviceContext;

--- a/paddle/fluid/operators/nccl/nccl_op.cu.cc
+++ b/paddle/fluid/operators/nccl/nccl_op.cu.cc
@@ -19,8 +19,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-using LoDTensor = phi::DenseTensor;
 using platform::Communicator;
 template <typename Type>
@@ -62,8 +60,8 @@ class NCCLAllReduceKernel : public framework::OpKernel<T> {
                      true,
                      platform::errors::PreconditionNotMet(
                          "This kernel only runs on GPU device."));
-    auto* x = ctx.Input<LoDTensor>("X");
+    auto* x = ctx.Input<phi::DenseTensor>("X");
-    auto* out = ctx.Output<LoDTensor>("Out");
+    auto* out = ctx.Output<phi::DenseTensor>("Out");
    auto* comm = ctx.Input<Communicator>("Communicator");
    std::string reduction = ctx.Attr<std::string>("reduction");
@@ -97,8 +95,8 @@ class NCCLReduceKernel : public framework::OpKernel<T> {
                      true,
                      platform::errors::InvalidArgument(
                          "This kernel only runs on GPU device."));
-    auto x = ctx.Input<LoDTensor>("X");  // x0, x1, x2
+    auto x = ctx.Input<phi::DenseTensor>("X");  // x0, x1, x2
-    auto out = ctx.Output<LoDTensor>("Out");
+    auto out = ctx.Output<phi::DenseTensor>("Out");
    auto* comm = ctx.Input<Communicator>("Communicator");
    int root = ctx.Attr<int>("root");
    std::string reduction = ctx.Attr<std::string>("reduction");
@@ -144,7 +142,7 @@ class NCCLBcastKernel : public framework::OpKernel<T> {
    int gpu_id = ctx.GetPlace().GetDeviceId();
    int idx = comm->GetCommId(gpu_id);
    if (idx == root) {
-      auto* x = ctx.Input<LoDTensor>("X");
+      auto* x = ctx.Input<phi::DenseTensor>("X");
      VLOG(3) << "gpu : " << gpu_id << " invoke Bcast. send " << x->numel();
      PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclBcast(
          reinterpret_cast<void*>(const_cast<T*>(x->data<T>())),
@@ -155,7 +153,7 @@ class NCCLBcastKernel : public framework::OpKernel<T> {
          ctx.cuda_device_context().stream()));
      VLOG(3) << "gpu : " << gpu_id << " finished Bcast.";
    } else {
-      auto* out = ctx.Output<LoDTensor>("Out");
+      auto* out = ctx.Output<phi::DenseTensor>("Out");
      VLOG(3) << "gpu : " << gpu_id << " invoke Bcast. recv buffer "
              << phi::product(out->dims());
      PADDLE_ENFORCE_GPU_SUCCESS(

--- a/paddle/fluid/operators/optimizers/adam_op_mlu.cc
+++ b/paddle/fluid/operators/optimizers/adam_op_mlu.cc
@@ -20,7 +20,6 @@ namespace paddle {
 namespace operators {
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 template <typename T>
 class AdamMLUKernel : public framework::OpKernel<T> {
@@ -30,32 +29,32 @@ class AdamMLUKernel : public framework::OpKernel<T> {
    PADDLE_ENFORCE_EQ(param_var->IsType<phi::DenseTensor>(),
                      true,
                      platform::errors::InvalidArgument(
-                          "The Var(%s)'s type should be LoDTensor, "
+                          "The Var(%s)'s type should be phi::DenseTensor, "
                          "but the received is %s",
                          ctx.InputNames("Param").front(),
                          framework::ToTypeName(param_var->Type())));
-    auto* param = ctx.Input<LoDTensor>("Param");
+    auto* param = ctx.Input<phi::DenseTensor>("Param");
    auto* grad_var = ctx.InputVar("Grad");
    PADDLE_ENFORCE_EQ(grad_var->IsType<phi::DenseTensor>(),
                      true,
                      platform::errors::InvalidArgument(
-                          "The Grad(%s)'s type should be LoDTensor, "
+                          "The Grad(%s)'s type should be phi::DenseTensor, "
                          "but the received is %s",
                          ctx.InputNames("Grad").front(),
                          framework::ToTypeName(param_var->Type())));
-    auto* grad = ctx.Input<LoDTensor>("Grad");
+    auto* grad = ctx.Input<phi::DenseTensor>("Grad");
-    auto* mom1 = ctx.Input<LoDTensor>("Moment1");
+    auto* mom1 = ctx.Input<phi::DenseTensor>("Moment1");
-    auto* mom2 = ctx.Input<LoDTensor>("Moment2");
+    auto* mom2 = ctx.Input<phi::DenseTensor>("Moment2");
-    auto* lr = ctx.Input<LoDTensor>("LearningRate");
+    auto* lr = ctx.Input<phi::DenseTensor>("LearningRate");
    auto* beta1_pow = ctx.Input<phi::DenseTensor>("Beta1Pow");
    auto* beta2_pow = ctx.Input<phi::DenseTensor>("Beta2Pow");
-    auto* param_out = ctx.Output<LoDTensor>("ParamOut");
+    auto* param_out = ctx.Output<phi::DenseTensor>("ParamOut");
-    auto* mom1_out = ctx.Output<LoDTensor>("Moment1Out");
+    auto* mom1_out = ctx.Output<phi::DenseTensor>("Moment1Out");
-    auto* mom2_out = ctx.Output<LoDTensor>("Moment2Out");
+    auto* mom2_out = ctx.Output<phi::DenseTensor>("Moment2Out");
-    auto* beta1_pow_out = ctx.Output<LoDTensor>("Beta1PowOut");
+    auto* beta1_pow_out = ctx.Output<phi::DenseTensor>("Beta1PowOut");
-    auto* beta2_pow_out = ctx.Output<LoDTensor>("Beta2PowOut");
+    auto* beta2_pow_out = ctx.Output<phi::DenseTensor>("Beta2PowOut");
    bool skip_update = false;
    if (ctx.HasInput("SkipUpdate")) {
@@ -110,8 +109,8 @@ class AdamMLUKernel : public framework::OpKernel<T> {
    mom1_out->ShareDataWith(*mom1);
    mom2_out->ShareDataWith(*mom2);
-    LoDTensor beta1_pow_tmp;
+    phi::DenseTensor beta1_pow_tmp;
-    LoDTensor beta2_pow_tmp;
+    phi::DenseTensor beta2_pow_tmp;
    if (beta1_pow->place() == platform::CPUPlace()) {
      T beta1 = *beta1_pow->data<T>();
      beta1_pow_tmp.mutable_data<T>({1}, ctx.GetPlace());
@@ -292,13 +291,13 @@ class AdamWMLUKernel : public AdamMLUKernel<T> {
    }
    bool with_decay = ctx.Attr<bool>("with_decay");
    const bool multi_precision = ctx.Attr<bool>("multi_precision");
-    auto* param_out = ctx.Output<LoDTensor>("ParamOut");
+    auto* param_out = ctx.Output<phi::DenseTensor>("ParamOut");
-    auto* master_param_out = ctx.Output<LoDTensor>("MasterParamOut");
+    auto* master_param_out = ctx.Output<phi::DenseTensor>("MasterParamOut");
-    const auto* master_param = ctx.Input<LoDTensor>("MasterParam");
+    const auto* master_param = ctx.Input<phi::DenseTensor>("MasterParam");
    VLOG(3) << "Skip update: " << skip_update << ", With decay: " << with_decay;
    if (!skip_update && with_decay) {
-      auto* param = ctx.Input<LoDTensor>("Param");
+      auto* param = ctx.Input<phi::DenseTensor>("Param");
      MLUCnnlTensorDesc param_desc(*param);
      if (multi_precision) {
        VLOG(3) << "[adamw] multi_precision, cast masterparam to param.";
@@ -328,12 +327,12 @@ class AdamWMLUKernel : public AdamMLUKernel<T> {
        PADDLE_ENFORCE_EQ(param_var->IsType<phi::DenseTensor>(),
                          true,
                          platform::errors::InvalidArgument(
-                              "The Var(%s)'s type should be LoDTensor, "
+                              "The Var(%s)'s type should be phi::DenseTensor, "
                              "but the received is %s",
                              ctx.InputNames("Param").front(),
                              framework::ToTypeName(param_var->Type())));
-        auto* lr = ctx.Input<LoDTensor>("LearningRate");
+        auto* lr = ctx.Input<phi::DenseTensor>("LearningRate");
        float coeff = ctx.Attr<float>("coeff");
        // update param with decay coeff: mul(-1 * lr, coeff * param) + param
@@ -502,8 +501,8 @@ class MergedAdamMLUKernel : public framework::OpKernel<T> {
      mom1_outs[i]->ShareDataWith(*mom1s[i]);
      mom2_outs[i]->ShareDataWith(*mom2s[i]);
-      LoDTensor beta1_pow_tmp;
+      phi::DenseTensor beta1_pow_tmp;
-      LoDTensor beta2_pow_tmp;
+      phi::DenseTensor beta2_pow_tmp;
      if (beta1_pows[i]->place() == platform::CPUPlace()) {
        T beta1 = *beta1_pows[i]->data<T>();
        beta1_pow_tmp.mutable_data<T>({1}, ctx.GetPlace());

--- a/paddle/fluid/operators/optimizers/adam_op_npu.cc
+++ b/paddle/fluid/operators/optimizers/adam_op_npu.cc
@@ -23,7 +23,6 @@ namespace paddle {
 namespace operators {
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 template <typename DeviceContext, typename T>
 class AdamNPUKernel : public framework::OpKernel<T> {
@@ -33,32 +32,32 @@ class AdamNPUKernel : public framework::OpKernel<T> {
    PADDLE_ENFORCE_EQ(param_var->IsType<phi::DenseTensor>(),
                      true,
                      platform::errors::InvalidArgument(
-                          "The Var(%s)'s type should be LoDTensor, "
+                          "The Var(%s)'s type should be phi::DenseTensor, "
                          "but the received is %s",
                          ctx.InputNames("Param").front(),
                          framework::ToTypeName(param_var->Type())));
-    auto* param = ctx.Input<LoDTensor>("Param");
+    auto* param = ctx.Input<phi::DenseTensor>("Param");
    auto* grad_var = ctx.InputVar("Grad");
    PADDLE_ENFORCE_EQ(grad_var->IsType<phi::DenseTensor>(),
                      true,
                      platform::errors::InvalidArgument(
-                          "The Grad(%s)'s type should be LoDTensor, "
+                          "The Grad(%s)'s type should be phi::DenseTensor, "
                          "but the received is %s",
                          ctx.InputNames("Grad").front(),
                          framework::ToTypeName(param_var->Type())));
-    auto* grad = ctx.Input<LoDTensor>("Grad");
+    auto* grad = ctx.Input<phi::DenseTensor>("Grad");
-    auto* mom1 = ctx.Input<LoDTensor>("Moment1");
+    auto* mom1 = ctx.Input<phi::DenseTensor>("Moment1");
-    auto* mom2 = ctx.Input<LoDTensor>("Moment2");
+    auto* mom2 = ctx.Input<phi::DenseTensor>("Moment2");
-    auto* lr = ctx.Input<LoDTensor>("LearningRate");
+    auto* lr = ctx.Input<phi::DenseTensor>("LearningRate");
    auto* beta1_pow = ctx.Input<phi::DenseTensor>("Beta1Pow");
    auto* beta2_pow = ctx.Input<phi::DenseTensor>("Beta2Pow");
-    auto* param_out = ctx.Output<LoDTensor>("ParamOut");
+    auto* param_out = ctx.Output<phi::DenseTensor>("ParamOut");
-    auto* mom1_out = ctx.Output<LoDTensor>("Moment1Out");
+    auto* mom1_out = ctx.Output<phi::DenseTensor>("Moment1Out");
-    auto* mom2_out = ctx.Output<LoDTensor>("Moment2Out");
+    auto* mom2_out = ctx.Output<phi::DenseTensor>("Moment2Out");
-    auto* beta1_pow_out = ctx.Output<LoDTensor>("Beta1PowOut");
+    auto* beta1_pow_out = ctx.Output<phi::DenseTensor>("Beta1PowOut");
-    auto* beta2_pow_out = ctx.Output<LoDTensor>("Beta2PowOut");
+    auto* beta2_pow_out = ctx.Output<phi::DenseTensor>("Beta2PowOut");
    bool skip_update = false;
    if (ctx.HasInput("SkipUpdate")) {
@@ -114,8 +113,8 @@ class AdamNPUKernel : public framework::OpKernel<T> {
    // NOTE(zhiqiu): beta1_pow and beta2_pow may on CPU and not transform
    // place.
-    LoDTensor beta1_pow_tmp;
+    phi::DenseTensor beta1_pow_tmp;
-    LoDTensor beta2_pow_tmp;
+    phi::DenseTensor beta2_pow_tmp;
    if (beta1_pow->place() == platform::CPUPlace()) {
      T beta1 = *beta1_pow->data<T>();
      beta1_pow_tmp.mutable_data<T>({1}, ctx.GetPlace());
@@ -279,7 +278,7 @@ class AdamWNPUKernel : public AdamNPUKernel<platform::NPUDeviceContext, T> {
    bool with_decay = ctx.Attr<bool>("with_decay");
    if (!skip_update && with_decay) {
      float coeff = ctx.Attr<float>("coeff");
-      auto* lr = ctx.Input<LoDTensor>("LearningRate");
+      auto* lr = ctx.Input<phi::DenseTensor>("LearningRate");
      auto place = ctx.GetPlace();
@@ -308,18 +307,18 @@ class AdamWNPUKernel : public AdamNPUKernel<platform::NPUDeviceContext, T> {
        PADDLE_THROW(platform::errors::Unimplemented(
            "Master Parma is not supported on npu"));
      } else {
-        auto* param_out = ctx.Output<LoDTensor>("ParamOut");
+        auto* param_out = ctx.Output<phi::DenseTensor>("ParamOut");
        param_out->mutable_data<T>(ctx.GetPlace());
        const auto* param_var = ctx.InputVar("Param");
        PADDLE_ENFORCE_EQ(param_var->IsType<phi::DenseTensor>(),
                          true,
                          platform::errors::InvalidArgument(
-                              "The Var(%s)'s type should be LoDTensor, "
+                              "The Var(%s)'s type should be phi::DenseTensor, "
                              "but the received is %s",
                              ctx.InputNames("Param").front(),
                              framework::ToTypeName(param_var->Type())));
-        auto* param = ctx.Input<LoDTensor>("Param");
+        auto* param = ctx.Input<phi::DenseTensor>("Param");
        const auto& runner =
            NpuOpRunner("Mul",

--- a/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc
+++ b/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc
@@ -32,20 +32,20 @@ class DecayedAdagradOp : public framework::OperatorWithKernel {
                   "Input",
                   "LearningRate",
                   "DecayedAdagradOp");
-    PADDLE_ENFORCE_EQ(
+    PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("Param").front(),
-        ctx->GetInputsVarType("Param").front(),
+                      framework::proto::VarType::LOD_TENSOR,
-        framework::proto::VarType::LOD_TENSOR,
+                      platform::errors::InvalidArgument(
-        platform::errors::InvalidArgument(
+                          "The input var's type should be phi::DenseTensor, "
-            "The input var's type should be LoDTensor, but the received is %s",
+                          "but the received is %s",
-            ctx->Inputs("Param").front(),
+                          ctx->Inputs("Param").front(),
-            ctx->GetInputsVarType("Param").front()));
+                          ctx->GetInputsVarType("Param").front()));
-    PADDLE_ENFORCE_EQ(
+    PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("Grad").front(),
-        ctx->GetInputsVarType("Grad").front(),
+                      framework::proto::VarType::LOD_TENSOR,
-        framework::proto::VarType::LOD_TENSOR,
+                      platform::errors::InvalidArgument(
-        platform::errors::InvalidArgument(
+                          "The input var's type should be phi::DenseTensor, "
-            "The input var's type should be LoDTensor, but the received is %s",
+                          "but the received is %s",
-            ctx->Inputs("Grad").front(),
+                          ctx->Inputs("Grad").front(),
-            ctx->GetInputsVarType("Grad").front()));
+                          ctx->GetInputsVarType("Grad").front()));
    OP_INOUT_CHECK(
        ctx->HasOutput("ParamOut"), "Output", "ParamOut", "DecayedAdagradOp");

--- a/paddle/fluid/operators/optimizers/decayed_adagrad_op.h
+++ b/paddle/fluid/operators/optimizers/decayed_adagrad_op.h
@@ -27,7 +27,7 @@ class DecayedAdagradOpKernel : public framework::OpKernel<T> {
    PADDLE_ENFORCE_EQ(param_var->IsType<phi::DenseTensor>(),
                      true,
                      platform::errors::InvalidArgument(
-                          "The Var(%s)'s type should be LoDTensor, "
+                          "The Var(%s)'s type should be phi::DenseTensor, "
                          "but the received is %s",
                          ctx.InputNames("Param").front(),
                          framework::ToTypeName(param_var->Type())));
@@ -35,7 +35,7 @@ class DecayedAdagradOpKernel : public framework::OpKernel<T> {
    PADDLE_ENFORCE_EQ(grad_var->IsType<phi::DenseTensor>(),
                      true,
                      platform::errors::InvalidArgument(
-                          "The Var(%s)'s type should be LoDTensor, "
+                          "The Var(%s)'s type should be phi::DenseTensor, "
                          "but the received is %s",
                          ctx.InputNames("Grad").front(),
                          framework::ToTypeName(grad_var->Type())));

--- a/paddle/fluid/operators/optimizers/dpsgd_op.cc
+++ b/paddle/fluid/operators/optimizers/dpsgd_op.cc
@@ -36,18 +36,18 @@ class DpsgdOp : public framework::OperatorWithKernel {
        true,
        platform::errors::NotFound(
            "Input(LearningRate) of DpsgdOp should not be null."));
-    PADDLE_ENFORCE_EQ(
+    PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("Param").front(),
-        ctx->GetInputsVarType("Param").front(),
+                      framework::proto::VarType::LOD_TENSOR,
-        framework::proto::VarType::LOD_TENSOR,
+                      platform::errors::InvalidArgument(
-        platform::errors::InvalidArgument(
+                          "The input var's type should be phi::DenseTensor, "
-            "The input var's type should be LoDTensor, but the received is %s",
+                          "but the received is %s",
-            ctx->GetInputsVarType("Param").front()));
+                          ctx->GetInputsVarType("Param").front()));
-    PADDLE_ENFORCE_EQ(
+    PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("Grad").front(),
-        ctx->GetInputsVarType("Grad").front(),
+                      framework::proto::VarType::LOD_TENSOR,
-        framework::proto::VarType::LOD_TENSOR,
+                      platform::errors::InvalidArgument(
-        platform::errors::InvalidArgument(
+                          "The input var's type should be phi::DenseTensor, "
-            "The input var's type should be LoDTensor, but the received is %s",
+                          "but the received is %s",
-            ctx->GetInputsVarType("Grad").front()));
+                          ctx->GetInputsVarType("Grad").front()));
    PADDLE_ENFORCE_EQ(ctx->HasOutput("ParamOut"),
                      true,

--- a/paddle/fluid/operators/optimizers/dpsgd_op.h
+++ b/paddle/fluid/operators/optimizers/dpsgd_op.h
@@ -32,7 +32,7 @@ class DpsgdOpKernel : public framework::OpKernel<T> {
    PADDLE_ENFORCE_EQ(param_var->IsType<phi::DenseTensor>(),
                      true,
                      platform::errors::InvalidArgument(
-                          "The Var(%s)'s type should be LoDTensor, "
+                          "The Var(%s)'s type should be phi::DenseTensor, "
                          "but the received is %s",
                          ctx.InputNames("Param").front(),
                          framework::ToTypeName(param_var->Type())));
@@ -41,7 +41,7 @@ class DpsgdOpKernel : public framework::OpKernel<T> {
    PADDLE_ENFORCE_EQ(grad_var->IsType<phi::DenseTensor>(),
                      true,
                      platform::errors::InvalidArgument(
-                          "The Var(%s)'s type should be LoDTensor, "
+                          "The Var(%s)'s type should be phi::DenseTensor, "
                          "but the received is %s",
                          ctx.InputNames("Grad").front(),
                          framework::ToTypeName(grad_var->Type())));

--- a/paddle/fluid/operators/optimizers/lamb_op.cc
+++ b/paddle/fluid/operators/optimizers/lamb_op.cc
@@ -52,10 +52,10 @@ class LambOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("Param",
-             "(LoDTensor, default LoDTensor<float>) "
+             "(phi::DenseTensor, default phi::DenseTensor<float>) "
             "Input parameter that has to be updated.");
    AddInput("Grad",
-             "(LoDTensor, default LoDTensor<float>) "
+             "(phi::DenseTensor, default phi::DenseTensor<float>) "
             "Input gradient of the parameter.");
    AddInput("LearningRate", "(Tensor) Learning rate.");
    AddInput("Moment1", "(Tensor) Input first moment.");
@@ -63,7 +63,7 @@ class LambOpMaker : public framework::OpProtoAndCheckerMaker {
    AddInput("Beta1Pow", "(Tensor) Input beta1 power accumulator.");
    AddInput("Beta2Pow", "(Tensor) Input beta2 power accumulator.");
    AddInput("MasterParam",
-             "(LoDTensor, default LoDTensor<float>) "
+             "(phi::DenseTensor, default phi::DenseTensor<float>) "
             "Input master parameter that has to be updated.")
        .AsDispensable();
    AddInput(

--- a/paddle/fluid/operators/optimizers/lars_momentum_op.cc
+++ b/paddle/fluid/operators/optimizers/lars_momentum_op.cc
@@ -37,12 +37,12 @@ class LarsMomentumOp : public framework::OperatorWithKernel {
                   "Output",
                   "VelocityOut",
                   "LarsMomentum");
-    PADDLE_ENFORCE_EQ(
+    PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("Param").front(),
-        ctx->GetInputsVarType("Param").front(),
+                      framework::proto::VarType::LOD_TENSOR,
-        framework::proto::VarType::LOD_TENSOR,
+                      platform::errors::InvalidArgument(
-        platform::errors::InvalidArgument(
+                          "The input var's type should be phi::DenseTensor, "
-            "The input var's type should be LoDTensor, but the received is %s",
+                          "but the received is %s",
-            ctx->GetInputsVarType("Param").front()));
+                          ctx->GetInputsVarType("Param").front()));
    auto lr_dims = ctx->GetInputsDim("LearningRate");
    auto grad_dim = ctx->GetInputsDim("Grad");
@@ -102,7 +102,7 @@ class LarsMomentumOp : public framework::OperatorWithKernel {
      PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("Grad")[i],
                        framework::proto::VarType::LOD_TENSOR,
                        platform::errors::InvalidArgument(
-                            "The Var(%s)'s type should be LoDTensor, "
+                            "The Var(%s)'s type should be phi::DenseTensor, "
                            "but the received is %s",
                            ctx->Inputs("Grad")[i].front(),
                            ctx->GetInputsVarType("Grad")[i]));
@@ -145,31 +145,31 @@ class LarsMomentumOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("Param",
-             "(LoDTensor, default LoDTensor<float>) "
+             "(phi::DenseTensor, default phi::DenseTensor<float>) "
             "Input parameter that has to be updated")
        .AsDuplicable();
    AddInput("Grad",
-             "(LoDTensor, default LoDTensor<float>) "
+             "(phi::DenseTensor, default phi::DenseTensor<float>) "
             "Input gradient of the parameter")
        .AsDuplicable();
    AddInput("Velocity",
-             "(LoDTensor, default LoDTensor<float>) "
+             "(phi::DenseTensor, default phi::DenseTensor<float>) "
             "Input velocity (corresponding to the parameter) "
             "that has to be updated")
        .AsDuplicable();
    AddInput("LearningRate",
-             "(LoDTensor, default LoDTensor<float>) "
+             "(phi::DenseTensor, default phi::DenseTensor<float>) "
             "Input learning rate")
        .AsDuplicable();
    AddInput("MasterParam", "FP32 master weight for AMP.")
        .AsDuplicable()
        .AsDispensable();
    AddOutput("ParamOut",
-              "(LoDTensor) This output is updated parameter. "
+              "(phi::DenseTensor) This output is updated parameter. "
              "It shared memory with Input(Param).")
        .AsDuplicable();
    AddOutput("VelocityOut",
-              "(LoDTensor) This output is updated velocity. "
+              "(phi::DenseTensor) This output is updated velocity. "
              "It shared memory with Input(Velocity).")
        .AsDuplicable();
    AddOutput("MasterParamOut",

--- a/paddle/fluid/operators/optimizers/momentum_op.h
+++ b/paddle/fluid/operators/optimizers/momentum_op.h
@@ -54,12 +54,12 @@ class MomentumOp : public framework::OperatorWithKernel {
        true,
        platform::errors::NotFound(
            "Input(LearningRate) of Momentum should not be null."));
-    PADDLE_ENFORCE_EQ(
+    PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("Param").front(),
-        ctx->GetInputsVarType("Param").front(),
+                      framework::proto::VarType::LOD_TENSOR,
-        framework::proto::VarType::LOD_TENSOR,
+                      platform::errors::InvalidArgument(
-        platform::errors::InvalidArgument(
+                          "The input var's type should be phi::DenseTensor, "
-            "The input var's type should be LoDTensor, but the received is %s",
+                          "but the received is %s",
-            ctx->GetInputsVarType("Param").front()));
+                          ctx->GetInputsVarType("Param").front()));
    PADDLE_ENFORCE_EQ(ctx->HasOutput("ParamOut"),
                      true,

--- a/paddle/fluid/operators/optimizers/rmsprop_op_npu.cc
+++ b/paddle/fluid/operators/optimizers/rmsprop_op_npu.cc
@@ -16,16 +16,15 @@ namespace paddle {
 namespace operators {
 using Tensor = phi::DenseTensor;
-using LoDTensor = phi::DenseTensor;
 template <typename DeviceContext, typename T>
 class RMSPROPNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    auto *grad_var = ctx.InputVar("Grad");
-    auto *param_out = ctx.Output<LoDTensor>("ParamOut");
+    auto *param_out = ctx.Output<phi::DenseTensor>("ParamOut");
-    auto *moment_out = ctx.Output<LoDTensor>("MomentOut");
+    auto *moment_out = ctx.Output<phi::DenseTensor>("MomentOut");
-    auto *mean_square_out = ctx.Output<LoDTensor>("MeanSquareOut");
+    auto *mean_square_out = ctx.Output<phi::DenseTensor>("MeanSquareOut");
    param_out->mutable_data<T>(ctx.GetPlace());
    moment_out->mutable_data<T>(ctx.GetPlace());
@@ -34,17 +33,17 @@ class RMSPROPNPUKernel : public framework::OpKernel<T> {
    auto epsilon = static_cast<T>(ctx.Attr<float>("epsilon"));
    auto rho = static_cast<T>(ctx.Attr<float>("decay"));
    auto momentum = static_cast<T>(ctx.Attr<float>("momentum"));
-    auto *p_tensor = ctx.Input<LoDTensor>("Param");
+    auto *p_tensor = ctx.Input<phi::DenseTensor>("Param");
-    auto *ms_tensor = ctx.Input<LoDTensor>("MeanSquare");
+    auto *ms_tensor = ctx.Input<phi::DenseTensor>("MeanSquare");
-    auto *lr_tensor = ctx.Input<LoDTensor>("LearningRate");
+    auto *lr_tensor = ctx.Input<phi::DenseTensor>("LearningRate");
-    auto *mom_tensor = ctx.Input<LoDTensor>("Moment");
+    auto *mom_tensor = ctx.Input<phi::DenseTensor>("Moment");
    bool centered = ctx.Attr<bool>("centered");
    auto stream =
        ctx.template device_context<paddle::platform::NPUDeviceContext>()
            .stream();
-    if (grad_var->IsType<LoDTensor>()) {
+    if (grad_var->IsType<phi::DenseTensor>()) {
-      auto *grad_tensor = ctx.Input<LoDTensor>("Grad");
+      auto *grad_tensor = ctx.Input<phi::DenseTensor>("Grad");
      if (centered) {
        framework::NPUAttributeMap attr_input = {{"use_locking", false}};
        const Tensor *rho_tensor = nullptr;

--- a/paddle/fluid/operators/optimizers/sgd_op.cu
+++ b/paddle/fluid/operators/optimizers/sgd_op.cu
@@ -72,7 +72,7 @@ class SGDOpKernel<phi::GPUContext, T> : public framework::OpKernel<T> {
    PADDLE_ENFORCE_EQ(param_var->IsType<phi::DenseTensor>(),
                      true,
                      platform::errors::InvalidArgument(
-                          "The Var(%s)'s type should be LoDTensor, "
+                          "The Var(%s)'s type should be phi::DenseTensor, "
                          "but the received is %s",
                          ctx.InputNames("Param").front(),
                          paddle::framework::ToTypeName(param_var->Type())));
@@ -107,7 +107,7 @@ class SGDOpKernel<phi::GPUContext, T> : public framework::OpKernel<T> {
            ? master_param_out->mutable_data<MPDType>(ctx.GetPlace())
            : nullptr;
-    // Actually, all tensors are LoDTensor except SelectedRows.
+    // Actually, all tensors are phi::DenseTensor except SelectedRows.
    if (grad_var->IsType<phi::DenseTensor>()) {
      auto* grad = ctx.Input<phi::DenseTensor>("Grad");

--- a/paddle/fluid/operators/optimizers/sgd_op.h
+++ b/paddle/fluid/operators/optimizers/sgd_op.h
--- a/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc
+++ b/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc
--- a/paddle/fluid/operators/pscore/distributed_push_sparse_op.cc
+++ b/paddle/fluid/operators/pscore/distributed_push_sparse_op.cc
--- a/paddle/fluid/operators/pscore/fake_init_op.cc
+++ b/paddle/fluid/operators/pscore/fake_init_op.cc
--- a/paddle/fluid/operators/reader/create_py_reader_op.cc
+++ b/paddle/fluid/operators/reader/create_py_reader_op.cc
--- a/paddle/fluid/operators/reader/read_op.cc
+++ b/paddle/fluid/operators/reader/read_op.cc