[Unify Tensors PR #7] Merged LoDTensor with Tensor, test=allcases (#38880)

* Merged LoDTensor with Tensor,test=allcases * Patched python level LoDTensor * Fixed example code failure * Polished function names, removed duplicated forward declarations

[Unify Tensors PR #7] Merged LoDTensor with Tensor, test=allcases (#38880)
* Merged LoDTensor with Tensor,test=allcases * Patched python level LoDTensor * Fixed example code failure * Polished function names, removed duplicated forward declarations
88966b28 · Zhanlue Yang · GitHub · a8879148 · 88966b28 · 88966b28
54 changed file
--- a/paddle/fluid/distributed/fleet.h
+++ b/paddle/fluid/distributed/fleet.h
@@ -36,7 +36,7 @@ limitations under the License. */

 namespace paddle {
 namespace framework {
-class LoDTensor;
+class Tensor;
 class Scope;
 class SelectedRows;
 class Variable;

--- a/paddle/fluid/distributed/service/brpc_utils.cc
+++ b/paddle/fluid/distributed/service/brpc_utils.cc
@@ -20,7 +20,7 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 class Variable;
-class LoDTensor;
+class Tensor;
 }  // namespace framework
 }  // namespace paddle


--- a/paddle/fluid/distributed/test/brpc_service_dense_sgd_test.cc
+++ b/paddle/fluid/distributed/test/brpc_service_dense_sgd_test.cc
@@ -31,7 +31,7 @@ class PSClient;
 class PSServer;
 }  // namespace distributed
 namespace framework {
-class LoDTensor;
+class Tensor;
 class Variable;
 }  // namespace framework
 }  // namespace paddle

--- a/paddle/fluid/distributed/test/brpc_service_sparse_sgd_test.cc
+++ b/paddle/fluid/distributed/test/brpc_service_sparse_sgd_test.cc
@@ -32,7 +32,7 @@ class PSClient;
 class PSServer;
 }  // namespace distributed
 namespace framework {
-class LoDTensor;
+class Tensor;
 class Variable;
 }  // namespace framework
 }  // namespace paddle

--- a/paddle/fluid/framework/data_feed.h
+++ b/paddle/fluid/framework/data_feed.h
@@ -50,7 +50,7 @@ DECLARE_bool(enable_slotrecord_reset_shrink);
 namespace paddle {
 namespace framework {
 class DataFeedDesc;
-class LoDTensor;
+class Tensor;
 class Scope;
 class Variable;
 }  // namespace framework

--- a/paddle/fluid/framework/details/fetch_async_op_handle.h
+++ b/paddle/fluid/framework/details/fetch_async_op_handle.h
@@ -24,7 +24,7 @@

 namespace paddle {
 namespace framework {
-class LoDTensor;
+class Tensor;

 namespace ir {
 class Node;

--- a/paddle/fluid/framework/details/variable_visitor.cc
+++ b/paddle/fluid/framework/details/variable_visitor.cc
@@ -18,7 +18,7 @@

 namespace paddle {
 namespace framework {
-class LoDTensor;
+class Tensor;
 class Variable;
 }  // namespace framework
 }  // namespace paddle

--- a/paddle/fluid/framework/device_worker.cc
+++ b/paddle/fluid/framework/device_worker.cc
@@ -17,7 +17,7 @@ limitations under the License. */
 namespace paddle {
 namespace framework {

-class LoDTensor;
+class Tensor;
 class Scope;

 void DeviceWorker::SetRootScope(Scope* root_scope) { root_scope_ = root_scope; }

--- a/paddle/fluid/framework/device_worker.h
+++ b/paddle/fluid/framework/device_worker.h
@@ -43,10 +43,9 @@ limitations under the License. */

 namespace paddle {
 namespace framework {
-class LoDTensor;
+class Tensor;
 class ProgramDesc;
 class Scope;
-class Tensor;
 }  // namespace framework
 namespace platform {
 class DeviceContext;

--- a/paddle/fluid/framework/downpour_worker.cc
+++ b/paddle/fluid/framework/downpour_worker.cc
@@ -17,7 +17,7 @@ limitations under the License. */

 namespace paddle {
 namespace framework {
-class LoDTensor;
+class Tensor;
 class Variable;
 }  // namespace framework
 }  // namespace paddle

--- a/paddle/fluid/framework/feed_fetch_method.cc
+++ b/paddle/fluid/framework/feed_fetch_method.cc
@@ -22,7 +22,7 @@ limitations under the License. */
 namespace paddle {
 namespace framework {

-class LoDTensor;
+class Tensor;
 class Variable;

 void SetFeedVariable(Scope* scope, const LoDTensor& input,

--- a/paddle/fluid/framework/feed_fetch_method.h
+++ b/paddle/fluid/framework/feed_fetch_method.h
@@ -23,7 +23,7 @@ limitations under the License. */
 namespace paddle {
 namespace framework {

-class LoDTensor;
+class Tensor;
 class Scope;

 void SetFeedVariable(Scope* scope, const LoDTensor& input,

--- a/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc
@@ -20,7 +20,7 @@

 namespace paddle {
 namespace framework {
-class LoDTensor;
+class Tensor;
 class Scope;
 }  // namespace framework
 }  // namespace paddle

--- a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc
@@ -21,7 +21,7 @@

 namespace paddle {
 namespace framework {
-class LoDTensor;
+class Tensor;
 class Scope;
 }  // namespace framework
 }  // namespace paddle

--- a/paddle/fluid/framework/ir/delete_dropout_op_pass.cc
+++ b/paddle/fluid/framework/ir/delete_dropout_op_pass.cc
@@ -17,7 +17,7 @@

 namespace paddle {
 namespace framework {
-class LoDTensor;
+class Tensor;
 }  // namespace framework
 }  // namespace paddle


--- a/paddle/fluid/framework/ir/delete_quant_dequant_op_pass.cc
+++ b/paddle/fluid/framework/ir/delete_quant_dequant_op_pass.cc
@@ -18,7 +18,7 @@

 namespace paddle {
 namespace framework {
-class LoDTensor;
+class Tensor;
 }  // namespace framework
 }  // namespace paddle


--- a/paddle/fluid/framework/ir/fusion_group/code_generator_tester.cc
+++ b/paddle/fluid/framework/ir/fusion_group/code_generator_tester.cc
@@ -24,7 +24,7 @@ limitations under the License. */

 namespace paddle {
 namespace framework {
-class LoDTensor;
+class Tensor;
 }  // namespace framework
 }  // namespace paddle


--- a/paddle/fluid/framework/lod_tensor.cc
+++ b/paddle/fluid/framework/lod_tensor.cc
@@ -27,34 +27,6 @@ class DeviceContext;
 namespace paddle {
 namespace framework {

-std::ostream &operator<<(std::ostream &os, const LoD &lod) {
-  os << "{";
-  for (auto &v : lod) {
-    os << "{";
-    bool is_first = true;
-    for (auto &i : v) {
-      if (is_first) {
-        os << i;
-        is_first = false;
-      } else {
-        os << ", " << i;
-      }
-    }
-    os << "}";
-  }
-  os << "}";
-
-  return os;
-}
-
-std::ostream &operator<<(std::ostream &os, const LoDTensor &t) {
-  if (t.lod().size() > 0) {
-    os << "  - lod: " << t.lod() << "\n";
-  }
-  os << static_cast<Tensor>(t);
-  return os;
-}
-
 std::string LoDToString(const LoD &lod) {
  std::ostringstream stream;
  stream << lod;

--- a/paddle/fluid/framework/lod_tensor.h
+++ b/paddle/fluid/framework/lod_tensor.h
@@ -28,9 +28,6 @@ limitations under the License. */
 #include "paddle/fluid/platform/place.h"

 namespace paddle {
-namespace framework {
-class LoDTensor;
-}  // namespace framework
 namespace platform {
 class DeviceContext;
 }  // namespace platform
@@ -39,6 +36,8 @@ class DeviceContext;
 namespace paddle {
 namespace framework {

+using LoDTensor = paddle::framework::Tensor;
+
 /*
 * LoD is short for Level of Details.
 *
@@ -56,9 +55,6 @@ namespace framework {
 */
 using LoD = std::vector<Vector<size_t>>;

-std::ostream& operator<<(std::ostream& os, const LoD& lod);
-std::ostream& operator<<(std::ostream& os, const LoDTensor& t);
-
 std::string LoDToString(const LoD& lod);

 LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin,
@@ -102,22 +98,6 @@ bool CheckLoD(const LoD& in, int tensor_height = -1);
 */
 bool CheckAbsLoD(const LoD& in, int tensor_height = -1);

-/*
- * LoDTensor (Level of details Tensor)
- * see https://en.wikipedia.org/wiki/Level_of_details for reference.
- */
-class LoDTensor : public Tensor {
- public:
-  using Tensor::Tensor;
-
-  // Split LoDTensor and copy to each place specified in places.
-  std::vector<LoDTensor> SplitLoDTensor(
-      const std::vector<platform::Place> places) const;
-
-  void MergeLoDTensor(const std::vector<const LoDTensor*>& lod_tensors,
-                      platform::Place place);
-};
-
 /*
 * Expand the `source` to fit the LoD of `lod`. For example, a `source`
 * LoDTensor is

--- a/paddle/fluid/framework/naive_executor.h
+++ b/paddle/fluid/framework/naive_executor.h
@@ -31,7 +31,7 @@ namespace framework {
 * Simple, intuitive and effective. Only single thread is supported, and
 * currently designed for inference.
 */
-class LoDTensor;
+class Tensor;
 class ProgramDesc;
 class Scope;


--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -34,7 +34,7 @@ limitations under the License. */

 namespace paddle {
 namespace framework {
-class LoDTensor;
+class Tensor;
 }  // namespace framework
 }  // namespace paddle
 #ifdef PADDLE_WITH_XPU
@@ -555,11 +555,6 @@ Variable* ExecutionContext::OutputVar(const std::string& name) const {
  return it->second.empty() ? nullptr : it->second[0];
 }

-template <>
-const Tensor* ExecutionContext::Input<Tensor>(const std::string& name) const {
-  return Input<LoDTensor>(name);
-}
-
 template <>
 const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
    const std::string& name) const {
@@ -584,11 +579,6 @@ const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
  return res;
 }

-template <>
-Tensor* ExecutionContext::Output<Tensor>(const std::string& name) const {
-  return Output<LoDTensor>(name);
-}
-
 template <>
 std::vector<Tensor*> ExecutionContext::MultiOutput<Tensor>(
    const std::string& name) const {

--- a/paddle/fluid/framework/operator.h
+++ b/paddle/fluid/framework/operator.h
@@ -479,16 +479,10 @@ class ExecutionArgumentMappingContext : public pten::ArgumentMappingContext {
  const ExecutionContext& ctx_;
 };

-template <>
-const Tensor* ExecutionContext::Input<Tensor>(const std::string& name) const;
-
 template <>
 const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
    const std::string& name) const;

-template <>
-Tensor* ExecutionContext::Output<Tensor>(const std::string& name) const;
-
 template <>
 std::vector<Tensor*> ExecutionContext::MultiOutput<Tensor>(
    const std::string& name) const;

--- a/paddle/fluid/framework/pull_dense_worker.cc
+++ b/paddle/fluid/framework/pull_dense_worker.cc
@@ -17,7 +17,7 @@ limitations under the License. */
 namespace paddle {
 namespace framework {

-class LoDTensor;
+class Tensor;
 class Scope;
 class Variable;


--- a/paddle/fluid/framework/tensor.h
+++ b/paddle/fluid/framework/tensor.h
@@ -36,7 +36,7 @@ namespace paddle {

 namespace framework {

-class LoDTensor;
+using LoD = std::vector<paddle::framework::Vector<size_t>>;

 /*
 NOTE(liym27): [ What is TensorInplaceVersion used for? ]
@@ -74,6 +74,13 @@ class Tensor : public pten::DenseTensor {
  using DenseTensor = pten::DenseTensor;
  using DenseTensor::DenseTensor;

+  // Split Tensor and copy to each place specified in places.
+  std::vector<Tensor> SplitLoDTensor(
+      const std::vector<platform::Place> places) const;
+
+  void MergeLoDTensor(const std::vector<const Tensor*>& lod_tensors,
+                      platform::Place place);
+
  /*! The internal of two tensors share the same memory block. */
  Tensor& ShareDataWith(const Tensor& src);


--- a/paddle/fluid/framework/tensor_util.cc
+++ b/paddle/fluid/framework/tensor_util.cc
@@ -1428,7 +1428,31 @@ std::ostream& print_tensor<paddle::platform::complex<double>>(
  return os;
 }

+std::ostream& operator<<(std::ostream& os, const LoD& lod) {
+  os << "{";
+  for (auto& v : lod) {
+    os << "{";
+    bool is_first = true;
+    for (auto& i : v) {
+      if (is_first) {
+        os << i;
+        is_first = false;
+      } else {
+        os << ", " << i;
+      }
+    }
+    os << "}";
+  }
+  os << "}";
+
+  return os;
+}
+
 std::ostream& operator<<(std::ostream& os, const Tensor& t) {
+  if (t.lod().size() > 0) {
+    os << "  - lod: " << t.lod() << "\n";
+  }
+
  os << "  - place: " << t.place() << "\n";
  os << "  - shape: [" << t.dims() << "]\n";
  os << "  - layout: " << DataLayoutToString(t.layout()) << "\n";

--- a/paddle/fluid/framework/tensor_util.h
+++ b/paddle/fluid/framework/tensor_util.h
@@ -39,6 +39,9 @@ limitations under the License. */
 namespace paddle {
 namespace framework {

+std::ostream& operator<<(std::ostream& os, const LoD& lod);
+std::ostream& operator<<(std::ostream& os, const Tensor& t);
+
 class PrintOptions {
 public:
  static PrintOptions& Instance() {
@@ -494,6 +497,5 @@ inline void TensorToVector(const Tensor& src, std::vector<bool>* dst) {
  delete[] array;
 }

-std::ostream& operator<<(std::ostream& os, const Tensor& t);
 }  // namespace framework
 }  // namespace paddle
--- a/paddle/fluid/framework/trainer.h
+++ b/paddle/fluid/framework/trainer.h
@@ -40,7 +40,7 @@ namespace paddle {
 namespace framework {

 class Dataset;
-class LoDTensor;
+class Tensor;
 class ProgramDesc;
 class PullDenseWorker;
 class Scope;

--- a/paddle/fluid/framework/var_type_traits.h
+++ b/paddle/fluid/framework/var_type_traits.h
@@ -70,11 +70,10 @@ class BKCLCommunicator;
 namespace framework {
 class LoDRankTable;
 class ScopeBase;
-class LoDTensor;
+class Tensor;
 class ReaderHolder;
 class Scope;
 class SelectedRows;
-class Tensor;
 }  // namespace framework

 namespace operators {
@@ -164,8 +163,8 @@ struct VarTypeRegistryImpl {
 // Users should add other variable types below.
 // Paddle would generate unique Ids for each registered variable types.
 using VarTypeRegistry = detail::VarTypeRegistryImpl<
-    Tensor, LoDTensor, SelectedRows, std::vector<Scope *>, LoDRankTable,
-    Strings, LoDTensorArray, platform::PlaceList, ReaderHolder, String, Scope *,
+    Tensor, SelectedRows, std::vector<Scope *>, LoDRankTable, Strings,
+    LoDTensorArray, platform::PlaceList, ReaderHolder, String, Scope *,
    operators::reader::LoDTensorBlockingQueueHolder, FetchList, FeedList,
    operators::reader::OrderedMultiDeviceLoDTensorBlockingQueueHolder,
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)

--- a/paddle/fluid/inference/api/api_impl.h
+++ b/paddle/fluid/inference/api/api_impl.h
@@ -35,7 +35,7 @@ limitations under the License. */
 namespace paddle {

 namespace framework {
-class LoDTensor;
+class Tensor;
 class Scope;
 }  // namespace framework


--- a/paddle/fluid/inference/api/details/reset_tensor_array.h
+++ b/paddle/fluid/inference/api/details/reset_tensor_array.h
@@ -23,7 +23,7 @@

 namespace paddle {
 namespace framework {
-class LoDTensor;
+class Tensor;
 class Scope;
 class SelectedRows;
 }  // namespace framework

--- a/paddle/fluid/operators/assert_op.cc
+++ b/paddle/fluid/operators/assert_op.cc
@@ -19,7 +19,7 @@
 namespace paddle {
 namespace framework {
 class InferShapeContext;
-class LoDTensor;
+class Tensor;
 class OpDesc;
 class Scope;
 class Variable;

--- a/paddle/fluid/operators/assign_op.h
+++ b/paddle/fluid/operators/assign_op.h
@@ -27,7 +27,7 @@ class DeviceContext;

 namespace paddle {
 namespace framework {
-class LoDTensor;
+class Tensor;
 class Variable;
 }  // namespace framework
 }  // namespace paddle

--- a/paddle/fluid/operators/controlflow/while_op_helper.h
+++ b/paddle/fluid/operators/controlflow/while_op_helper.h
@@ -24,7 +24,7 @@

 namespace paddle {
 namespace framework {
-class LoDTensor;
+class Tensor;
 class ProgramDesc;
 }  // namespace framework
 }  // namespace paddle

--- a/paddle/fluid/operators/math/beam_search.cc
+++ b/paddle/fluid/operators/math/beam_search.cc
@@ -16,7 +16,6 @@ limitations under the License. */

 namespace paddle {
 namespace framework {
-class LoDTensor;
 class Tensor;
 }  // namespace framework
 namespace platform {

--- a/paddle/fluid/operators/math/beam_search_npu.cc
+++ b/paddle/fluid/operators/math/beam_search_npu.cc
@@ -17,7 +17,6 @@ limitations under the License. */

 namespace paddle {
 namespace framework {
-class LoDTensor;
 class Tensor;
 }  // namespace framework
 namespace platform {

--- a/paddle/fluid/operators/math/sequence_padding.cc
+++ b/paddle/fluid/operators/math/sequence_padding.cc
@@ -16,7 +16,6 @@ limitations under the License. */

 namespace paddle {
 namespace framework {
-class LoDTensor;
 class Tensor;
 }  // namespace framework
 namespace platform {

--- a/paddle/fluid/operators/math/sequence_scale.cc
+++ b/paddle/fluid/operators/math/sequence_scale.cc
@@ -16,7 +16,7 @@ limitations under the License. */

 namespace paddle {
 namespace framework {
-class LoDTensor;
+class Tensor;
 }  // namespace framework
 }  // namespace paddle


--- a/paddle/fluid/operators/math/sequence_scale.h
+++ b/paddle/fluid/operators/math/sequence_scale.h
@@ -19,7 +19,7 @@ limitations under the License. */

 namespace paddle {
 namespace framework {
-class LoDTensor;
+class Tensor;
 }  // namespace framework
 }  // namespace paddle


--- a/paddle/fluid/operators/memcpy_d2h_op.h
+++ b/paddle/fluid/operators/memcpy_d2h_op.h
@@ -24,7 +24,7 @@ class DeviceContext;

 namespace paddle {
 namespace framework {
-class LoDTensor;
+class Tensor;
 class Variable;
 class SelectedRows;
 }  // namespace framework

--- a/paddle/fluid/operators/memcpy_h2d_op.h
+++ b/paddle/fluid/operators/memcpy_h2d_op.h
@@ -25,7 +25,7 @@ class DeviceContext;

 namespace paddle {
 namespace framework {
-class LoDTensor;
+class Tensor;
 class Variable;
 class SelectedRows;
 }  // namespace framework

--- a/paddle/fluid/operators/memcpy_op.h
+++ b/paddle/fluid/operators/memcpy_op.h
@@ -27,7 +27,7 @@ class DeviceContext;

 namespace paddle {
 namespace framework {
-class LoDTensor;
+class Tensor;
 class Variable;
 class SelectedRows;
 }  // namespace framework

--- a/paddle/fluid/operators/merge_lod_tensor_op.cc
+++ b/paddle/fluid/operators/merge_lod_tensor_op.cc
@@ -17,7 +17,7 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 class InferShapeContext;
-class LoDTensor;
+class Tensor;
 class OpDesc;
 class Scope;
 }  // namespace framework

--- a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
@@ -19,7 +19,6 @@ limitations under the License. */

 namespace paddle {
 namespace framework {
-class LoDTensor;
 class Tensor;
 }  // namespace framework
 namespace platform {

--- a/paddle/fluid/operators/print_op.cc
+++ b/paddle/fluid/operators/print_op.cc
@@ -19,7 +19,7 @@
 namespace paddle {
 namespace framework {
 class InferShapeContext;
-class LoDTensor;
+class Tensor;
 class OpDesc;
 class Scope;
 }  // namespace framework

--- a/paddle/fluid/operators/recurrent_op.cc
+++ b/paddle/fluid/operators/recurrent_op.cc
@@ -17,7 +17,7 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 class InferShapeContext;
-class LoDTensor;
+class Tensor;
 class OpDesc;
 }  // namespace framework
 }  // namespace paddle

--- a/paddle/fluid/operators/reorder_lod_tensor_by_rank_op.cc
+++ b/paddle/fluid/operators/reorder_lod_tensor_by_rank_op.cc
@@ -18,7 +18,7 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 class LoDRankTable;
-class LoDTensor;
+class Tensor;
 class OpDesc;
 class Scope;
 }  // namespace framework

--- a/paddle/fluid/operators/split_lod_tensor_op.cc
+++ b/paddle/fluid/operators/split_lod_tensor_op.cc
@@ -18,7 +18,7 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 class InferShapeContext;
-class LoDTensor;
+class Tensor;
 class OpDesc;
 class Scope;
 }  // namespace framework

--- a/paddle/fluid/operators/tensor_formatter.h
+++ b/paddle/fluid/operators/tensor_formatter.h
@@ -20,7 +20,7 @@

 namespace paddle {
 namespace framework {
-class LoDTensor;
+class Tensor;
 }  // namespace framework
 }  // namespace paddle


--- a/paddle/fluid/operators/transfer_layout_op.h
+++ b/paddle/fluid/operators/transfer_layout_op.h
@@ -29,7 +29,7 @@ class DeviceContext;

 namespace paddle {
 namespace framework {
-class LoDTensor;
+class Tensor;
 class Variable;
 }  // namespace framework
 }  // namespace paddle

--- a/paddle/fluid/platform/lodtensor_printer.cc
+++ b/paddle/fluid/platform/lodtensor_printer.cc
@@ -18,7 +18,7 @@ limitations under the License. */

 namespace paddle {
 namespace framework {
-class LoDTensor;
+class Tensor;
 class Variable;
 }  // namespace framework
 }  // namespace paddle

--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -875,12 +875,12 @@ PYBIND11_MODULE(core_noavx, m) {
      .def("set", SetTensorFromPyArray<paddle::platform::CUDAPinnedPlace>,
           py::arg("array"), py::arg("place"), py::arg("zero_copy") = false,
           R"DOC(
-        Set the data of LoDTensor on place with given numpy array.
+        Set the data of Tensor on place with given numpy array.
        
        Args:
          lod (numpy.ndarray): The data to set.
          place (CPUPlace|CUDAPlace|XPUPlace|IPUPlace|CUDAPinnedPlace|NPUPlace|MLUPlace): The place where the
-          LoDTensor is to be set.
+          Tensor is to be set.
          zero_copy (bool, optional): Whether to share memory with the input numpy array.
          This parameter only works with CPUPlace. Default: False.

@@ -893,17 +893,17 @@ PYBIND11_MODULE(core_noavx, m) {
                import paddle.fluid as fluid
                import numpy as np

-                t = fluid.LoDTensor()
+                t = fluid.Tensor()
                t.set(np.ndarray([5, 30]), fluid.CPUPlace())
          )DOC")

      .def("shape",
           [](framework::Tensor &self) { return vectorize(self.dims()); },
           R"DOC(
-           Return the shape of LoDTensor.
+           Return the shape of Tensor.

           Returns:
-               list[int]: The shape of LoDTensor.
+               list[int]: The shape of Tensor.


           Examples:
@@ -912,7 +912,7 @@ PYBIND11_MODULE(core_noavx, m) {
                  import paddle.fluid as fluid
                  import numpy as np

-                  t = fluid.LoDTensor()
+                  t = fluid.Tensor()
                  t.set(np.ndarray([5, 30]), fluid.CPUPlace())
                  print(t.shape())  # [5, 30]
           )DOC")
@@ -949,117 +949,34 @@ PYBIND11_MODULE(core_noavx, m) {
           })
      .def("_share_data_with", &framework::Tensor::ShareDataWith)
      .def("__getitem__", PySliceTensor, py::return_value_policy::reference)
-      .def("__str__", [](const framework::Tensor &self) {
-        std::stringstream ostr;
-        ostr << self;
-        return ostr.str();
-      });
-
-  // TODO(cql): add reference: en_user_guide_lod_tensor
-  py::class_<LoDTensor, framework::Tensor>(m, "LoDTensor", R"DOC(
-    LoDTensor is a Tensor with optional LoD (Level of Details) information, 
-    it can be used for variable-length sequences, 
-    see :ref:`user_guide_lod_tensor` for details.
-
-    LoDTensor can be converted to numpy array using :code:`numpy.array(lod_tensor)`.
-
-    You can skip the following explanation if you don't need to know details 
-    of LoDTensor.
-
-    The following two examples show how to use LODtensor to represent 
-    variable-length sequences.
-    
-    Example 1:
-    
-    Suppose x is a LoDTensor representing a variable-length sequence. 
-    It contains two logical subsequences, the length of first logical sequence 
-    is 2 (e.g., number of samples is 2), the length of second logical sequence 
-    is 3, and the total length is 5. The data of the first logical sequence is 
-    [1, 2], [3, 4], and the data of the second logical sequence is [5, 6], 
-    [7, 8], [9, 10]. The data dimension of each sample is 2. So, the final 
-    shape of the LoDTensor is [5, 2], of which 5 is the total length and 2 is 
-    the dimension of each sample.
-    
-    Logically, we can represent the variable-length sequence in two ways: one 
-    is in the form of recursive sequence lengths, that is, 
-    x.recursive_sequence_lengths=[[2, 3]]; the other is in the form of offsets, 
-    that is, x.lod=[[0, 2, 2+3]]. These two representations are equivalent, and 
-    you can set and retrieve recursive_sequence_lengths or LoD through the 
-    corresponding interfaces of LoDTensor introduced later.
-
-    Actually, in order to access sequence faster, Paddle uses offset to store 
-    different lengths of sequences. 
-    Therefore, the operations on recursive_sequence_lengths will be converted 
-    to the operations on LoD eventually.
-    
-    .. code-block:: python
-
-      y.data = [[1, 2], [3, 4],
-                [5, 6], [7, 8],
-                [9, 10], [11, 12], [13, 14]]
-
-      y.shape = [2+2+3, 2]
-
-      y.recursive_sequence_lengths = [[2, 1], [2, 2, 3]]
-
-      y.lod = [[0, 2, 3], [0, 2, 4, 7]]
-
-    Example 2:
-
-    LoD may have more than one level (for example, a paragraph may have more 
-    than one sentence and a sentence may have more than one word). Suppose y 
-    is a LoDTensor and its lod_level is 2. 
-    From level = 0, there are two logical sequences, the length of which is 
-    2 and 1, respectively, indicating that the first logical sequence contains 
-    two sub-sequences and the second logical sequence contains one sub-sequence. 
-    From level = 1, the lengths of two sub-sequences contained by the first 
-    logical sequence is 2 and 2, and the length of sub-sequence contained by 
-    the second logical sequence is 3.
-      
-    Therefore, the LoDTensor is represented in the form of recursive sequence 
-    lengths as y.recursive_sequence_lengths=[[2,1], [2,2,3]]; and equally, in 
-    the form of offset, it is represented as y.lod=[[0,2,3], [0,2,4,7]].
-
-    .. code-block:: python
-
-      y.data = [[1, 2], [3, 4],
-                [5, 6], [7, 8],
-                [9, 10], [11, 12], [13, 14]]
-
-      y.shape = [2+2+3, 2]
-
-      y.recursive_sequence_lengths = [[2, 1], [2, 2, 3]]
-
-      y.lod = [[0, 2, 3], [0, 2, 4, 7]]
-
-    Examples:
-        .. code-block:: python
-
-          import paddle.fluid as fluid
-
-          t = fluid.LoDTensor()
-
-        )DOC")
-      .def("__array__",
-           [](framework::Tensor &self) { return TensorToPyArray(self); })
+      .def("__str__",
+           [](const framework::Tensor &self) {
+             std::stringstream ostr;
+             ostr << self;
+             return ostr.str();
+           }) /* ------ End of original Tensor ------ */
+      .def(
+          "__init__",
+          [](framework::Tensor &instance, const std::vector<std::vector<size_t>>
+                                              &recursive_sequence_lengths) {
+            LoD new_lod;
+            new_lod.reserve(recursive_sequence_lengths.size());
+            std::copy(recursive_sequence_lengths.begin(),
+                      recursive_sequence_lengths.end(),
+                      std::back_inserter(new_lod));
+            LoD new_offset_lod = ConvertToOffsetBasedLoD(new_lod);
+            PADDLE_ENFORCE_EQ(
+                CheckLoD(new_offset_lod, -1), true,
+                platform::errors::InvalidArgument(
+                    "The provided recursive_sequence_lengths info is invalid, "
+                    "the LoD converted by recursive_sequence_lengths is %s",
+                    new_lod));
+            new (&instance) framework::Tensor(new_offset_lod);
+          })
      .def("__init__",
-           [](LoDTensor &instance, const std::vector<std::vector<size_t>>
-                                       &recursive_sequence_lengths) {
-             LoD new_lod;
-             new_lod.reserve(recursive_sequence_lengths.size());
-             std::copy(recursive_sequence_lengths.begin(),
-                       recursive_sequence_lengths.end(),
-                       std::back_inserter(new_lod));
-             LoD new_offset_lod = ConvertToOffsetBasedLoD(new_lod);
-             PADDLE_ENFORCE_EQ(
-                 CheckLoD(new_offset_lod, -1), true,
-                 platform::errors::InvalidArgument(
-                     "The provided recursive_sequence_lengths info is invalid, "
-                     "the LoD converted by recursive_sequence_lengths is %s",
-                     new_lod));
-             new (&instance) LoDTensor(new_offset_lod);
+           [](framework::Tensor &instance) {
+             new (&instance) framework::Tensor();
           })
-      .def("__init__", [](LoDTensor &instance) { new (&instance) LoDTensor(); })
      // We implement offset based LOD in C++ while we use length based with
      // Python API. So we changed set_lod to set_recursive_sequence_lengths
      // to
@@ -1067,7 +984,8 @@ PYBIND11_MODULE(core_noavx, m) {
      // The discussion is here:
      // https://github.com/PaddlePaddle/Paddle/issues/10855
      .def("set_lod",
-           [](LoDTensor &self, const std::vector<std::vector<size_t>> &lod) {
+           [](framework::Tensor &self,
+              const std::vector<std::vector<size_t>> &lod) {
             // the input lod is offset-based level-of-detail info
             LoD new_lod;
             new_lod.reserve(lod.size());
@@ -1079,7 +997,7 @@ PYBIND11_MODULE(core_noavx, m) {
             self.set_lod(new_lod);
           },
           py::arg("lod"), R"DOC(
-           Set LoD of the LoDTensor.
+           Set LoD of the Tensor.

           Args:
               lod (list[list[int]]): The lod to set.
@@ -1093,14 +1011,14 @@ PYBIND11_MODULE(core_noavx, m) {
                 import paddle.fluid as fluid
                 import numpy as np

-                 t = fluid.LoDTensor()
+                 t = fluid.Tensor()
                 t.set(np.ndarray([5, 30]), fluid.CPUPlace())
                 t.set_lod([[0, 2, 5]])
                 print(t.lod()) # [[0, 2, 5]]
           )DOC")
      .def("set_recursive_sequence_lengths",
-           [](LoDTensor &self, const std::vector<std::vector<size_t>>
-                                   &recursive_sequence_lengths) {
+           [](framework::Tensor &self, const std::vector<std::vector<size_t>>
+                                           &recursive_sequence_lengths) {
             // the input recursive_sequence_lengths is length-based
             // level-of-detail info
             LoD new_lod;
@@ -1119,7 +1037,7 @@ PYBIND11_MODULE(core_noavx, m) {
             self.set_lod(new_offset_lod);
           },
           py::arg("recursive_sequence_lengths"), R"DOC(
-           Set LoD of the LoDTensor according to recursive sequence lengths.
+           Set LoD of the Tensor according to recursive sequence lengths.

           For example, if recursive_sequence_lengths=[[2, 3]], which means
           there are two sequences with length 2 and 3 respectively, the
@@ -1137,14 +1055,14 @@ PYBIND11_MODULE(core_noavx, m) {
                 import paddle.fluid as fluid
                 import numpy as np

-                 t = fluid.LoDTensor()
+                 t = fluid.Tensor()
                 t.set(np.ndarray([5, 30]), fluid.CPUPlace())
                 t.set_recursive_sequence_lengths([[2, 3]])
-                 print(t.recursive_sequence_length())  # [[2, 3]]
+                 print(t.recursive_sequence_lengths())  # [[2, 3]]
                 print(t.lod())  # [[0, 2, 5]]
           )DOC")
      .def("lod",
-           [](LoDTensor &self) -> std::vector<std::vector<size_t>> {
+           [](framework::Tensor &self) -> std::vector<std::vector<size_t>> {
             // output the offset-based lod info
             LoD lod = self.lod();
             std::vector<std::vector<size_t>> new_lod;
@@ -1153,10 +1071,10 @@ PYBIND11_MODULE(core_noavx, m) {
             return new_lod;
           },
           R"DOC(
-           Return the LoD of the LoDTensor.
+           Return the LoD of the Tensor.

           Returns:
-               list[list[int]]: The lod of the LoDTensor.
+               list[list[int]]: The lod of the Tensor.
           
           Examples:
               .. code-block:: python
@@ -1164,14 +1082,14 @@ PYBIND11_MODULE(core_noavx, m) {
                 import paddle.fluid as fluid
                 import numpy as np

-                 t = fluid.LoDTensor()
+                 t = fluid.Tensor()
                 t.set(np.ndarray([5, 30]), fluid.CPUPlace())
                 t.set_lod([[0, 2, 5]])
                 print(t.lod()) # [[0, 2, 5]]
           )DOC")
      // Set above comments of set_lod.
      .def("recursive_sequence_lengths",
-           [](LoDTensor &self) -> std::vector<std::vector<size_t>> {
+           [](framework::Tensor &self) -> std::vector<std::vector<size_t>> {
             // output the length-based lod info
             LoD lod = ConvertToLengthBasedLoD(self.lod());
             std::vector<std::vector<size_t>> new_lod;
@@ -1181,7 +1099,7 @@ PYBIND11_MODULE(core_noavx, m) {
           },
           R"DOC(
           Return the recursive sequence lengths corresponding to of the LodD 
-           of the LoDTensor.
+           of the Tensor.

           Returns:
                list[list[int]]: The recursive sequence lengths.
@@ -1192,19 +1110,19 @@ PYBIND11_MODULE(core_noavx, m) {
                 import paddle.fluid as fluid
                 import numpy as np

-                 t = fluid.LoDTensor()
+                 t = fluid.Tensor()
                 t.set(np.ndarray([5, 30]), fluid.CPUPlace())
                 t.set_recursive_sequence_lengths([[2, 3]])
                 print(t.recursive_sequence_lengths()) # [[2, 3]]
           )DOC")
      .def("has_valid_recursive_sequence_lengths",
-           [](LoDTensor &self) -> bool {
+           [](framework::Tensor &self) -> bool {
             // Check that the lod info is valid and match the outermost
-             // dimension of the LoDTensor data
+             // dimension of the Tensor data
             return CheckLoD(self.lod(), vectorize(self.dims()).front());
           },
           R"DOC(
-           Check whether the LoD of the LoDTensor is valid.
+           Check whether the LoD of the Tensor is valid.

           Returns:
               bool: Whether the LoD is valid.
@@ -1215,91 +1133,80 @@ PYBIND11_MODULE(core_noavx, m) {
                 import paddle.fluid as fluid
                 import numpy as np

-                 t = fluid.LoDTensor()
+                 t = fluid.Tensor()
                 t.set(np.ndarray([5, 30]), fluid.CPUPlace())
                 t.set_recursive_sequence_lengths([[2, 3]])
                 print(t.has_valid_recursive_sequence_lengths()) # True
           )DOC")
-      .def("__getitem__", PySliceTensor, py::return_value_policy::reference,
-           R"DOC(
-           Slice the original Tensor, and remove the LoD information.
-
-           Returns:
-               out (Tensor): new Tensor(NOT LoDTensor).
-           )DOC")
-      .def("__str__",
-           [](const LoDTensor &self) {
-             std::stringstream ostr;
-             ostr << self;
-             return ostr.str();
-           })
      .def("_as_type",
-           [](const LoDTensor &self,
+           [](const framework::Tensor &self,
              paddle::framework::proto::VarType::Type type) {
-             LoDTensor dst;
+             framework::Tensor dst;
             if (self.IsInitialized() && self.numel() > 0) {
               TransDataType(self, type, &dst);
             }
             return dst;
           })
-      .def("_copy", [](const LoDTensor &self, const platform::Place &place) {
-        // follow fetch_op's inplementation
-        LoDTensor dst;
-        if (self.IsInitialized() && self.numel() > 0) {
-          TensorCopySync(self, place, &dst);
-        } else {
-          // Not copy, if the src tensor is empty.
-          dst.clear();
-          dst.Resize({0});
-        }
-        dst.set_lod(self.lod());
-        return dst;
+      .def("_copy",
+           [](const framework::Tensor &self, const platform::Place &place) {
+             // follow fetch_op's inplementation
+             framework::Tensor dst;
+             if (self.IsInitialized() && self.numel() > 0) {
+               TensorCopySync(self, place, &dst);
+             } else {
+               // Not copy, if the src tensor is empty.
+               dst.clear();
+               dst.Resize({0});
+             }
+             dst.set_lod(self.lod());
+             return dst;
 #ifdef _WIN32
-      });
+           });
 #else
           })
      .def(py::pickle(
-          [](const LoDTensor &t) {  // __getstate__
+          [](const framework::Tensor &t) {  // __getstate__
            auto holder = t.Holder();
-            PADDLE_ENFORCE_EQ(
-              platform::is_cpu_place(holder->place()), true,
-              platform::errors::PreconditionNotMet(
-                  "LoDTensor is not on CPU."
-                  "Now only LoDTensor on CPU can be serialized."));
-            auto* mmap_writer_allocation =
-              dynamic_cast<memory::allocation::MemoryMapWriterAllocation *>(
-                holder.get());
-            PADDLE_ENFORCE_NOT_NULL(mmap_writer_allocation,
-              platform::errors::PreconditionNotMet(
-                "LoDTensor is not in shared memory."
-                "Now only LoDTensor on shared memory can be serialized."));
+            PADDLE_ENFORCE_EQ(platform::is_cpu_place(holder->place()), true,
+                              platform::errors::PreconditionNotMet(
+                                  "Tensor is not on CPU."
+                                  "Now only Tensor on CPU can be serialized."));
+            auto *mmap_writer_allocation =
+                dynamic_cast<memory::allocation::MemoryMapWriterAllocation *>(
+                    holder.get());
+            PADDLE_ENFORCE_NOT_NULL(
+                mmap_writer_allocation,
+                platform::errors::PreconditionNotMet(
+                    "Tensor is not in shared memory."
+                    "Now only Tensor on shared memory can be serialized."));
            int type_idx = static_cast<int>(t.type());

            return py::make_tuple(mmap_writer_allocation->ipc_name(),
-                                  mmap_writer_allocation->size(),
-                                  type_idx, vectorize(t.dims()), t.lod());
+                                  mmap_writer_allocation->size(), type_idx,
+                                  vectorize(t.dims()), t.lod());
          },
          [](py::tuple t) {  // __setstate__
            if (t.size() != 5)
-              throw std::runtime_error("Invalid LoDTensor state!");
+              throw std::runtime_error("Invalid Tensor state!");

            // 1. Create a new C++ instance
-            LoDTensor tensor;
+            framework::Tensor tensor;

            // 2. Rebuild Allocation
            const std::string &ipc_name = t[0].cast<std::string>();
            size_t size = t[1].cast<size_t>();
            auto shared_reader_holder =
-              memory::allocation::RebuildMemoryMapReaderAllocation(
-                ipc_name, size);
+                memory::allocation::RebuildMemoryMapReaderAllocation(ipc_name,
+                                                                     size);

            // 3. Maintain global fd set
-            VLOG(3) << "LoDTensor ipc name: " << ipc_name;
+            VLOG(3) << "Tensor ipc name: " << ipc_name;
            memory::allocation::MemoryMapFdSet::Instance().Insert(ipc_name);

-            // 4. Rebuild LoDTensor
-            tensor.ResetHolderWithType(shared_reader_holder,
-              static_cast<proto::VarType::Type>(t[2].cast<int>()));
+            // 4. Rebuild Tensor
+            tensor.ResetHolderWithType(
+                shared_reader_holder,
+                static_cast<proto::VarType::Type>(t[2].cast<int>()));
            tensor.Resize(make_ddim(t[3].cast<std::vector<int>>()));
            tensor.set_lod(t[4].cast<framework::LoD>());


--- a/paddle/pten/api/lib/utils/tensor_utils.cc
+++ b/paddle/pten/api/lib/utils/tensor_utils.cc
@@ -31,7 +31,7 @@ void SetLoD(DstLoD* dst, const SrcLoD& src) {
  }
 }

-std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor(
+std::unique_ptr<pten::DenseTensor> MakePtenDenseTensorBase(
    const paddle::framework::Tensor& src) {
  VLOG(3) << "MakePtenDenseTensor based Tensor.";
  pten::DenseTensorMeta meta{pten::TransToPtenDataType(src.type()),
@@ -44,15 +44,15 @@ std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor(
 }

 std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor(
-    const paddle::framework::LoDTensor& src) {
-  auto out =
-      MakePtenDenseTensor(static_cast<const paddle::framework::Tensor&>(src));
+    const paddle::framework::Tensor& src) {
+  auto out = MakePtenDenseTensorBase(
+      static_cast<const paddle::framework::Tensor&>(src));
  SetLoD(&(pten::CompatibleDenseTensorUtils::GetMutableMeta(out.get())->lod),
         src.lod());
  return std::move(out);
 }

-std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor(
+std::unique_ptr<pten::DenseTensor> MakePtenDenseTensorBase(
    const paddle::framework::Tensor& src, const pten::TensorArgDef& arg_def) {
  pten::DenseTensorMeta meta{
      arg_def.dtype, src.dims(), src.layout(), src.offset()};
@@ -71,16 +71,15 @@ std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor(
 }

 std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor(
-    const paddle::framework::LoDTensor& src,
-    const pten::TensorArgDef& arg_def) {
-  auto out = MakePtenDenseTensor(
+    const paddle::framework::Tensor& src, const pten::TensorArgDef& arg_def) {
+  auto out = MakePtenDenseTensorBase(
      static_cast<const paddle::framework::Tensor&>(src), arg_def);
  SetLoD(&(pten::CompatibleDenseTensorUtils::GetMutableMeta(out.get())->lod),
         src.lod());
  return std::move(out);
 }

-pten::Scalar MakePtenScalar(const paddle::framework::LoDTensor& src) {
+pten::Scalar MakePtenScalar(const paddle::framework::Tensor& src) {
  PADDLE_ENFORCE_EQ(src.numel(),
                    1,
                    paddle::platform::errors::InvalidArgument(
@@ -138,7 +137,7 @@ pten::Scalar MakePtenScalarFromVar(const framework::Variable& variable) {
  }
 }

-pten::ScalarArray MakePtenScalarArray(const paddle::framework::LoDTensor& src) {
+pten::ScalarArray MakePtenScalarArray(const paddle::framework::Tensor& src) {
  if (src.type() == paddle::framework::proto::VarType::INT64) {
    return {src.data<int64_t>(), src.numel()};
  } else if (src.type() == paddle::framework::proto::VarType::INT32) {
@@ -295,7 +294,7 @@ std::unique_ptr<pten::TensorBase> MakePtenTensorBaseFromVar(
  return {};
 }

-void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) {
+void MovesStorageBase(pten::DenseTensor* src, paddle::framework::Tensor* dst) {
  PADDLE_ENFORCE_NOT_NULL(
      src,
      platform::errors::InvalidArgument(
@@ -311,12 +310,12 @@ void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) {
  dst->set_offset(src->meta().offset);
 }

-void MovesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst) {
-  MovesStorage(src, static_cast<paddle::framework::Tensor*>(dst));
+void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) {
+  MovesStorageBase(src, static_cast<paddle::framework::Tensor*>(dst));
  SetLoD(dst->mutable_lod(), src->lod());
 }

-void SharesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) {
+void SharesStorageBase(pten::DenseTensor* src, paddle::framework::Tensor* dst) {
  PADDLE_ENFORCE_NOT_NULL(
      src,
      platform::errors::InvalidArgument(
@@ -333,13 +332,13 @@ void SharesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) {
  dst->set_offset(src->meta().offset);
 }

-void SharesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst) {
-  SharesStorage(src, static_cast<paddle::framework::Tensor*>(dst));
+void SharesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) {
+  SharesStorageBase(src, static_cast<paddle::framework::Tensor*>(dst));
  SetLoD(dst->mutable_lod(), src->lod());
 }

-void ReMakePtenDenseTensor(const paddle::framework::Tensor& src,
-                           pten::DenseTensor* dst) {
+void ReMakePtenDenseTensorBase(const paddle::framework::Tensor& src,
+                               pten::DenseTensor* dst) {
  VLOG(3) << "ReMakePtenDenseTensor based Tensor.";
  auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst);
  meta->dims = src.dims();
@@ -361,17 +360,17 @@ void ReMakePtenDenseTensor(const paddle::framework::Tensor& src,
  shared_storage->ResetAllocation(src.Holder());
 }

-void ReMakePtenDenseTensor(const paddle::framework::LoDTensor& src,
+void ReMakePtenDenseTensor(const paddle::framework::Tensor& src,
                           pten::DenseTensor* dst) {
  auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst);
  SetLoD(&meta->lod, src.lod());
-  ReMakePtenDenseTensor(static_cast<const paddle::framework::Tensor&>(src),
-                        dst);
+  ReMakePtenDenseTensorBase(static_cast<const paddle::framework::Tensor&>(src),
+                            dst);
 }

-void ReMakePtenDenseTensorByArgDef(const paddle::framework::Tensor& src,
-                                   const pten::TensorArgDef& arg_def,
-                                   pten::DenseTensor* dst) {
+void ReMakePtenDenseTensorByArgDefBase(const paddle::framework::Tensor& src,
+                                       const pten::TensorArgDef& arg_def,
+                                       pten::DenseTensor* dst) {
  VLOG(3) << "ReMakePtenDenseTensor based Tensor and TensorArgDef.";
  auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst);
  meta->dims = src.dims();
@@ -395,12 +394,12 @@ void ReMakePtenDenseTensorByArgDef(const paddle::framework::Tensor& src,
  }
 }

-void ReMakePtenDenseTensorByArgDef(const paddle::framework::LoDTensor& src,
+void ReMakePtenDenseTensorByArgDef(const paddle::framework::Tensor& src,
                                   const pten::TensorArgDef& arg_def,
                                   pten::DenseTensor* dst) {
  auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst);
  SetLoD(&meta->lod, src.lod());
-  ReMakePtenDenseTensorByArgDef(
+  ReMakePtenDenseTensorByArgDefBase(
      static_cast<const paddle::framework::Tensor&>(src), arg_def, dst);
 }


--- a/paddle/pten/api/lib/utils/tensor_utils.h
+++ b/paddle/pten/api/lib/utils/tensor_utils.h
@@ -33,12 +33,9 @@ namespace experimental {
 std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor(
    const paddle::framework::Tensor& src);

-std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor(
-    const paddle::framework::LoDTensor& src);
-
-pten::Scalar MakePtenScalar(const paddle::framework::LoDTensor& src);
+pten::Scalar MakePtenScalar(const paddle::framework::Tensor& src);

-pten::ScalarArray MakePtenScalarArray(const paddle::framework::LoDTensor& src);
+pten::ScalarArray MakePtenScalarArray(const paddle::framework::Tensor& src);

 pten::Scalar MakePtenScalarFromVar(const framework::Variable& variable);

@@ -56,12 +53,8 @@ std::unique_ptr<pten::TensorBase> MakePtenTensorBaseFromVar(

 void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst);

-void MovesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst);
-
 void SharesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst);

-void SharesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst);
-
 /**
 * In order to improve the compatibility state performance, some tricky tool
 * functions are added.
@@ -74,17 +67,10 @@ void SharesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst);
 void ReMakePtenDenseTensor(const paddle::framework::Tensor& src,
                           pten::DenseTensor* dst);

-void ReMakePtenDenseTensor(const paddle::framework::LoDTensor& src,
-                           pten::DenseTensor* dst);
-
 void ReMakePtenDenseTensorByArgDef(const paddle::framework::Tensor& src,
                                   const pten::TensorArgDef& arg_def,
                                   pten::DenseTensor* dst);

-void ReMakePtenDenseTensorByArgDef(const paddle::framework::LoDTensor& src,
-                                   const pten::TensorArgDef& arg_def,
-                                   pten::DenseTensor* dst);
-
 void ReMakePtenDenseTensorFromVar(const framework::Variable& variable,
                                  const pten::TensorArgDef& arg_def,
                                  pten::DenseTensor* dst);

--- a/python/paddle/fluid/__init__.py
+++ b/python/paddle/fluid/__init__.py
@@ -32,6 +32,10 @@ if os.path.exists(legacy_core):
    except Exception as e:
        raise e

+# Patch LoDTensor
+from . import core
+core.LoDTensor = core.Tensor
+
 # import all class inside framework into fluid module
 from . import framework
 from .framework import *
@@ -69,6 +73,7 @@ from .input import embedding, one_hot
 from . import distribute_lookup_table
 from .param_attr import ParamAttr, WeightNormParamAttr
 from .data_feeder import DataFeeder
+
 from .core import LoDTensor, LoDTensorArray, Scope, _Scope
 from .core import CPUPlace, XPUPlace, CUDAPlace, CUDAPinnedPlace, NPUPlace, IPUPlace, MLUPlace
 from .incubate import fleet