diff --git a/paddle/fluid/eager/nan_inf_utils.cc b/paddle/fluid/eager/nan_inf_utils.cc
index f8c06a5afff121bc7e770a94b1d157ae99682274..e15b91e480a6ab5f662e409b4176072a3dce5354 100644
--- a/paddle/fluid/eager/nan_inf_utils.cc
+++ b/paddle/fluid/eager/nan_inf_utils.cc
@@ -15,6 +15,8 @@
 #include "paddle/fluid/eager/nan_inf_utils.h"
 
 #include "paddle/fluid/framework/details/nan_inf_utils_detail.h"
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/selected_rows.h"
 
diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index 8af48ef51db5c218d80ff34dcddcfe18a2d6046c..2e189e4865f4d820d09d5b135cadbb31f9500279 100755
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -83,26 +83,27 @@ if(WITH_GPU)
     windows_symbolic(tensor_util SRCS tensor_util.cu)
     nv_library(
       tensor
-      SRCS tensor.cc .tensor_util.cu
+      SRCS .tensor_util.cu
       DEPS place memory data_type device_context dense_tensor)
     add_dependencies(tensor tensor_util)
   else()
     nv_library(
       tensor
-      SRCS tensor.cc tensor_util.cu
-      DEPS place memory data_type device_context profiler dense_tensor)
+      SRCS tensor_util.cu
+      DEPS place memory data_type device_context dense_tensor)
   endif()
 elseif(WITH_ROCM)
   hip_library(
     tensor
-    SRCS tensor.cc tensor_util.cu
-    DEPS place memory data_type device_context profiler dense_tensor)
+    SRCS tensor_util.cu
+    DEPS place memory data_type device_context dense_tensor)
 else()
   cc_library(
     tensor
-    SRCS tensor.cc tensor_util.cc
-    DEPS place memory data_type device_context profiler dense_tensor)
+    SRCS tensor_util.cc
+    DEPS place memory data_type device_context dense_tensor)
 endif()
+# target_link(tensor profiler)
 
 cc_test(
   tensor_test
@@ -1167,9 +1168,6 @@ cc_library(
        op_meta_info
        phi_api)
 
-#cc_binary(test_executor SRCS test_executor.cc DEPS executor op_registry ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} )
-#cc_binary(new_executor SRCS new_exec_test.cc DEPS operator op_registry executor ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} profiler)
-
 set(FLUID_FRAMEWORK_MODULES
     proto_desc
     memory
diff --git a/paddle/fluid/framework/details/nan_inf_utils_detail.h b/paddle/fluid/framework/details/nan_inf_utils_detail.h
index 226a1db1d3b8f0fbd7409bccabce5568387d5d87..99186c43e129e7641fba4ffad9d086764ba3aef4 100644
--- a/paddle/fluid/framework/details/nan_inf_utils_detail.h
+++ b/paddle/fluid/framework/details/nan_inf_utils_detail.h
@@ -17,12 +17,9 @@
 #include <string>
 
 #include "paddle/fluid/framework/tensor.h"
+#include "paddle/fluid/platform/complex.h"
 #include "paddle/fluid/platform/place.h"
 
-namespace phi {
-class DenseTensor;
-}  // namespace phi
-
 namespace paddle {
 namespace framework {
 namespace details {
diff --git a/paddle/fluid/framework/dlpack_tensor_test.cc b/paddle/fluid/framework/dlpack_tensor_test.cc
index 6c19cf3450dbd8b98f2356f2a3faff50c545de75..9e3604e71a245cc7759c2ded46315cd565d16cea 100644
--- a/paddle/fluid/framework/dlpack_tensor_test.cc
+++ b/paddle/fluid/framework/dlpack_tensor_test.cc
@@ -17,6 +17,8 @@
 #include <glog/logging.h>
 #include <gtest/gtest.h>
 
+#include "paddle/fluid/platform/device/gpu/gpu_info.h"
+
 namespace paddle {
 namespace framework {
 
diff --git a/paddle/fluid/framework/tensor.cc b/paddle/fluid/framework/tensor.cc
deleted file mode 100644
index 2f0c7c5b2c86f32a0c4fbe2561fc639400f738b1..0000000000000000000000000000000000000000
--- a/paddle/fluid/framework/tensor.cc
+++ /dev/null
@@ -1,29 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/framework/tensor.h"
-
-DECLARE_bool(use_stream_safe_cuda_allocator);
-
-namespace paddle {
-namespace memory {
-namespace allocation {
-class Allocation;
-}  // namespace allocation
-}  // namespace memory
-}  // namespace paddle
-
-namespace paddle {
-namespace framework {}  // namespace framework
-}  // namespace paddle
diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h
index 8b79d4079118f6faef3d731cadf38dcf43aeabb7..fcb061aa93288fa54a73a26e9879e6343c92acc7 100644
--- a/paddle/fluid/framework/tensor.h
+++ b/paddle/fluid/framework/tensor.h
@@ -14,63 +14,15 @@ limitations under the License. */
 
 #pragma once
 
-#include <cstdint>
-#include <cstring>
-#include <memory>
-#include <typeindex>
-#include <utility>
-#include <vector>
-
-#include "paddle/fluid/framework/data_layout.h"
-#include "paddle/fluid/framework/framework.pb.h"
+#include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/framework/mixed_vector.h"
-#include "paddle/fluid/memory/memory.h"
-#include "paddle/fluid/platform/device_context.h"
-#include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/platform/place.h"
-#include "paddle/phi/core/ddim.h"
 #include "paddle/phi/core/dense_tensor.h"
 
 namespace paddle {
-
 namespace framework {
 
 using LoD = std::vector<paddle::framework::Vector<size_t>>;
-
-/*
- NOTE(liym27): [ What is TensorInplaceVersion used for? ]
-
- TensorInplaceVersion is a version counter and every Tensor has a version
- counter. It's used to check whether an inplace operation will result in an
- incorrect gradient calculation. Version is incremented when the data of the
- Variable is modified in place.
-
- - Question: In what scenarios will version counters be shared?
- - Answer: When two Variables/VarBases share the same C++ Tensor(its Allocation
- may change), both of them share the same version counter. For examples:
-  1. `z = paddle.assign(input=x, output=y)`, `z` shares the same version counter
-    of `y` because z and y is the same VarBase;
-  2. `y = x.detach()`, `y` shares the same version counter of `x`.
-
- - Question: In what scenarios will version counters NOT be shared?
- - Answer: Replacing a `Variable`'s data by calling `Tensor::ShareDataWith(...)`
- or `Tensor::ShareBufferWith(...)`. Because they share the same Allocation but
- not framework::Tensor.
-
- - Question: Why put the inplace_version_counter_ in framework::Tensor instead
- of Allocation or Variable?
- - Answer:
-  1. Tensor can call ResetHolder() to reset the corresponding Allocation so that
-  the inplace_version_counter_ changes if it's in Allocation, which will lead to
-  confusing information about inplace version.
-  2. If inplace_version_counter_ is in Variable, different VariableWrappers
-  should be able to share the same Variable. However, a VariableWrapper hold a
-  Variable object but not a pointer.
-*/
-
 using Tensor = phi::DenseTensor;
 
 }  // namespace framework
 }  // namespace paddle
-
-#include "paddle/fluid/framework/tensor_impl.h"
diff --git a/paddle/fluid/framework/tensor_impl.h b/paddle/fluid/framework/tensor_impl.h
deleted file mode 100644
index b314fb151959e91c02f034fbd7fbd9776b47b625..0000000000000000000000000000000000000000
--- a/paddle/fluid/framework/tensor_impl.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-#include "paddle/fluid/framework/data_type.h"
-#include "paddle/fluid/memory/memcpy.h"
-#include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/platform/float16.h"
-
-namespace paddle {
-namespace framework {
-
-inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) {
-  int rank = src.dims().size();
-  PADDLE_ENFORCE_GE(
-      rank,
-      2,
-      platform::errors::InvalidArgument(
-          "'ReshapeToMatrix()' is only used for flatten high rank "
-          "tensors to matrixs. The dimensions of Tensor must be "
-          "greater or equal than 2. "
-          "But received dimensions of Tensor is %d",
-          rank));
-  if (rank == 2) {
-    return src;
-  }
-  Tensor res;
-  res.ShareDataWith(src);
-  res.Resize(phi::flatten_to_2d(src.dims(), num_col_dims));
-  return res;
-}
-
-}  // namespace framework
-}  // namespace paddle
diff --git a/paddle/fluid/framework/tensor_test.cc b/paddle/fluid/framework/tensor_test.cc
index 05dd41eb6ffc56cf9fb945b5e4e06157e5b27fde..fcf255dafc2e00473ec8ed73b299f6defaf738bb 100644
--- a/paddle/fluid/framework/tensor_test.cc
+++ b/paddle/fluid/framework/tensor_test.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "paddle/fluid/framework/tensor.h"
+#include "paddle/fluid/framework/tensor_util.h"
 
 #include <gtest/gtest.h>
 
diff --git a/paddle/fluid/framework/tensor_util.h b/paddle/fluid/framework/tensor_util.h
index b1bba0f7c35f81b374f84c9fc90d6edef863541a..3c9d1284cefdb447bdba0dad2b8b1b8e152745a0 100644
--- a/paddle/fluid/framework/tensor_util.h
+++ b/paddle/fluid/framework/tensor_util.h
@@ -34,6 +34,7 @@ limitations under the License. */
 #include "paddle/fluid/platform/device/mlu/device_context.h"
 #endif
 
+#include "paddle/fluid/memory/memory.h"
 #include "paddle/phi/core/dense_tensor.h"
 
 namespace paddle {
@@ -580,6 +581,26 @@ inline void TensorToVector(const Tensor& src, std::vector<bool>* dst) {
 
 std::ostream& operator<<(std::ostream& os, const LoD& lod);
 
+inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) {  // 2-D view of `src`
+  int rank = src.dims().size();  // number of dimensions of `src`
+  PADDLE_ENFORCE_GE(  // inputs with fewer than 2 dimensions are rejected
+      rank,
+      2,
+      platform::errors::InvalidArgument(
+          "'ReshapeToMatrix()' is only used for flatten high rank "
+          "tensors to matrixs. The dimensions of Tensor must be "
+          "greater or equal than 2. "
+          "But received dimensions of Tensor is %d",
+          rank));
+  if (rank == 2) {  // already 2-D: `src` is returned as-is; `num_col_dims` is not consulted
+    return src;
+  }
+  Tensor res;
+  res.ShareDataWith(src);  // NOTE(review): presumably aliases `src`'s buffer instead of copying -- confirm
+  res.Resize(phi::flatten_to_2d(src.dims(), num_col_dims));  // 2-D shape comes from phi::flatten_to_2d
+  return res;
+}
+
 }  // namespace framework
 }  // namespace paddle
 
diff --git a/paddle/fluid/inference/tensorrt/plugin/gather_nd_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/gather_nd_op_plugin.cu
index f47a65d104513a153bf188ebd9f5b1aba764ca84..046a725b2bd1e3831119b8d5866c013869ca4cee 100644
--- a/paddle/fluid/inference/tensorrt/plugin/gather_nd_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/gather_nd_op_plugin.cu
@@ -23,6 +23,7 @@
 #include "NvInferRuntimeCommon.h"
 #include "paddle/fluid/inference/tensorrt/plugin/gather_nd_op_plugin.h"
 #include "paddle/fluid/platform/place.h"
+#include "paddle/phi/backends/gpu/gpu_helper.h"
 
 namespace paddle {
 namespace inference {
diff --git a/paddle/fluid/operators/gather_scatter_kernel.h b/paddle/fluid/operators/gather_scatter_kernel.h
index c8a63e4c35a3e33109772d511311a4012d10238c..6aa6e4ff7b8589d705a1214f8dfc337641f7747f 100644
--- a/paddle/fluid/operators/gather_scatter_kernel.h
+++ b/paddle/fluid/operators/gather_scatter_kernel.h
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/framework/tensor.h"
+#include "paddle/fluid/platform/device_context.h"
+#include "paddle/fluid/platform/float16.h"
 
 #pragma once
 
diff --git a/paddle/fluid/operators/math/cross_entropy.h b/paddle/fluid/operators/math/cross_entropy.h
index 8cb7fad48e9de8621886aec5c805f8a9fa6655ed..0de10789ba02ee97b3710ac766519217347a0fcd 100644
--- a/paddle/fluid/operators/math/cross_entropy.h
+++ b/paddle/fluid/operators/math/cross_entropy.h
@@ -17,6 +17,7 @@ limitations under the License. */
 
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/tensor.h"
+#include "paddle/fluid/platform/float16.h"
 #include "paddle/phi/core/hostdevice.h"
 
 namespace paddle {
diff --git a/paddle/fluid/operators/math/sample_prob.h b/paddle/fluid/operators/math/sample_prob.h
index 1e8fb983a94992abe9f8ead34b38f9b78294c19a..ad4d3489c21fea521334ace55392f5246c9c9c5a 100644
--- a/paddle/fluid/operators/math/sample_prob.h
+++ b/paddle/fluid/operators/math/sample_prob.h
@@ -20,6 +20,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/operators/math/sampler.h"
+#include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/core/ddim.h"
 
 namespace paddle {
diff --git a/paddle/fluid/operators/math/softmax_impl.h b/paddle/fluid/operators/math/softmax_impl.h
index 18cd3e7261dd7967229c6d7f4658d3f7dde6e682..7cf7b25233550ae57dbe2c12666a4adbc4703b3c 100644
--- a/paddle/fluid/operators/math/softmax_impl.h
+++ b/paddle/fluid/operators/math/softmax_impl.h
@@ -19,7 +19,11 @@ limitations under the License. */
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/operators/jit/kernels.h"
 #include "paddle/fluid/operators/math/cpu_vec.h"
+#include "paddle/fluid/platform/bfloat16.h"
 #include "paddle/fluid/platform/cpu_info.h"
+#include "paddle/fluid/platform/float16.h"
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/backends/gpu/gpu_context.h"
 
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/math/unpooling.h b/paddle/fluid/operators/math/unpooling.h
index 30b6db78f31b6c3126d98f65162f0c04a561a6ad..1b0f52dacd970df4ff5a2695de9624819209b2d7 100644
--- a/paddle/fluid/operators/math/unpooling.h
+++ b/paddle/fluid/operators/math/unpooling.h
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #pragma once
 #include "paddle/fluid/framework/tensor.h"
+#include "paddle/fluid/platform/device_context.h"
 
 namespace paddle {
 namespace operators {
diff --git a/paddle/phi/core/dense_tensor.h b/paddle/phi/core/dense_tensor.h
index 09098705b11e44a1acad18e1e16d22a7bef6f741..d16a019c7ab0df756d3db372b77787b498e22aee 100644
--- a/paddle/phi/core/dense_tensor.h
+++ b/paddle/phi/core/dense_tensor.h
@@ -22,7 +22,7 @@ limitations under the License. */
 /* @jim19930609: Move to MKLDNN_Tensor in the future
  */
 #ifdef PADDLE_WITH_MKLDNN
-#include "dnnl.hpp"
+#include "dnnl.hpp"  // NOLINT
 #endif
 
 namespace phi {
@@ -174,6 +174,36 @@ class DenseTensor : public TensorBase,
   /* Temporarily put InplaceVersion inside DenseTensor.
   Will move to AutogradMeta as soon as we switch to Eager Dygraph.
   */
+  /*
+  NOTE(liym27): [ What is TensorInplaceVersion used for? ]
+
+  TensorInplaceVersion is a version counter and every Tensor has a version
+  counter. It's used to check whether an inplace operation will result in an
+  incorrect gradient calculation. Version is incremented when the data of the
+  Variable is modified in place.
+
+  - Question: In what scenarios will version counters be shared?
+  - Answer: When two Variables/VarBases share the same C++ Tensor(its Allocation
+  may change), both of them share the same version counter. For example:
+   1. `z = paddle.assign(input=x, output=y)`, `z` shares the same version
+  counter as `y` because z and y are the same VarBase;
+   2. `y = x.detach()`, `y` shares the same version counter as `x`.
+
+  - Question: In what scenarios will version counters NOT be shared?
+  - Answer: Replacing a `Variable`'s data by calling
+  `Tensor::ShareDataWith(...)` or `Tensor::ShareBufferWith(...)`, because they
+  share the same Allocation but not framework::Tensor.
+
+  - Question: Why put the inplace_version_counter_ in framework::Tensor instead
+  of Allocation or Variable?
+  - Answer:
+   1. Tensor can call ResetHolder() to reset the corresponding Allocation so
+  that the inplace_version_counter_ changes if it's in Allocation, which will
+  lead to confusing information about inplace version.
+   2. If inplace_version_counter_ is in Variable, different VariableWrappers
+   should be able to share the same Variable. However, a VariableWrapper holds a
+   Variable object but not a pointer.
+ */
   class InplaceVersion {
    public:
     bool IsUnique() const { return inplace_version_ == 0; }