From 62c78e261d0eb7935610448c6037d082028c8ae3 Mon Sep 17 00:00:00 2001
From: Chen Weihang
Date: Mon, 28 Aug 2023 10:19:01 +0800
Subject: [PATCH] [AutoParallel] Simplify PADDLE_WITH_DISTRIBUTE macro usage
 (#56361)

* simplify with dist macro
* polish error message format
* fix vtable error
* fix cmake error
* fix winsock redefined error
* fix windows compile error
* fix windows compile failure
* fix merge error
* fix vec compile error
* add port.h into test_cpu_vec
* fix merge error
* try to fix winsock error
---
 paddle/fluid/eager/grad_node_info.cc          |  8 +----
 paddle/fluid/eager/grad_tensor_holder.cc      | 10 ++----
 paddle/fluid/pybind/auto_parallel_py.cc       |  4 ---
 paddle/fluid/pybind/eager.cc                  | 34 +++++++++----------
 paddle/fluid/pybind/eager_method.cc           | 27 +++++++++++----
 paddle/fluid/pybind/eager_properties.cc       |  6 +++-
 paddle/fluid/pybind/eager_utils.cc            | 27 +++++++++++----
 paddle/fluid/pybind/eager_utils.h             | 10 ++----
 paddle/fluid/pybind/tensor.cc                 |  4 +--
 paddle/phi/api/lib/api_gen_utils.cc           |  4 ---
 paddle/phi/api/lib/api_gen_utils.h            |  2 --
 paddle/phi/api/lib/data_transform.cc          |  6 +---
 paddle/phi/api/lib/data_transform.h           |  2 --
 paddle/phi/api/lib/kernel_dispatch.h          |  9 +----
 paddle/phi/api/lib/tensor.cc                  |  8 +----
 paddle/phi/backends/dynload/dynamic_loader.cc |  1 +
 .../distributed/auto_parallel/CMakeLists.txt  | 23 ++++---------
 .../distributed/auto_parallel/dist_tensor.h   |  2 +-
 .../reshard_all_gather_functor.cc             |  1 -
 .../auto_parallel/reshard_utils.cc            |  6 +++-
 .../distributed/auto_parallel/reshard_utils.h |  4 +--
 paddle/phi/core/enforce.h                     |  1 -
 paddle/phi/core/meta_tensor.cc                | 29 ++++------------
 paddle/phi/core/tensor_utils.h                |  1 +
 paddle/phi/core/utils/type_info.cc            |  9 ++---
 test/cpp/phi/kernels/test_cpu_vec.cc          |  1 +
 26 files changed, 98 insertions(+), 141 deletions(-)

diff --git a/paddle/fluid/eager/grad_node_info.cc b/paddle/fluid/eager/grad_node_info.cc
index 9df0a25d9bc..718bc51b4f5 100644
--- a/paddle/fluid/eager/grad_node_info.cc
+++ b/paddle/fluid/eager/grad_node_info.cc
@@ -27,11 +27,9 @@
 #include "paddle/phi/common/data_type.h"
 #include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h"
 #include "paddle/phi/core/sparse_coo_tensor.h"
 #include "paddle/phi/core/sparse_csr_tensor.h"
-#ifdef PADDLE_WITH_DISTRIBUTE
-#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h"
-#endif

 /**
  * Implementation of GradNodeBase, Edge and GradTensorHolder.
@@ -125,14 +123,12 @@ void GradNodeBase::SetGradInMeta(const paddle::Tensor& fwd_out, phi::SparseCsrTensor* csr_tensor = static_cast(fwd_out.impl().get()); dense_tensor = csr_tensor->mutable_non_zero_elements(); -#ifdef PADDLE_WITH_DISTRIBUTE } else if (phi::distributed::DistTensor::classof(fwd_out.impl().get())) { // TODO(chenweihang): DistTensor contains global and local meta, here // only set the local meta now, we should set global meta later dense_tensor = &(static_cast(fwd_out.impl().get()) ->value()); -#endif } else { VLOG(7) << "Unable to initialize the DenseTensorMeta of GradSlotMeta with " "non-DenseTensor argument."; @@ -268,7 +264,6 @@ void GradNodeBase::SetGradOutMeta(const paddle::Tensor& fwd_in, "which is illegal.")); meta.SetTensorMeta(dense_tensor->meta()); meta.SetPlace(fwd_in.place()); -#ifdef PADDLE_WITH_DISTRIBUTE } else if (phi::distributed::DistTensor::classof(fwd_in.impl().get())) { const phi::DenseTensor& dense_tensor = static_cast(fwd_in.impl().get()) @@ -281,7 +276,6 @@ void GradNodeBase::SetGradOutMeta(const paddle::Tensor& fwd_in, "which is illegal.")); meta.SetTensorMeta(dense_tensor.meta()); meta.SetPlace(fwd_in.place()); -#endif } else { VLOG(7) << "Unable to initialize the DenseTensorMeta of GradSlotMeta with " diff --git a/paddle/fluid/eager/grad_tensor_holder.cc b/paddle/fluid/eager/grad_tensor_holder.cc index 4dd5bb7a2f5..056141bb92e 100644 --- a/paddle/fluid/eager/grad_tensor_holder.cc +++ b/paddle/fluid/eager/grad_tensor_holder.cc @@ -18,12 +18,10 @@ #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/var_type.h" #include "paddle/fluid/imperative/gradient_accumulator.h" -#include "paddle/phi/core/sparse_coo_tensor.h" -#include "paddle/phi/kernels/funcs/math_function.h" -#ifdef PADDLE_WITH_DISTRIBUTE #include "paddle/phi/core/distributed/auto_parallel/dist_attr.h" #include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" -#endif +#include "paddle/phi/core/sparse_coo_tensor.h" +#include "paddle/phi/kernels/funcs/math_function.h" namespace egr { @@ -87,7 +85,6 @@ void GradTensorHolder::CopyValueFromTensor(size_t slot_id, } else if (t.is_sparse_csr_tensor() || t.is_sparse_coo_tensor()) { buffer_[slot_id][rank] = paddle::experimental::sparse::full_like(t, 1, t.dtype()); -#ifdef PADDLE_WITH_DISTRIBUTE } else if (t.is_dist_tensor()) { VLOG(6) << "Create a new dist tensor."; // TODO(chenweihang): we need a shard_tensor API in C++ @@ -99,7 +96,6 @@ void GradTensorHolder::CopyValueFromTensor(size_t slot_id, *dense_temp, phi::distributed::TensorDistAttr()); temp.set_impl(dist_tensor); buffer_[slot_id][rank] = temp; -#endif } else { PADDLE_THROW(paddle::platform::errors::Fatal( "Only Support DENSE_TENSOR, SPARSE_COO_TENSOR, SPARSE_CSR_TENSOR " @@ -195,10 +191,8 @@ void GradTensorHolder::add(size_t slot_id, &buffer_values); } } -#ifdef PADDLE_WITH_DISTRIBUTE } else if (t.is_dist_tensor()) { buffer_tensor = add_ad_func(t, buffer_tensor); -#endif } else { // TODO(jiabin): Support Other TensorBase later // TODO(zhanlve): Replace SelectedRowsAddTensor with add_dygraph_function diff --git a/paddle/fluid/pybind/auto_parallel_py.cc b/paddle/fluid/pybind/auto_parallel_py.cc index 977583daf73..e03292faa9e 100644 --- a/paddle/fluid/pybind/auto_parallel_py.cc +++ b/paddle/fluid/pybind/auto_parallel_py.cc @@ -28,11 +28,9 @@ #include "paddle/fluid/distributed/auto_parallel/spmd_rules/common.h" #include "paddle/fluid/distributed/auto_parallel/spmd_rules/dist_tensor_spec.h" -#ifdef PADDLE_WITH_DISTRIBUTE #include 
"paddle/phi/core/distributed/auto_parallel/dist_tensor.h" #include "paddle/phi/core/distributed/auto_parallel/r_to_s_reshard_function.h" #include "paddle/phi/core/distributed/auto_parallel/s_to_r_reshard_function.h" -#endif namespace py = pybind11; @@ -114,7 +112,6 @@ static inline void reset_operator_dist_attr(OperatorDistAttr *dist_attr) { } void BindAutoParallel(py::module *m) { -#ifdef PADDLE_WITH_DISTRIBUTE auto ReshardFunction = py::class_(*m, "ReshardFunction") .def( @@ -151,7 +148,6 @@ void BindAutoParallel(py::module *m) { py::class_( *m, "SToRReshardFunction", ReshardFunction) .def(py::init<>()); -#endif py::class_(*m, "ProcessMesh") .def(py::init<>()) diff --git a/paddle/fluid/pybind/eager.cc b/paddle/fluid/pybind/eager.cc index d500d8241da..f545679c902 100644 --- a/paddle/fluid/pybind/eager.cc +++ b/paddle/fluid/pybind/eager.cc @@ -40,14 +40,11 @@ limitations under the License. */ #include "paddle/fluid/framework/python_headers.h" #include "paddle/fluid/pybind/exception.h" #include "paddle/fluid/pybind/tensor_py.h" -#include "paddle/phi/core/string_tensor.h" - -#ifdef PADDLE_WITH_DISTRIBUTE #include "paddle/phi/core/distributed/auto_parallel/dist_attr.h" #include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" +#include "paddle/phi/core/string_tensor.h" using phi::distributed::DistTensor; using phi::distributed::TensorDistAttr; -#endif namespace paddle { namespace pybind { @@ -137,7 +134,6 @@ void EmptyStringTensorInitializer(TensorObject* self, self->tensor.set_impl(string_tensor); } -#ifdef PADDLE_WITH_DISTRIBUTE void CreateDistTensorWithNumpyValue(TensorObject* self, const std::string& name, const paddle::platform::Place& place, @@ -149,6 +145,7 @@ void CreateDistTensorWithNumpyValue(TensorObject* self, framework::proto::VarType::Type dtype = paddle::framework::proto::VarType::FP32, const std::vector& dims = {0}) { +#ifdef PADDLE_WITH_DISTRIBUTE auto ddims = phi::make_ddim(dims); self->tensor.set_name(name); auto autograd_meta = egr::EagerUtils::autograd_meta(&(self->tensor)); @@ -203,8 +200,13 @@ void CreateDistTensorWithNumpyValue(TensorObject* self, << ") have not GradNode, add GradNodeAccumulation" << autograd_meta->GradNode() << " for it."; } -} +#else + PADDLE_THROW(platform::errors::Unavailable( + "The numpy value-based initialization of (Dist)Tensor is not supported " + "in the current PaddlePaddle, please recompile and install PaddlePaddle " + "with the option of `WITH_DISTRIBUTE=ON`.")); #endif +} void InitTensorWithNumpyValue(TensorObject* self, const py::object& array, @@ -264,12 +266,12 @@ void InitStringTensorWithNumpyValue(TensorObject* self, const py::object& obj) { } } -#ifdef PADDLE_WITH_DISTRIBUTE void InitDistTensorWithTensor(TensorObject* self, const paddle::Tensor& src, const paddle::platform::Place& place, const std::string& name, const TensorDistAttr& dist_attr) { +#ifdef PADDLE_WITH_DISTRIBUTE PADDLE_ENFORCE(src.is_dense_tensor(), paddle::platform::errors::InvalidArgument( "DistTensor can only initialize by DenseTensor")); @@ -293,8 +295,13 @@ void InitDistTensorWithTensor(TensorObject* self, } else { egr::EagerUtils::autograd_meta(&(self->tensor))->SetPersistable(false); } -} +#else + PADDLE_THROW(platform::errors::Unavailable( + "The tensor-based initialization of (Dist)Tensor is not supported " + "in the current PaddlePaddle, please recompile and install PaddlePaddle " + "with the option of `WITH_DISTRIBUTE=ON`.")); #endif +} void InitTensorWithTensor(TensorObject* self, const paddle::Tensor& src, @@ -393,7 +400,6 @@ 
paddle::platform::Place ParsePlace( return place; } -#ifdef PADDLE_WITH_DISTRIBUTE TensorDistAttr ParseDistAttrArgs( std::unordered_map kws_map, std::unordered_map kw_order_map, @@ -410,7 +416,6 @@ TensorDistAttr ParseDistAttrArgs( } return dist_attr; } -#endif // boolean arguments: zero_copy, stop_gradient, persistable int ParseBooleanArgs(std::string key, @@ -507,7 +512,6 @@ void AutoInitTensorByPyArray(TensorObject* py_tensor_ptr, stop_gradient = ParseBooleanArgs( "stop_gradient", kws_map, kw_order_map, args, flag_kwargs, args_num); -#ifdef PADDLE_WITH_DISTRIBUTE TensorDistAttr dist_attr = ParseDistAttrArgs(kws_map, kw_order_map, args, flag_kwargs, args_num); @@ -522,7 +526,6 @@ void AutoInitTensorByPyArray(TensorObject* py_tensor_ptr, zero_copy); return; } -#endif EmptyTensorInitializer( py_tensor_ptr, act_name, place, persistable, stop_gradient); @@ -554,10 +557,8 @@ void AutoInitTensorByTensor(TensorObject* py_tensor_ptr, place = ParsePlace(kws_map, kw_order_map, args, flag_kwargs, args_num); act_name = ParseName(kws_map, kw_order_map, args, flag_kwargs, args_num); -#ifdef PADDLE_WITH_DISTRIBUTE TensorDistAttr dist_attr = ParseDistAttrArgs(kws_map, kw_order_map, args, flag_kwargs, args_num); -#endif if (init_by_egr_tensor) { paddle::Tensor src_tensor; @@ -577,16 +578,13 @@ void AutoInitTensorByTensor(TensorObject* py_tensor_ptr, "way.")); } } -#ifdef PADDLE_WITH_DISTRIBUTE + if (!dist_attr.empty()) { InitDistTensorWithTensor( py_tensor_ptr, src_tensor, place, act_name, dist_attr); } else { InitTensorWithTensor(py_tensor_ptr, src_tensor, place, act_name); } -#else - InitTensorWithTensor(py_tensor_ptr, src_tensor, place, act_name); -#endif } else { // init by framework tensor phi::DenseTensor src_tensor; diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc index fd43d99aec3..f667602a493 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -60,13 +60,11 @@ typedef SSIZE_T ssize_t; #include "paddle/fluid/pybind/tensor_py.h" #include "paddle/phi/api/lib/data_transform.h" #include "paddle/phi/core/ddim.h" +#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" #include "paddle/phi/core/flags.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/utils/pybind.h" -#ifdef PADDLE_WITH_DISTRIBUTE -#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" -#endif PHI_DECLARE_bool(set_to_1d); DECLARE_bool(use_stride_kernel); @@ -258,8 +256,8 @@ static PyObject* tensor_method_numpy(TensorObject* self, place, dense_tensor->Holder()->ptr(), dense_tensor->Holder()->size()); -#ifdef PADDLE_WITH_DISTRIBUTE } else if (self->tensor.is_dist_tensor()) { +#ifdef PADDLE_WITH_DISTRIBUTE // TODO(chenweihang): deal with DistTensor as local DenseTensor now, // if the local DenseTensor is shard or partial, do gather or reduce? 
VLOG(6) << "Getting DistTensor's numpy value"; @@ -278,6 +276,13 @@ static PyObject* tensor_method_numpy(TensorObject* self, place, dense_tensor.Holder()->ptr(), dense_tensor.Holder()->size()); +#else + PADDLE_THROW( + platform::errors::Unavailable("The `numpy()` method of (Dist)Tensor " + "is not supported in the current " + "PaddlePaddle, please recompile and " + "installPaddlePaddle with the option " + "of `WITH_DISTRIBUTE=ON`.")); #endif } else { VLOG(6) << "Getting DenseTensor's numpy value"; @@ -320,8 +325,8 @@ static PyObject* tensor_method_numpy(TensorObject* self, dense_tensor->Holder()->ptr(), dense_tensor->Holder()->size(), kind); -#ifdef PADDLE_WITH_DISTRIBUTE } else if (self->tensor.is_dist_tensor()) { +#ifdef PADDLE_WITH_DISTRIBUTE VLOG(6) << "Getting DistTensor's numpy value"; auto* dist_tensor = static_cast(self->tensor.impl().get()); @@ -335,6 +340,13 @@ static PyObject* tensor_method_numpy(TensorObject* self, dense_tensor.Holder()->ptr(), dense_tensor.Holder()->size(), kind); +#else + PADDLE_THROW( + platform::errors::Unavailable("The `numpy()` method of (Dist)Tensor " + "is not supported in the current " + "PaddlePaddle, please recompile and " + "installPaddlePaddle with the option " + "of `WITH_DISTRIBUTE=ON`.")); #endif } else { VLOG(6) << "Getting DenseTensor's numpy value"; @@ -1133,7 +1145,10 @@ static PyObject* tensor_method_get_underline_tensor(TensorObject* self, VLOG(6) << "dist tensor: " << tensor->defined(); return ToPyObject(tensor); #else - RETURN_PY_NONE + PADDLE_THROW(platform::errors::Unavailable( + "The `get_tensor()` method of (Dist)Tensor is not supported in the " + "current PaddlePaddle, please recompile and installPaddlePaddle " + "with the option of `WITH_DISTRIBUTE=ON`.")); #endif } else { RETURN_PY_NONE diff --git a/paddle/fluid/pybind/eager_properties.cc b/paddle/fluid/pybind/eager_properties.cc index 5ada60c81da..3abae045231 100644 --- a/paddle/fluid/pybind/eager_properties.cc +++ b/paddle/fluid/pybind/eager_properties.cc @@ -380,7 +380,11 @@ PyObject* tensor_properties_get_dist_attr(TensorObject* self, void* closure) { static_cast(self->tensor.impl().get()); return ToPyObject(&dist_tensor->dist_attr()); #else - RETURN_PY_NONE + PADDLE_THROW(platform::errors::Unavailable( + "The `dist_attr()` property of (Dist)Tensor is not supported in the " + "current PaddlePaddle, please recompile and installPaddlePaddle with " + "the " + "option of `WITH_DISTRIBUTE=ON`.")); #endif } else { RETURN_PY_NONE diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc index c1e7c2759c9..f718fab1260 100644 --- a/paddle/fluid/pybind/eager_utils.cc +++ b/paddle/fluid/pybind/eager_utils.cc @@ -61,9 +61,7 @@ extern PyTypeObject* g_customplace_pytype; extern PyTypeObject* g_framework_tensor_pytype; extern PyTypeObject* g_framework_lodtensorarray_pytype; extern PyTypeObject* g_jit_function_pytype; -#ifdef PADDLE_WITH_DISTRIBUTE extern PyTypeObject* g_tensor_dist_attr_pytype; -#endif int TensorDtype2NumpyDtype(phi::DataType dtype) { switch (dtype) { @@ -549,9 +547,9 @@ platform::Place CastPyArg2Place(PyObject* obj, ssize_t arg_pos) { return place; } -#ifdef PADDLE_WITH_DISTRIBUTE using phi::distributed::TensorDistAttr; TensorDistAttr CastPyArg2DistAttr(PyObject* obj, ssize_t arg_pos) { +#ifdef PADDLE_WITH_DISTRIBUTE if (PyObject_IsInstance( obj, reinterpret_cast(g_tensor_dist_attr_pytype))) { return ::pybind11::handle(obj).cast(); @@ -562,8 +560,13 @@ TensorDistAttr CastPyArg2DistAttr(PyObject* obj, ssize_t arg_pos) { arg_pos + 1, 
reinterpret_cast(obj->ob_type)->tp_name)); } -} +#else + PADDLE_THROW(platform::errors::Unavailable( + "The parsing of `DistAttr` is not supported in the current " + "PaddlePaddle, please recompile and installPaddlePaddle with the option " + "of `WITH_DISTRIBUTE=ON`.")); #endif +} phi::DenseTensor CastPyArg2FrameworkTensor(PyObject* obj, ssize_t arg_pos) { if (PyObject_TypeCheck(obj, g_framework_tensor_pytype)) { @@ -901,19 +904,31 @@ PyObject* ToPyObject(const std::vector& value) { return result; } -#ifdef PADDLE_WITH_DISTRIBUTE PyObject* ToPyObject(const phi::distributed::DistTensor* value) { +#ifdef PADDLE_WITH_DISTRIBUTE auto obj = ::pybind11::cast(value, py::return_value_policy::reference); obj.inc_ref(); return obj.ptr(); +#else + PADDLE_THROW(platform::errors::Unavailable( + "DistTensor to PyObject is not supported in the current " + "PaddlePaddle, please recompile and installPaddlePaddle with the option " + "of `WITH_DISTRIBUTE=ON`.")); +#endif } PyObject* ToPyObject(const phi::distributed::TensorDistAttr* value) { +#ifdef PADDLE_WITH_DISTRIBUTE auto obj = ::pybind11::cast(value, py::return_value_policy::reference); obj.inc_ref(); return obj.ptr(); -} +#else + PADDLE_THROW(platform::errors::Unavailable( + "TensorDistAttr to PyObject is not supported in the current " + "PaddlePaddle, please recompile and installPaddlePaddle with the option " + "of `WITH_DISTRIBUTE=ON`.")); #endif +} PyObject* ToPyObject(const phi::SelectedRows* value) { auto obj = ::pybind11::cast(value, py::return_value_policy::reference); diff --git a/paddle/fluid/pybind/eager_utils.h b/paddle/fluid/pybind/eager_utils.h index 6210e0b9138..ad7ec2d42c4 100644 --- a/paddle/fluid/pybind/eager_utils.h +++ b/paddle/fluid/pybind/eager_utils.h @@ -35,14 +35,12 @@ typedef SSIZE_T ssize_t; #include "paddle/phi/common/int_array.h" #include "paddle/phi/common/scalar.h" #include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/distributed/auto_parallel/dist_attr.h" +#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" #include "paddle/phi/core/selected_rows.h" #include "paddle/utils/pybind.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" -#ifdef PADDLE_WITH_DISTRIBUTE -#include "paddle/phi/core/distributed/auto_parallel/dist_attr.h" -#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" -#endif namespace paddle { class CustomOpKernelContext; @@ -120,10 +118,8 @@ PyObject* ToPyObject(const std::vector>& value, bool return_py_none_if_not_initialize = false); PyObject* ToPyObject(const platform::Place& value); PyObject* ToPyObject(const phi::DenseTensor* value); -#ifdef PADDLE_WITH_DISTRIBUTE PyObject* ToPyObject(const phi::distributed::DistTensor* value); PyObject* ToPyObject(const phi::distributed::TensorDistAttr* value); -#endif PyObject* ToPyObject(const phi::SelectedRows* value); PyObject* ToPyObject(const paddle::framework::proto::VarType::Type& dtype); PyObject* ToPyObject(const paddle::framework::proto::VarType& type); @@ -314,10 +310,8 @@ paddle::DataType CastPyArg2DataTypeDirectly(PyObject* obj, const std::string& op_type, ssize_t arg_pos); -#ifdef PADDLE_WITH_DISTRIBUTE phi::distributed::TensorDistAttr CastPyArg2DistAttr(PyObject* obj, ssize_t arg_pos); -#endif paddle::optional GetOptionalTensorFromArgs( const std::string& op_type, diff --git a/paddle/fluid/pybind/tensor.cc b/paddle/fluid/pybind/tensor.cc index f36da760495..0bc22b37366 100644 --- a/paddle/fluid/pybind/tensor.cc +++ b/paddle/fluid/pybind/tensor.cc @@ -170,13 +170,11 @@ limitations under the License. 
*/ #include "paddle/fluid/pybind/eager_utils.h" #include "paddle/fluid/pybind/tensor.h" #include "paddle/phi/api/ext/op_meta_info.h" +#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" #include "paddle/phi/core/flags.h" #include "paddle/phi/kernels/autotune/cache.h" #include "paddle/phi/kernels/autotune/switch_autotune.h" #include "pybind11/stl.h" -#ifdef PADDLE_WITH_DISTRIBUTE -#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" -#endif PHI_DECLARE_bool(use_mkldnn); PHI_DECLARE_bool(use_shm_cache); diff --git a/paddle/phi/api/lib/api_gen_utils.cc b/paddle/phi/api/lib/api_gen_utils.cc index 12846cf0f58..73a1ed4c7fd 100644 --- a/paddle/phi/api/lib/api_gen_utils.cc +++ b/paddle/phi/api/lib/api_gen_utils.cc @@ -21,10 +21,8 @@ DECLARE_bool(use_stride_kernel); #include "glog/logging.h" -#ifdef PADDLE_WITH_DISTRIBUTE #include "paddle/phi/core/distributed/auto_parallel/dist_attr.h" #include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" -#endif namespace paddle { namespace experimental { @@ -532,7 +530,6 @@ void TransStride(phi::DeviceContext* dev_ctx, phi::SelectedRows* from, phi::SelectedRows* to) {} -#ifdef PADDLE_WITH_DISTRIBUTE /* ------------------ for auto parallel ----------------------- */ phi::distributed::DistTensor* SetKernelDistOutput(Tensor* out) { @@ -548,7 +545,6 @@ phi::distributed::DistTensor* SetKernelDistOutput(Tensor* out) { } return nullptr; } -#endif } // namespace experimental } // namespace paddle diff --git a/paddle/phi/api/lib/api_gen_utils.h b/paddle/phi/api/lib/api_gen_utils.h index 1b552bf94ea..605423b431a 100644 --- a/paddle/phi/api/lib/api_gen_utils.h +++ b/paddle/phi/api/lib/api_gen_utils.h @@ -137,11 +137,9 @@ void TransStrideLegacy(phi::DeviceContext* dev_ctx, phi::DenseTensor* from, phi::DenseTensor* to); -#ifdef PADDLE_WITH_DISTRIBUTE /* ------------------ for auto parallel ----------------------- */ phi::distributed::DistTensor* SetKernelDistOutput(Tensor* out); -#endif } // namespace experimental } // namespace paddle diff --git a/paddle/phi/api/lib/data_transform.cc b/paddle/phi/api/lib/data_transform.cc index 68aaf0367b7..3d717c6dbfe 100644 --- a/paddle/phi/api/lib/data_transform.cc +++ b/paddle/phi/api/lib/data_transform.cc @@ -21,15 +21,13 @@ limitations under the License. 
*/ #include "paddle/phi/api/lib/kernel_dispatch.h" #include "paddle/phi/api/lib/utils/allocator.h" #include "paddle/phi/backends/context_pool.h" +#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_utils.h" #include "paddle/phi/core/visit_type.h" #include "paddle/phi/kernels/cast_kernel.h" #include "paddle/phi/kernels/contiguous_kernel.h" #include "paddle/phi/kernels/transfer_layout_kernel.h" -#ifdef PADDLE_WITH_DISTRIBUTE -#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" -#endif DECLARE_bool(use_stride_kernel); @@ -597,7 +595,6 @@ void TransDataBackend(const phi::SelectedRows* tensor, } } -#ifdef PADDLE_WITH_DISTRIBUTE /* ------------------ for auto parallel ----------------------- */ std::shared_ptr PrepareDataForDistTensor( @@ -634,7 +631,6 @@ std::shared_ptr PrepareDataForDistTensor( } return nullptr; } -#endif } // namespace experimental } // namespace paddle diff --git a/paddle/phi/api/lib/data_transform.h b/paddle/phi/api/lib/data_transform.h index 642ddae8b08..bc59ac8cfa7 100644 --- a/paddle/phi/api/lib/data_transform.h +++ b/paddle/phi/api/lib/data_transform.h @@ -171,7 +171,6 @@ inline bool NeedTransformPlace(const phi::Place& src_place, return ret; } -#ifdef PADDLE_WITH_DISTRIBUTE /* ------------------ for auto parallel ----------------------- */ // TODO(chenweihang): impl Reshard input and output function @@ -180,7 +179,6 @@ std::shared_ptr PrepareDataForDistTensor( const phi::TensorArgDef& target_args_def, const TransformFlag& transform_flag, bool is_stride_kernel); -#endif } // namespace experimental } // namespace paddle diff --git a/paddle/phi/api/lib/kernel_dispatch.h b/paddle/phi/api/lib/kernel_dispatch.h index d309902c11d..4fd684b0bd6 100644 --- a/paddle/phi/api/lib/kernel_dispatch.h +++ b/paddle/phi/api/lib/kernel_dispatch.h @@ -24,14 +24,11 @@ limitations under the License. */ #include "paddle/phi/backends/all_context.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/common/layout.h" +#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" #include "paddle/phi/core/selected_rows.h" #include "paddle/phi/core/sparse_coo_tensor.h" #include "paddle/phi/core/sparse_csr_tensor.h" -#ifdef PADDLE_WITH_DISTRIBUTE -#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" -#endif - // TODO(chenweihang): split Key, Kernel, Factory into diff files #include "paddle/phi/core/kernel_factory.h" @@ -173,7 +170,6 @@ struct KernelTypeParser : ArgsIterator { } }; -#ifdef PADDLE_WITH_DISTRIBUTE /* ------------------ for auto parallel ----------------------- */ struct DistTensorTypeParser : ArgsIterator { @@ -201,7 +197,6 @@ struct DistTensorTypeParser : ArgsIterator { // do nothing } }; -#endif } // namespace detail @@ -235,12 +230,10 @@ DataLayout ParseLayout(DataLayout layout); DataLayout ParseLayout(const Tensor& tensor); DataLayout ParseLayoutWithInputOrder(DataLayout layout, const Tensor& tensor); -#ifdef PADDLE_WITH_DISTRIBUTE template bool AllInputsAreDistTensor(const Args&... args) { return detail::DistTensorTypeParser().apply(args...).result; } -#endif } // namespace experimental } // namespace paddle diff --git a/paddle/phi/api/lib/tensor.cc b/paddle/phi/api/lib/tensor.cc index 9924c95cd07..c6b055acf4e 100644 --- a/paddle/phi/api/lib/tensor.cc +++ b/paddle/phi/api/lib/tensor.cc @@ -26,6 +26,7 @@ limitations under the License. 
*/ #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/core/ddim.h" #include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/selected_rows.h" #include "paddle/phi/core/sparse_coo_tensor.h" @@ -34,9 +35,6 @@ limitations under the License. */ #include "paddle/phi/core/tensor_base.h" #include "paddle/phi/core/tensor_meta.h" #include "paddle/phi/core/tensor_utils.h" -#ifdef PADDLE_WITH_DISTRIBUTE -#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" -#endif namespace paddle { @@ -141,11 +139,7 @@ bool Tensor::is_dense_tensor() const { return phi::DenseTensor::classof(impl_.get()); } bool Tensor::is_dist_tensor() const { -#ifdef PADDLE_WITH_DISTRIBUTE return phi::distributed::DistTensor::classof(impl_.get()); -#else - return false; -#endif } bool Tensor::is_selected_rows() const { return phi::SelectedRows::classof(impl_.get()); diff --git a/paddle/phi/backends/dynload/dynamic_loader.cc b/paddle/phi/backends/dynload/dynamic_loader.cc index 66812bae121..4621a9c3dda 100644 --- a/paddle/phi/backends/dynload/dynamic_loader.cc +++ b/paddle/phi/backends/dynload/dynamic_loader.cc @@ -18,6 +18,7 @@ limitations under the License. */ #include #include "paddle/phi/backends/dynload/cupti_lib_path.h" +#include "paddle/phi/backends/dynload/port.h" #include "paddle/phi/core/enforce.h" #if defined(_WIN32) diff --git a/paddle/phi/core/distributed/auto_parallel/CMakeLists.txt b/paddle/phi/core/distributed/auto_parallel/CMakeLists.txt index 91cbe4a3ff4..0aee1b53638 100644 --- a/paddle/phi/core/distributed/auto_parallel/CMakeLists.txt +++ b/paddle/phi/core/distributed/auto_parallel/CMakeLists.txt @@ -1,20 +1,5 @@ proto_library(auto_parallel_proto SRCS auto_parallel.proto) -set(DISTRIBUTED_SRCS "") - -if(WITH_DISTRIBUTE) - list( - APPEND - DISTRIBUTED_SRCS - dist_tensor.cc - reshard_function.cc - reshard_split_functor.cc - reshard_concat_functor.cc - reshard_all_gather_functor.cc - r_to_s_reshard_function.cc - s_to_r_reshard_function.cc) -endif() - collect_srcs( core_srcs SRCS @@ -23,4 +8,10 @@ collect_srcs( dist_attr.cc dist_mapper.cc reshard_utils.cc - ${DISTRIBUTED_SRCS}) + dist_tensor.cc + reshard_function.cc + reshard_split_functor.cc + reshard_concat_functor.cc + reshard_all_gather_functor.cc + r_to_s_reshard_function.cc + s_to_r_reshard_function.cc) diff --git a/paddle/phi/core/distributed/auto_parallel/dist_tensor.h b/paddle/phi/core/distributed/auto_parallel/dist_tensor.h index e3a738b2ba1..7af036a9268 100644 --- a/paddle/phi/core/distributed/auto_parallel/dist_tensor.h +++ b/paddle/phi/core/distributed/auto_parallel/dist_tensor.h @@ -44,7 +44,7 @@ class DistTensor final DistTensor(const DDim& dims, const TensorDistAttr& dist_attr); /// \brief Destroy the tensor object and release exclusive resources. - ~DistTensor() = default; + virtual ~DistTensor() = default; /// \brief Returns the name of the class for type traits. /// \return The name of the class. 
diff --git a/paddle/phi/core/distributed/auto_parallel/reshard_all_gather_functor.cc b/paddle/phi/core/distributed/auto_parallel/reshard_all_gather_functor.cc index c1e1421ab73..4cf18ca2181 100644 --- a/paddle/phi/core/distributed/auto_parallel/reshard_all_gather_functor.cc +++ b/paddle/phi/core/distributed/auto_parallel/reshard_all_gather_functor.cc @@ -17,7 +17,6 @@ #include "paddle/phi/backends/all_context.h" #include "paddle/phi/core/distributed/auto_parallel/reshard_utils.h" #include "paddle/phi/core/visit_type.h" -#include "paddle/phi/infermeta/unary.h" #include "paddle/phi/kernels/all_gather_kernel.h" namespace phi { diff --git a/paddle/phi/core/distributed/auto_parallel/reshard_utils.cc b/paddle/phi/core/distributed/auto_parallel/reshard_utils.cc index 60b60ab9421..3d30ce5fe7a 100644 --- a/paddle/phi/core/distributed/auto_parallel/reshard_utils.cc +++ b/paddle/phi/core/distributed/auto_parallel/reshard_utils.cc @@ -15,13 +15,17 @@ #include "paddle/phi/core/distributed/auto_parallel/reshard_utils.h" #include + +// the needs to be included before , otherwise +// there will be symbol redefinition error on windows +#include "paddle/phi/core/distributed/store/tcp_store.h" + #include "glog/logging.h" #include "paddle/phi/backends/all_context.h" #include "paddle/phi/core/device_context.h" #include "paddle/phi/core/distributed/auto_parallel/process_mesh.h" #include "paddle/phi/core/distributed/auto_parallel/utils.h" #include "paddle/phi/core/distributed/comm_context_manager.h" -#include "paddle/phi/core/distributed/store/tcp_store.h" namespace phi { namespace distributed { diff --git a/paddle/phi/core/distributed/auto_parallel/reshard_utils.h b/paddle/phi/core/distributed/auto_parallel/reshard_utils.h index 4d9e5a2086d..f50d9496d39 100644 --- a/paddle/phi/core/distributed/auto_parallel/reshard_utils.h +++ b/paddle/phi/core/distributed/auto_parallel/reshard_utils.h @@ -20,13 +20,13 @@ #include #include -#include "paddle/phi/core/distributed/store/tcp_store.h" - namespace phi { class DeviceContext; namespace distributed { class CommContext; +class TCPStore; + class ProcessMesh; bool IsDimsMappingShard(const std::vector& dims_mapping); diff --git a/paddle/phi/core/enforce.h b/paddle/phi/core/enforce.h index ae60fa96943..1f2998c8cf3 100644 --- a/paddle/phi/core/enforce.h +++ b/paddle/phi/core/enforce.h @@ -59,7 +59,6 @@ limitations under the License. */ #define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h #include "paddle/phi/core/errors.h" -#include "paddle/phi/backends/dynload/port.h" #include "paddle/utils/string/printf.h" #include "paddle/utils/string/to_string.h" diff --git a/paddle/phi/core/meta_tensor.cc b/paddle/phi/core/meta_tensor.cc index f5b55edee65..adfc412a989 100644 --- a/paddle/phi/core/meta_tensor.cc +++ b/paddle/phi/core/meta_tensor.cc @@ -18,14 +18,12 @@ limitations under the License. 
*/ #include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/selected_rows.h" #include "paddle/phi/core/string_tensor.h" #include "paddle/phi/core/string_tensor_utils.h" #include "paddle/phi/core/tensor_utils.h" -#ifdef PADDLE_WITH_DISTRIBUTE -#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" -#endif namespace phi { @@ -88,10 +86,8 @@ void MetaTensor::set_dims(const DDim& dims) { } else if (phi::SparseCsrTensor::classof(tensor_)) { DenseTensorUtils::GetMutableMeta(static_cast(tensor_)) ->dims = dims; -#ifdef PADDLE_WITH_DISTRIBUTE } else if (phi::distributed::DistTensor::classof(tensor_)) { static_cast(tensor_)->set_dims(dims); -#endif } else { PADDLE_THROW(phi::errors::Unimplemented( "Unsupported setting dims for `%s`.", tensor_->type_info().name())); @@ -123,10 +119,8 @@ void MetaTensor::set_dtype(DataType dtype) { } else if (phi::SparseCsrTensor::classof(tensor_)) { DenseTensorUtils::GetMutableMeta(static_cast(tensor_)) ->dtype = dtype; -#ifdef PADDLE_WITH_DISTRIBUTE } else if (phi::distributed::DistTensor::classof(tensor_)) { // skip, DistTensor no need to set dtype -#endif } else { PADDLE_THROW(phi::errors::Unimplemented( "Unsupported settting dtype for `%s`.", tensor_->type_info().name())); @@ -157,10 +151,8 @@ void MetaTensor::set_layout(DataLayout layout) { } else if (phi::SparseCsrTensor::classof(tensor_)) { DenseTensorUtils::GetMutableMeta(static_cast(tensor_)) ->layout = layout; -#ifdef PADDLE_WITH_DISTRIBUTE } else if (phi::distributed::DistTensor::classof(tensor_)) { // skip, DistTensor no need to set dtype -#endif } else { PADDLE_THROW(phi::errors::Unimplemented( "Unsupported settting layout for `%s`.", tensor_->type_info().name())); @@ -171,11 +163,8 @@ void MetaTensor::share_lod(const MetaTensor& meta_tensor) { ValidCheck(*this); ValidCheck(meta_tensor); if (phi::SparseCooTensor::classof(tensor_) || - phi::SparseCsrTensor::classof(tensor_) -#ifdef PADDLE_WITH_DISTRIBUTE - || phi::distributed::DistTensor::classof(tensor_) -#endif - ) { + phi::SparseCsrTensor::classof(tensor_) || + phi::distributed::DistTensor::classof(tensor_)) { return; } if (meta_tensor.lod().empty()) { @@ -201,11 +190,8 @@ void MetaTensor::share_meta(const MetaTensor& meta_tensor) { if (phi::DenseTensor::classof(tensor_) || phi::SelectedRows::classof(tensor_) || phi::SparseCooTensor::classof(tensor_) || - phi::SparseCsrTensor::classof(tensor_) -#ifdef PADDLE_WITH_DISTRIBUTE - || phi::distributed::DistTensor::classof(tensor_) -#endif - ) { + phi::SparseCsrTensor::classof(tensor_) || + phi::distributed::DistTensor::classof(tensor_)) { share_dims(meta_tensor); set_dtype(meta_tensor.dtype()); set_layout(meta_tensor.layout()); @@ -230,10 +216,7 @@ void MetaTensor::share_dims(const MetaTensor& meta_tensor) { bool is_selected_rows = phi::SelectedRows::classof(tensor_); bool is_sparse_coo = phi::SparseCooTensor::classof(tensor_); bool is_sparse_csr = phi::SparseCsrTensor::classof(tensor_); - bool is_dist_tensor = false; -#ifdef PADDLE_WITH_DISTRIBUTE - is_dist_tensor = phi::distributed::DistTensor::classof(tensor_); -#endif + bool is_dist_tensor = phi::distributed::DistTensor::classof(tensor_); if (is_dense_tensor || is_selected_rows || is_sparse_coo || is_sparse_csr || is_dist_tensor) { if (is_selected_rows) { diff --git a/paddle/phi/core/tensor_utils.h b/paddle/phi/core/tensor_utils.h index 4d9b50d34f8..42efe249cfb 100644 --- a/paddle/phi/core/tensor_utils.h +++ 
b/paddle/phi/core/tensor_utils.h @@ -16,6 +16,7 @@ limitations under the License. */ #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/device_context.h" +#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" #include "paddle/phi/core/selected_rows.h" #include "paddle/phi/core/sparse_coo_tensor.h" #include "paddle/phi/core/sparse_csr_tensor.h" diff --git a/paddle/phi/core/utils/type_info.cc b/paddle/phi/core/utils/type_info.cc index 82247c6864d..38e17b57f63 100644 --- a/paddle/phi/core/utils/type_info.cc +++ b/paddle/phi/core/utils/type_info.cc @@ -18,15 +18,13 @@ limitations under the License. */ #include "paddle/phi/backends/custom/custom_context.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/xpu/xpu_context.h" +#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" #include "paddle/phi/core/selected_rows.h" #include "paddle/phi/core/sparse_coo_tensor.h" #include "paddle/phi/core/sparse_csr_tensor.h" #include "paddle/phi/core/storage_properties.h" #include "paddle/phi/core/string_tensor.h" #include "paddle/phi/core/tensor_array.h" -#ifdef PADDLE_WITH_DISTRIBUTE -#include "paddle/phi/core/distributed/auto_parallel/dist_tensor.h" -#endif #include "paddle/phi/core/utils/type_info.h" namespace phi { @@ -51,14 +49,11 @@ template class TypeInfoTraits; template class TypeInfoTraits; template class TypeInfoTraits; template class TypeInfoTraits; +template class TypeInfoTraits; template class TypeInfoTraits; template class TypeInfoTraits; -#ifdef PADDLE_WITH_DISTRIBUTE -template class TypeInfoTraits; -#endif - #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || \ defined(PADDLE_WITH_XPU_KP) template class TypeInfoTraits; diff --git a/test/cpp/phi/kernels/test_cpu_vec.cc b/test/cpp/phi/kernels/test_cpu_vec.cc index f9d07f1ca29..9a2b2994463 100644 --- a/test/cpp/phi/kernels/test_cpu_vec.cc +++ b/test/cpp/phi/kernels/test_cpu_vec.cc @@ -18,6 +18,7 @@ limitations under the License. */ #include "glog/logging.h" #include "gtest/gtest.h" +#include "paddle/phi/backends/dynload/port.h" #include "paddle/phi/kernels/funcs/cpu_vec.h" namespace phi { -- GitLab
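
The whole patch follows one pattern: headers such as dist_tensor.h are included unconditionally, declarations and call sites lose their PADDLE_WITH_DISTRIBUTE guards, and the #ifdef moves inside the function body, where the non-distributed build raises an Unavailable error instead of silently returning None or false. Below is a minimal, self-contained sketch of that shape; DoDistributedWork and ThrowUnavailable are hypothetical stand-ins for the real functions and for PADDLE_THROW(platform::errors::Unavailable(...)), not Paddle APIs.

// Minimal sketch (not Paddle code) of the guard-inside-body pattern applied
// throughout this patch. Build with -DPADDLE_WITH_DISTRIBUTE to take the
// distributed branch.
#include <iostream>
#include <stdexcept>
#include <string>

// Hypothetical stand-in for PADDLE_THROW(platform::errors::Unavailable(...)).
[[noreturn]] void ThrowUnavailable(const std::string& msg) {
  throw std::runtime_error(msg);
}

// Hypothetical function; in the patch this role is played by functions such as
// CreateDistTensorWithNumpyValue, InitDistTensorWithTensor or CastPyArg2DistAttr.
void DoDistributedWork() {
#ifdef PADDLE_WITH_DISTRIBUTE
  // The real distributed implementation would live here; it can reference
  // phi::distributed::DistTensor freely because its header is now included
  // unconditionally.
  std::cout << "running distributed path\n";
#else
  ThrowUnavailable(
      "This feature is not supported in the current PaddlePaddle, please "
      "recompile and install PaddlePaddle with `WITH_DISTRIBUTE=ON`.");
#endif
}

int main() {
  try {
    DoDistributedWork();
  } catch (const std::exception& e) {
    std::cerr << e.what() << "\n";
  }
  return 0;
}

The payoff is visible in the diffstat: the pybind bindings, the ToPyObject overloads and the TypeInfoTraits instantiations no longer need per-site #ifdef blocks, and only the bodies that genuinely depend on the distributed runtime stay guarded.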
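
The reshard_utils change is a separate, smaller fix: the header stops including tcp_store.h and merely forward-declares TCPStore, while reshard_utils.cc includes tcp_store.h early, before glog and the Windows headers it pulls in, which is what the added comment about the winsock symbol redefinition refers to. A generic, single-file sketch of the forward-declaration side, with illustrative names only:

// Illustrative only: the header-facing side of the change. A declaration is
// enough when a header only passes the type around by pointer or reference,
// so the heavyweight include can move into the .cc file.
namespace demo {

class Store;                // forward declaration, no #include needed here

Store* GetDefaultStore();   // callers of the header never see the definition

// In the real patch the definition stays in tcp_store.h and is included by
// reshard_utils.cc ahead of glog/windows headers; here it is inlined so the
// sketch compiles as one file.
class Store {
 public:
  int Get() const { return 42; }
};

Store* GetDefaultStore() {
  static Store store;
  return &store;
}

}  // namespace demo

int main() { return demo::GetDefaultStore()->Get() == 42 ? 0 : 1; }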
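
For the same reason, kernel_dispatch.h now keeps DistTensorTypeParser and AllInputsAreDistTensor compiled in every build. The sketch below shows the underlying idea, deciding whether every argument in a pack is a DistTensor, using a C++17 fold expression instead of the ArgsIterator visitor the real code uses; all names are illustrative.

// Illustrative sketch of the AllInputsAreDistTensor idea: AND a per-argument
// predicate over a variadic pack. The real Paddle code walks the arguments
// with an ArgsIterator-based visitor rather than a fold expression.
#include <iostream>

struct DenseTensor {};
struct DistTensor {};

inline bool IsDist(const DistTensor&) { return true; }
template <typename T>
inline bool IsDist(const T&) { return false; }

template <typename... Args>
bool AllInputsAreDist(const Args&... args) {
  return (IsDist(args) && ...);  // true only if every argument is a DistTensor
}

int main() {
  DistTensor a, b;
  DenseTensor c;
  std::cout << AllInputsAreDist(a, b) << "\n";  // prints 1
  std::cout << AllInputsAreDist(a, c) << "\n";  // prints 0
  return 0;
}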