Unverified commit 897911fc, authored by Chen Weihang and committed by GitHub

[Cherry-pick] Organize the API of custom operators (#41882)

* [Phi&CustomOp] Remove deprecated enum PlaceType for custom op & add warning (#41647)

* remove old custom op placetype

* replace dist placetype usage

* add with gpu macro

* fix mutable_data error

* fix set value error

* add comment

* remove all is_initialized usage (#41766)

* remove inner_place usage (#41768)

* polish tensor deprecated method warning (#41807)

* [CustomOp] Fix PlaceType related compat error (#41826)

* fix place type related compat error

* fix test failed

* remove dll decl

* revert place type change

* add dll decl

* resolve conflict
Parent b7244512
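In short, this change retires the legacy custom-operator place API: the `PlaceType` enum, `Tensor::inner_place()`, and `Tensor::is_initialized()` give way to the unified `Place` type, `Tensor::place()`, and `Tensor::initialized()`. The old `PlaceType` and `is_initialized` spellings are kept with one-time deprecation warnings, while `inner_place()` is removed outright. A minimal, hypothetical sketch of the user-facing migration, using only calls that appear in the diff below (the tensor `x` stands in for any custom-op input):

// Before (still compiles, now logs a one-time deprecation warning):
if (x.place() == paddle::PlaceType::kGPU && x.is_initialized()) {
  // ...
}
// After (preferred spellings kept/promoted by this change):
if (x.is_gpu() && x.initialized()) {
  // ...
}
// Creating tensors: use the factory functions instead of Tensor(place, shape).
auto y = paddle::full(x.shape(), 1, x.dtype(), x.place());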
......@@ -21,7 +21,7 @@ std::vector<Place> GetPlaceList(const std::vector<Tensor>& tensors) {
std::vector<Place> places;
places.reserve(tensors.size());
for (auto& tensor : tensors) {
places.push_back(tensor.inner_place());
places.push_back(tensor.place());
}
return places;
}
......@@ -41,13 +41,14 @@ std::string GetKeyFromPlaces(const std::vector<Place>& places) {
}
static bool CheckTensorsInPlace(const std::vector<Tensor>& tensors,
const PlaceType type) {
return std::all_of(tensors.cbegin(), tensors.cend(),
[&](const Tensor& t) { return t.place() == type; });
phi::AllocationType type) {
return std::all_of(tensors.cbegin(), tensors.cend(), [&](const Tensor& t) {
return t.place().GetType() == type;
});
}
bool CheckTensorsInCudaPlace(const std::vector<Tensor>& tensors) {
return CheckTensorsInPlace(tensors, PlaceType::kGPU);
return CheckTensorsInPlace(tensors, phi::AllocationType::GPU);
}
} // namespace distributed
......
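The pattern above, comparing `place().GetType()` against `phi::AllocationType` instead of the removed `PlaceType` comparison, is the replacement applied throughout this change wherever a device check is needed. A small hypothetical helper showing the same check on a single tensor:

// Hypothetical single-tensor variant of the check above.
bool TensorOnGpu(const paddle::experimental::Tensor& t) {
  // Equivalent convenience form used elsewhere in this change: t.is_gpu().
  return t.place().GetType() == phi::AllocationType::GPU;
}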
......@@ -404,7 +404,7 @@ void CheckTensorsInDifferentDevices(const std::vector<Tensor>& tensors,
platform::errors::InvalidArgument(
"Tensors must be CUDA and dense tensor."));
const auto inserted = used_devices.insert(t.inner_place()).second;
const auto inserted = used_devices.insert(t.place()).second;
PADDLE_ENFORCE_EQ(inserted, true,
platform::errors::InvalidArgument(
"Tensors must be on distinct GPU devices."));
......
......@@ -398,7 +398,7 @@ void EagerReducer::InitializeDenseGroups(
"GRAD is SelectedRows",
tensor_name));
PADDLE_ENFORCE_EQ(tensor.is_initialized(), true,
PADDLE_ENFORCE_EQ(tensor.initialized(), true,
platform::errors::PreconditionNotMet(
"Tensor %s is not initialized.", tensor_name));
const auto size = tensor.numel();
......@@ -414,20 +414,13 @@ void EagerReducer::InitializeDenseGroups(
p_group->dense_tensors_.push_back(phi::DenseTensor());
const auto &dtype = tensor.dtype();
const auto &place = tensor.place();
const auto &inner_place = tensor.impl()->place();
if (index > 0) {
PADDLE_ENFORCE_EQ(dtype, p_group->dtype_,
platform::errors::PreconditionNotMet(
"Tensor %s has unexpected dtype.", tensor_name));
PADDLE_ENFORCE_EQ(place, place_,
platform::errors::PreconditionNotMet(
"Tensor %s has different place. Expected place is "
"%s, but actual place is %s",
tensor_name, inner_place_, inner_place));
} else {
p_group->dtype_ = dtype;
place_ = place;
inner_place_ = inner_place;
}
}
......@@ -717,7 +710,7 @@ void EagerReducer::MarkGroupReady(size_t group_index) {
bool EagerReducer::HasGrad(size_t var_index) {
auto grad = egr::EagerUtils::mutable_grad(tensors_[var_index]);
if (grad && grad->is_initialized()) {
if (grad && grad->initialized()) {
return true;
} else {
return false;
......
......@@ -26,7 +26,6 @@
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/phi/api/include/api.h"
#include "paddle/phi/api/include/tensor.h"
#include "paddle/phi/api/lib/ext_compat_utils.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/utils/string/string_helper.h"
......@@ -121,7 +120,6 @@ class EagerReducer {
std::vector<EagerGroup> groups_;
std::vector<TensorLocator> variable_locators_;
PlaceType place_;
platform::Place inner_place_;
size_t next_group_ = 0;
int64_t nranks_ = -1;
......
......@@ -21,7 +21,7 @@ namespace egr {
static inline bool NeedCast(const paddle::experimental::Tensor& tensor,
const paddle::experimental::DataType& dst_dtype) {
auto place = tensor.inner_place();
auto place = tensor.place();
auto data_type = tensor.dtype();
if (paddle::platform::is_gpu_place(place) ||
paddle::platform::is_cuda_pinned_place(place) ||
......
......@@ -20,7 +20,7 @@ namespace egr {
static inline bool NeedCast(const paddle::experimental::Tensor& tensor,
const paddle::experimental::DataType& dst_dtype) {
auto place = tensor.inner_place();
auto place = tensor.place();
auto data_type = tensor.dtype();
if (paddle::platform::is_gpu_place(place) ||
paddle::platform::is_cuda_pinned_place(place) ||
......
......@@ -119,7 +119,7 @@ void GradNodeBase::SetGradInMeta(const paddle::experimental::Tensor& fwd_out,
auto& meta = metas[0];
meta.SetStopGradient(fwd_out_meta->StopGradient());
if (!fwd_out.is_initialized()) {
if (!fwd_out.initialized()) {
VLOG(6)
<< "Skip Configuring GradSlotMeta for uninitialized GradInput Tensor";
return;
......@@ -145,7 +145,7 @@ void GradNodeBase::SetGradInMeta(const paddle::experimental::Tensor& fwd_out,
"which is illegal."));
meta.SetTensorMeta(dense_tensor->meta());
meta.SetPlace(fwd_out.inner_place());
meta.SetPlace(fwd_out.place());
if (paddle::framework::IsComplexType(
paddle::framework::TransToProtoVarType(dense_tensor->type()))) {
......@@ -186,7 +186,7 @@ void GradNodeBase::SetGradInMeta(
meta.SetStopGradient(fwd_out_meta->StopGradient());
}
if (!fwd_out_tensor.is_initialized()) {
if (!fwd_out_tensor.initialized()) {
VLOG(6)
<< "Skip Configuring GradSlotMeta for uninitialized GradInput Tensor";
return;
......@@ -204,7 +204,7 @@ void GradNodeBase::SetGradInMeta(
"with phi::DataType::UNDEFINED,"
"which is illegal."));
meta.SetTensorMeta(dense_tensor->meta());
meta.SetPlace(fwd_out_tensor.inner_place());
meta.SetPlace(fwd_out_tensor.place());
if (paddle::framework::IsComplexType(
paddle::framework::TransToProtoVarType(dense_tensor->type()))) {
......@@ -250,7 +250,7 @@ void GradNodeBase::SetGradOutMeta(const paddle::experimental::Tensor& fwd_in,
"with phi::DataType::UNDEFINED,"
"which is illegal."));
meta.SetTensorMeta(dense_tensor->meta());
meta.SetPlace(fwd_in.inner_place());
meta.SetPlace(fwd_in.place());
}
} else {
VLOG(6) << "Unable to initialize the DenseTensorMeta of GradSlotMeta with "
......@@ -295,7 +295,7 @@ void GradNodeBase::SetGradOutMeta(
"phi::DataType::UNDEFINED,"
"which is illegal."));
meta.SetTensorMeta(dense_tensor->meta());
meta.SetPlace(fwd_in_tensor.inner_place());
meta.SetPlace(fwd_in_tensor.place());
}
} else {
VLOG(6) << "Unable to initialize the DenseTensorMeta of GradSlotMeta "
......
......@@ -317,11 +317,11 @@ inline void CheckTensor(const paddle::experimental::Tensor& pre,
paddle::framework::DataType2String(pre.dtype()),
paddle::framework::DataType2String(post.dtype())));
PADDLE_ENFORCE_EQ(
pre.inner_place(), post.inner_place(),
pre.place(), post.place(),
paddle::platform::errors::PermissionDenied(
"The place of tensor before(%s) and after(%s) "
"hook are not consistent",
pre.inner_place().DebugString(), post.inner_place().DebugString()));
pre.place().DebugString(), post.place().DebugString()));
}
}
......
......@@ -53,7 +53,7 @@ void GradTensorHolder::CopyValueFromTensor(
paddle::experimental::Tensor& buffer_tensor = buffer_[slot_id][rank];
if ((!buffer_tensor.defined() || !buffer_tensor.initialized())) {
// Perform deep copy here
buffer_tensor.copy_(t, t.inner_place(), false);
buffer_tensor.copy_(t, t.place(), false);
buffer_tensor.set_autograd_meta(t.mutable_autograd_meta());
} else {
......@@ -66,7 +66,7 @@ void GradTensorHolder::CopyValueFromTensor(
if (t.defined()) {
// Fill 1.0, use full to support complex, one_like don't support it.
buffer_[slot_id][rank] =
paddle::experimental::full(t.shape(), 1, t.dtype(), t.inner_place());
paddle::experimental::full(t.shape(), 1, t.dtype(), t.place());
}
}
}
......
......@@ -62,7 +62,7 @@ class GradNodePyLayer : public GradNodeBase {
} else {
forward_outputs_meta_[i].emplace_back();
}
forward_outputs_place_[i].emplace_back(tensor->inner_place());
forward_outputs_place_[i].emplace_back(tensor->place());
}
}
}
......
......@@ -96,7 +96,7 @@ TEST(Tensor, MemberFunction) {
CHECK_EQ(et3.dims(), expected_dim);
CHECK_EQ(et3.type(), paddle::experimental::DataType::FLOAT32);
CHECK_EQ(et3.layout(), paddle::experimental::DataLayout::NCHW);
CHECK(paddle::platform::is_cpu_place(et3.inner_place()));
CHECK(paddle::platform::is_cpu_place(et3.place()));
VLOG(6) << "Get impl";
auto* dt3_ptr =
std::dynamic_pointer_cast<phi::DenseTensor>(et3.impl())->data<float>();
......
......@@ -114,7 +114,7 @@ static void ShareTensorsIntoScope(const std::vector<Tensor> &tensors,
paddle::framework::Scope *scope) {
for (size_t i = 0; i < tensors.size(); ++i) {
auto name = tensors[i].name();
if (name == "Fake_var" || !tensors[i].is_initialized()) {
if (name == "Fake_var" || !tensors[i].initialized()) {
continue;
}
auto *var = scope->Var(name);
......
......@@ -447,7 +447,7 @@ void EagerUtils::FillZeroForEmptyGradInputs(
for (size_t i = 0; i < in_grads->size(); i++) {
for (size_t j = 0; j < (*in_grads)[i].size(); j++) {
paddle::experimental::Tensor& grad = (*in_grads)[i][j];
if (!grad.is_initialized()) {
if (!grad.initialized()) {
const GradSlotMeta& grad_in_meta = grad_in_metas[i][j];
PADDLE_ENFORCE(
grad_in_meta.HasTensorMeta(),
......
......@@ -36,7 +36,6 @@ limitations under the License. */
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/string/string_helper.h"
#include "paddle/phi/api/all.h"
#include "paddle/phi/api/lib/ext_compat_utils.h"
#include "paddle/phi/api/lib/utils/tensor_utils.h"
#include "paddle/phi/core/compat/convert_utils.h"
#include "paddle/utils/any.h"
......@@ -627,8 +626,8 @@ class CustomGradOpMaker<imperative::OpBase>
static void RegisterOperatorKernelWithPlace(
const std::string& name,
const OperatorWithKernel::OpKernelFunc& op_kernel_func,
const proto::VarType::Type type, const PlaceType& place) {
OpKernelType key(type, experimental::ConvertExtPlaceToInnerPlace(place));
const proto::VarType::Type type, const platform::Place& place) {
OpKernelType key(type, place);
VLOG(3) << "Custom Operator: op kernel key: " << key;
OperatorWithKernel::AllOpKernels()[name][key] = op_kernel_func;
}
......@@ -666,10 +665,10 @@ static void RegisterOperatorKernel(const std::string& name,
op_kernel_func = func;
}
RegisterOperatorKernelWithPlace(name, op_kernel_func, proto::VarType::RAW,
PlaceType::kCPU);
platform::CPUPlace());
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
RegisterOperatorKernelWithPlace(name, op_kernel_func, proto::VarType::RAW,
PlaceType::kGPU);
platform::CUDAPlace());
#endif
}
......
......@@ -137,7 +137,7 @@ void InitTensorWithTensor(TensorObject* self,
const paddle::platform::Place& place,
const std::string& name) {
self->tensor.set_name(name);
if (place == src.inner_place()) {
if (place == src.place()) {
auto impl = std::static_pointer_cast<phi::DenseTensor>(src.impl());
self->tensor.set_impl(impl);
VLOG(4) << "Same place, do ShareDataWith";
......
......@@ -554,32 +554,32 @@ static PyObject* eager_api_async_read(PyObject* self, PyObject* args,
src.is_gpu_pinned(), true,
platform::errors::InvalidArgument("Required `src` device should be "
"CUDAPinnedPlace, but received %d.",
src.inner_place()));
src.place()));
PADDLE_ENFORCE_EQ(
dst.is_gpu(), true,
platform::errors::InvalidArgument(
"Required `dst` device should be CUDAPlace, but received %d.",
dst.inner_place()));
dst.place()));
PADDLE_ENFORCE_EQ(
index.is_cpu(), true,
platform::errors::InvalidArgument(
"Required `index` device should be CPUPlace, but received %d.",
index.inner_place()));
index.place()));
PADDLE_ENFORCE_EQ(buffer.is_gpu_pinned(), true,
platform::errors::InvalidArgument(
"Required `buffer` device should be CUDAPinnedPlace, "
"but received %d.",
buffer.inner_place()));
buffer.place()));
PADDLE_ENFORCE_EQ(
offset.is_cpu(), true,
platform::errors::InvalidArgument(
"Required `offset` device should be CPUPlace, but received %d.",
offset.inner_place()));
offset.place()));
PADDLE_ENFORCE_EQ(
count.is_cpu(), true,
platform::errors::InvalidArgument(
"Required `count` device should be CPUPlace, but received %d.",
count.inner_place()));
count.place()));
auto& src_tensor = src;
auto* dst_tensor = &dst;
......@@ -701,22 +701,22 @@ static PyObject* eager_api_async_write(PyObject* self, PyObject* args,
src.is_gpu(), true,
platform::errors::InvalidArgument(
"Required `src` device should be CUDAPlace, but received %d. ",
src.inner_place()));
src.place()));
PADDLE_ENFORCE_EQ(dst.is_gpu_pinned(), true,
platform::errors::InvalidArgument(
"Required `dst` device should be CUDAPinnedPlace, "
"but received %d. ",
dst.inner_place()));
dst.place()));
PADDLE_ENFORCE_EQ(
offset.is_cpu(), true,
platform::errors::InvalidArgument("Required `offset` device should "
"be CPUPlace, but received %d. ",
offset.inner_place()));
offset.place()));
PADDLE_ENFORCE_EQ(
count.is_cpu(), true,
platform::errors::InvalidArgument(
"Required `count` device should be CPUPlace, but received %d. ",
count.inner_place()));
count.place()));
// TODO(daisiming): In future, add index as arguments following
// async_read.
......
......@@ -342,11 +342,11 @@ static PyObject* tensor_method_copy_(TensorObject* self, PyObject* args,
->SetPersistable(
egr::EagerUtils::autograd_meta(&(src_tensor))->Persistable());
if (src_tensor.initialized()) {
self->tensor.copy_(src_tensor, src_tensor.inner_place(), blocking);
self->tensor.copy_(src_tensor, src_tensor.place(), blocking);
}
} else {
if (src_tensor.initialized()) {
self->tensor.copy_(src_tensor, self->tensor.inner_place(), blocking);
self->tensor.copy_(src_tensor, self->tensor.place(), blocking);
}
}
......@@ -617,7 +617,7 @@ static PyObject* tensor__getitem_index_not_tensor(TensorObject* self,
// if index is a list, list_select_flag will be true
bool list_select_flag = false;
PADDLE_ENFORCE_EQ(
self->tensor.is_initialized(), true,
self->tensor.initialized(), true,
platform::errors::InvalidArgument(
"tensor %s has not been initialized, we can only slice initialized "
"tensor please init it first with numpy or other tensor.",
......@@ -921,7 +921,7 @@ static PyObject* tensor_method__setitem_eager_tensor(TensorObject* self,
"please check the type of tensor."));
}
if (value_tensor_tmp.place() == paddle::PlaceType::kUNK) {
if (!value_tensor_tmp.initialized()) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
SetTensorFromPyArray(
static_cast<phi::DenseTensor*>(value_tensor_tmp.impl().get()),
......@@ -934,7 +934,7 @@ static PyObject* tensor_method__setitem_eager_tensor(TensorObject* self,
} else {
SetTensorFromPyArray(
static_cast<phi::DenseTensor*>(value_tensor_tmp.impl().get()),
value, value_tensor_tmp.inner_place(), false);
value, value_tensor_tmp.place(), false);
}
value_tensor = value_tensor_tmp;
......@@ -1009,7 +1009,7 @@ static PyObject* tensor_method__setitem_eager_tensor(TensorObject* self,
VLOG(4) << "index is not tensor";
self_numpy[_index] = py::object(py::handle(value_obj), true);
}
if (self->tensor.place() == paddle::PlaceType::kUNK) {
if (!self->tensor.initialized()) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
SetTensorFromPyArray(self_tensor, self_numpy,
platform::Place(platform::CUDAPlace(0)), false);
......@@ -1018,7 +1018,7 @@ static PyObject* tensor_method__setitem_eager_tensor(TensorObject* self,
platform::Place(platform::CPUPlace()), false);
#endif
} else {
SetTensorFromPyArray(self_tensor, self_numpy, self->tensor.inner_place(),
SetTensorFromPyArray(self_tensor, self_numpy, self->tensor.place(),
false);
}
}
......@@ -1146,7 +1146,7 @@ static PyObject* tensor__copy_gradient_from(TensorObject* self, PyObject* args,
PyObject* kwargs) {
EAGER_TRY
auto src = CastPyArg2Tensor(PyTuple_GET_ITEM(args, 0), 0);
if (self->tensor.is_initialized()) {
if (self->tensor.initialized()) {
PADDLE_ENFORCE_EQ(self->tensor.dtype(), src.dtype(),
platform::errors::PreconditionNotMet(
"Tensor %s has different data type with Tensor %s",
......@@ -1367,7 +1367,7 @@ static PyObject* tensor_method__share_memory(TensorObject* self, PyObject* args,
PyObject* kwargs) {
EAGER_TRY
#ifndef _WIN32
PADDLE_ENFORCE_EQ(platform::is_cpu_place(self->tensor.inner_place()), true,
PADDLE_ENFORCE_EQ(platform::is_cpu_place(self->tensor.place()), true,
platform::errors::InvalidArgument(
"Sharing memory only support CPU Tensor currently"));
// 1. get LoDTensor
......@@ -1419,7 +1419,7 @@ static PyObject* tensor_method__uva(TensorObject* self, PyObject* args,
platform::errors::InvalidArgument(
"Unified virtual addressing only support "
"DenseTensor currently."));
PADDLE_ENFORCE_EQ(platform::is_cpu_place(self->tensor.inner_place()), true,
PADDLE_ENFORCE_EQ(platform::is_cpu_place(self->tensor.place()), true,
platform::errors::InvalidArgument(
"Unified virtual addressing only support "
"CPU Tensor currently."));
......
......@@ -108,7 +108,7 @@ int tensor_properties_set_grad(TensorObject* self, PyObject* value,
"Detected NULL grad"
"Please check if you have manually cleared"
"the grad inside autograd_meta"));
grad->copy_(src, self->tensor.inner_place(), true);
grad->copy_(src, self->tensor.place(), true);
return 0;
EAGER_CATCH_AND_THROW_RETURN_NEG
}
......@@ -160,14 +160,14 @@ PyObject* tensor_properties_get_shape(TensorObject* self, void* closure) {
PyObject* tensor_properties_get_place(TensorObject* self, void* closure) {
EAGER_TRY
return ToPyObject(self->tensor.inner_place());
return ToPyObject(self->tensor.place());
EAGER_CATCH_AND_THROW_RETURN_NULL
}
PyObject* tensor_properties_get_place_str(TensorObject* self, void* closure) {
EAGER_TRY
std::stringstream ostr;
ostr << self->tensor.inner_place();
ostr << self->tensor.place();
return ToPyObject(ostr.str());
EAGER_CATCH_AND_THROW_RETURN_NULL
}
......
......@@ -41,5 +41,4 @@ limitations under the License. */
#include "paddle/phi/api/ext/dispatch.h"
#include "paddle/phi/api/ext/exception.h"
#include "paddle/phi/api/ext/op_meta_info.h"
#include "paddle/phi/api/ext/place.h"
#include "paddle/phi/api/ext/tensor_compat.h"
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
namespace paddle {
// TODO(yangjiabin): Add other place support in next PR
enum class PlaceType { kUNK = -1, kCPU, kGPU };
} // namespace paddle
......@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#include "paddle/phi/api/include/api.h"
#include "paddle/phi/api/include/tensor.h"
// Note(chenweihang): In order to be compatible with the original custom
......@@ -21,5 +22,8 @@ limitations under the License. */
// cannot be included in paddle
namespace paddle {
using Tensor = paddle::experimental::Tensor;
using Tensor = experimental::Tensor;
// using several Tensor initialize functions in paddle namespace
using experimental::empty;
using experimental::full;
} // namespace paddle
......@@ -29,7 +29,6 @@ using gpuStream_t = cudaStream_t;
using gpuStream_t = hipStream_t;
#endif
#include "paddle/phi/api/ext/place.h"
#include "paddle/phi/api/include/dll_decl.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/common/layout.h"
......@@ -109,21 +108,23 @@ class PADDLE_API Tensor final {
/**
* @brief Construct a new Tensor object on the target place.
* This is a deprecated method and may be removed in the future!
*
* This is a deprecated method and may be removed in the future!!!
*
* @param place
*/
explicit Tensor(const PlaceType& place);
explicit Tensor(const Place& place);
/**
* @brief Construct a new Tensor object on the target place
* with specified shape.
* This is a deprecated method and may be removed in the future!
*
* This is a deprecated method and may be removed in the future!!!
*
* @param place
* @param shape
*/
Tensor(const PlaceType& place, const std::vector<int64_t>& shape);
Tensor(const Place& place, const std::vector<int64_t>& shape);
/**
* @brief Construct a new Tensor object by a TensorBase pointer and name
......@@ -135,8 +136,9 @@ class PADDLE_API Tensor final {
/**
* @brief Construct a new Tensor object with name
*
* @note Used to adapt original execution mechanism and debug analysis
* in the development of new dygraph. It may be removed in the future.
* @note Internal method, used to adapt original execution mechanism and
* debug analysis in the development of new dygraph. It may be removed in
* the future.
* */
explicit Tensor(const std::string& name) : name_(name) {}
......@@ -151,6 +153,7 @@ class PADDLE_API Tensor final {
/**
* @brief Get the size of current tensor.
*
* The compatible method of `Tensor::numel()`.
* This is a deprecated method and may be removed in the future!
*
......@@ -167,6 +170,7 @@ class PADDLE_API Tensor final {
/**
* @brief Return the shape (dimensions) of Tensor.
*
* The compatible method of `Tensor::dims()`.
* This is a deprecated method and may be removed in the future!
*
......@@ -178,7 +182,7 @@ class PADDLE_API Tensor final {
* @brief Reset the shape of the tensor.
* @note: This method means Reset the shape of the tensor,
* and must be called before calling mutable_data() or
* copy_to(const PlaceType& place), this is not a standard definition of
* copy_to(const Place& place), this is not a standard definition of
* reshape behavior, so we will deprecated this feature in the future.
*
* @param shape
......@@ -194,6 +198,7 @@ class PADDLE_API Tensor final {
/**
* @brief Return the data type of Tensor.
*
* The compatible method of `Tensor::dtype()`.
* This is a deprecated method and may be removed in the future!
*
......@@ -244,20 +249,10 @@ class PADDLE_API Tensor final {
/**
* @brief Return the place (device) of Tensor.
* This is a deprecated method and may be removed in the future!
*
* @return PlaceType
* @return Place
*/
PlaceType place() const;
/**
* @brief Return the place (device) of Tensor.
* Because the `place` method already exists, so we need to use a new name,
* here we temporarily use `inner_place`.
*
* @return paddle::platform::Place
*/
phi::Place inner_place() const;
Place place() const;
/**
* @brief Determine whether the tensor device is CPU
......@@ -287,7 +282,7 @@ class PADDLE_API Tensor final {
/**
* @brief Get the memory pointer in CPU or GPU with specific data type.
* It's usually used to get the output data pointer.
* It's usually used to get the output data pointer, same as the T* data().
*
* @tparam T
* @return T*
......@@ -297,6 +292,7 @@ class PADDLE_API Tensor final {
/**
* @brief Get the memory pointer in CPU or GPU with specific data type.
*
* It's usually used to get the output data pointer.
* This is a deprecated method and may be removed in the future!
*
......@@ -305,7 +301,7 @@ class PADDLE_API Tensor final {
* @return T*
*/
template <typename T>
T* mutable_data(const PlaceType& place);
T* mutable_data(const Place& place);
/**
* @brief Get the const memory pointer directly.
......@@ -319,8 +315,7 @@ class PADDLE_API Tensor final {
/**
* @brief Get the memory pointer directly.
* It's usually used to get the output data pointer.
* This is a deprecated method and may be removed in the future!
* It's usually used to get the mutable output data pointer.
*
* @tparam T
* @return T*
......@@ -409,7 +404,7 @@ class PADDLE_API Tensor final {
* @return Tensor
*/
template <typename T>
Tensor copy_to(const PlaceType& target_place) const;
Tensor copy_to(const Place& target_place) const;
/**
* @brief Transfer the current Tensor to the specified device and return.
......@@ -427,7 +422,8 @@ class PADDLE_API Tensor final {
* @param blocking, Should we copy this in sync way.
* @return void
*/
void copy_(const Tensor& src, const phi::Place& target_place, bool blocking);
void copy_(const Tensor& src, const Place& target_place, bool blocking);
/**
* @brief Cast datatype from one to another
*
......@@ -489,11 +485,17 @@ class PADDLE_API Tensor final {
/* Part 8: Autograd methods */
/**
* @brief Get the autograd meta object
* @brief Get the autograd meta object pointer
*
* @return AbstractAutogradMeta*
*/
AbstractAutogradMeta* get_autograd_meta() const;
/**
* @brief Get the shared pointer of autograd meta object
*
* @return std::shared_ptr<AbstractAutogradMeta>&
*/
const std::shared_ptr<AbstractAutogradMeta>& mutable_autograd_meta() const;
/**
......@@ -524,7 +526,7 @@ class PADDLE_API Tensor final {
/* Part 10: Auto generated Tensor methods */
/* Part 11: Methods of converting SparseTensor and DenseTensor to each other
/* Part 11: Methods of converting underlying TensorType to each other
*/
/**
* @brief Convert DenseTensor or SparseCsrTensor to SparseCooTensor
......@@ -587,12 +589,6 @@ class PADDLE_API Tensor final {
* in the development of new dygraph. It may be removed in the future.
*/
std::string name_{""};
/**
* Place type: Return the expected memory location if the Tensor is
* uninitialized.
*/
PlaceType place_{PlaceType::kUNK};
};
} // namespace experimental
......
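Taken together, the header changes above keep the deprecated custom-op methods compiling, now over `Place` instead of `PlaceType`, while the comments point at the preferred equivalents. A minimal sketch of those replacements (shapes, dtype, and variable names are illustrative, not taken from the diff):

// Preferred: create an allocated tensor via the factory API, then access it
// through data<T>() rather than mutable_data<T>().
paddle::Tensor t = paddle::full({2, 3}, 1, phi::DataType::FLOAT32, phi::CPUPlace());
float* ptr = t.data<float>();      // instead of t.mutable_data<float>(place)
int64_t n = t.numel();             // instead of the deprecated t.size()
auto copy = t.copy_to(phi::CPUPlace(), /*blocking=*/false);  // instead of copy_to<T>(place)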
add_subdirectory(utils)
cc_library(ext_compat_utils SRCS ext_compat_utils.cc DEPS place)
if (WITH_GPU)
nv_library(phi_tensor_raw SRCS tensor.cc DEPS tensor_base dense_tensor phi_api_utils ext_compat_utils phi_enforce)
nv_library(phi_tensor_raw SRCS tensor.cc DEPS tensor_base dense_tensor phi_api_utils phi_enforce)
elseif (WITH_ROCM)
hip_library(phi_tensor_raw SRCS tensor.cc DEPS tensor_base dense_tensor phi_api_utils ext_compat_utils phi_enforce)
hip_library(phi_tensor_raw SRCS tensor.cc DEPS tensor_base dense_tensor phi_api_utils phi_enforce)
else()
cc_library(phi_tensor_raw SRCS tensor.cc DEPS tensor_base dense_tensor phi_api_utils ext_compat_utils phi_enforce)
cc_library(phi_tensor_raw SRCS tensor.cc DEPS tensor_base dense_tensor phi_api_utils phi_enforce)
endif()
set(api_gen_base ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen/api_base.py)
......
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/api/lib/ext_compat_utils.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
namespace paddle {
namespace experimental {
platform::Place ConvertExtPlaceToInnerPlace(PlaceType p) {
if (p == PlaceType::kCPU) {
return platform::Place(platform::CPUPlace());
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
} else if (p == PlaceType::kGPU) {
return platform::Place(platform::CUDAPlace(platform::GetCurrentDeviceId()));
#endif
} else {
PADDLE_THROW(
platform::errors::Unimplemented("Unsupported place type code(%d) when "
"casting enum place to paddle place.",
static_cast<int>(p)));
}
return platform::Place();
}
PlaceType ConvertInnerPlaceToExtPlace(const platform::Place& p) {
if (platform::is_cpu_place(p)) {
return PlaceType::kCPU;
} else if (platform::is_gpu_place(p)) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
return PlaceType::kGPU;
#endif
} else {
PADDLE_THROW(
platform::errors::Unimplemented("Unsupported place type `%s` when "
"casting paddle place to enum place.",
p));
}
return PlaceType::kUNK;
}
Backend ConvertExtPlaceToBackend(PlaceType p) {
switch (p) {
case PlaceType::kCPU:
return Backend::CPU;
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
case PlaceType::kGPU:
return Backend::GPU;
#endif
default:
PADDLE_THROW(
platform::errors::Unimplemented("Unsupported place type `%s` when "
"casting enum place to backend.",
static_cast<int>(p)));
}
}
} // namespace experimental
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/api/ext/place.h"
#include "paddle/phi/common/backend.h"
namespace paddle {
namespace experimental {
platform::Place ConvertExtPlaceToInnerPlace(PlaceType p);
PlaceType ConvertInnerPlaceToExtPlace(const platform::Place& p);
Backend ConvertExtPlaceToBackend(PlaceType p);
} // namespace experimental
} // namespace paddle
......@@ -126,7 +126,7 @@ Backend ParseBackend(const Place& place) {
return phi::TransToPhiBackend(place);
}
Backend ParseBackend(const Tensor& tensor) {
return phi::TransToPhiBackend(tensor.inner_place());
return phi::TransToPhiBackend(tensor.place());
}
Backend ParseBackendWithInputOrder(const Place& place, const Tensor& tensor) {
......
......@@ -19,46 +19,41 @@ limitations under the License. */
#include <vector>
#include "glog/logging.h"
#include "paddle/phi/api/lib/ext_compat_utils.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/api/lib/utils/storage.h"
#include "paddle/phi/core/compat/convert_utils.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/phi/core/ddim.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/selected_rows.h"
#include "paddle/phi/core/sparse_coo_tensor.h"
#include "paddle/phi/core/sparse_csr_tensor.h"
#include "paddle/phi/core/tensor_base.h"
#include "paddle/phi/core/tensor_meta.h"
#include "paddle/phi/core/tensor_utils.h"
/**
* [ Why still include the fluid headers? ]
*
* We hope to organize the basic implementation of Tensor and the logic related
* to Tensor computation into an independent library, which we call
* [Tensor Operation Library, phi], so we extract or rewrite the original
* Kernels.
*
* In the future, the training library, inference library and custom operators
* will link to this Tensor Operation library.
*
* However, if we directly split the link relation, we need to make too many
* changes, which will affect the stability of the framework, so here we still
* rely on the implementation of the framework, which is an intermediate state.
*
* In the future, the necessary components will be moved to this library,
* or the corresponding components will be re-implemented.
*/
#include "paddle/fluid/memory/memory.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/stream/cuda_stream.h"
#include "paddle/phi/common/complex.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/ddim.h"
#include "paddle/phi/core/enforce.h"
namespace paddle {
namespace experimental {
namespace detail {
static Place GetCorrectPlaceByPlaceType(const Place &place_type) {
auto alloc_type = place_type.GetType();
switch (alloc_type) {
case AllocationType::CPU:
return place_type;
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
case AllocationType::GPU:
return phi::Place(AllocationType::GPU,
phi::backends::gpu::GetCurrentDeviceId());
#endif
default:
PADDLE_THROW(phi::errors::Unavailable(
"The PlaceType is a legacy design, only supports CPU and GPU, "
"and will not support other place types in the future."));
}
}
} // namespace detail
/////// Tensor Methods ////////
......@@ -71,27 +66,43 @@ Tensor::Tensor(std::shared_ptr<phi::TensorBase> tensor_impl)
phi::errors::InvalidArgument("TensorImpl with nullptr is not supported"));
}
Tensor::Tensor(const PlaceType &place)
: impl_(std::move(std::make_shared<phi::DenseTensor>(
std::move(phi::make_intrusive<SharedStorage>(
ConvertExtPlaceToInnerPlace(place))),
std::move(phi::DenseTensorMeta(phi::DataType::UNDEFINED,
phi::make_ddim({}),
phi::DataLayout::NCHW))))),
place_{place} {}
Tensor::Tensor(const PlaceType &place, const std::vector<int64_t> &shape)
: impl_(std::move(std::make_shared<phi::DenseTensor>(
std::move(phi::make_intrusive<SharedStorage>(
ConvertExtPlaceToInnerPlace(place))),
std::move(phi::DenseTensorMeta(phi::DataType::UNDEFINED,
phi::make_ddim(shape),
phi::DataLayout::NCHW))))),
place_{place} {}
Tensor::Tensor(const Place &place) {
LOG_FIRST_N(WARNING, 1)
<< "The Tensor(place) constructor is deprecated since version "
"2.3, and will be removed in version 2.4! Please use "
"`paddle::empty/full` method to create a new "
"Tensor instead. "
"Reason: A legal tensor cannot be constructed only based on "
"the `place`, and datatype, shape, layout, etc. is also "
"required.";
DefaultAllocator alloc(detail::GetCorrectPlaceByPlaceType(place));
impl_ = std::move(std::make_shared<phi::DenseTensor>(
&alloc,
std::move(phi::DenseTensorMeta(
phi::DataType::FLOAT32, phi::make_ddim({}), phi::DataLayout::NCHW))));
}
Tensor::Tensor(const Place &place, const std::vector<int64_t> &shape) {
LOG_FIRST_N(WARNING, 1)
<< "The Tensor(place, shape) constructor is deprecated since "
"version 2.3, and will be removed in version 2.4! Please use "
"`paddle::empty/full` method to create a new "
"Tensor instead. "
"Reason: A legal tensor cannot be constructed only based on "
"the `place` and `shape`, and datatype, layout, etc. is also "
"required.";
DefaultAllocator alloc(detail::GetCorrectPlaceByPlaceType(place));
impl_ = std::move(std::make_shared<phi::DenseTensor>(
&alloc,
std::move(phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({shape}),
phi::DataLayout::NCHW))));
}
Tensor::Tensor(std::shared_ptr<phi::TensorBase> tensor_impl,
const std::string &name)
: impl_(std::move(tensor_impl)), name_(std::move(name)) {}
/* Part 2: Dimension, DataType and DataLayout methods */
int64_t Tensor::numel() const { return impl_->numel(); }
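For reference, the deprecated constructors above no longer carry a separate `place_` member; they build the tensor directly on the place resolved by `GetCorrectPlaceByPlaceType` and warn once. A small hypothetical usage:

// Still compiles, logs the one-time deprecation warning shown above;
// prefer paddle::full/empty as the warning message suggests.
paddle::Tensor t(phi::CPUPlace(), {2, 3});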
......@@ -109,17 +120,17 @@ std::vector<int64_t> Tensor::shape() const {
}
void Tensor::reshape(const std::vector<int64_t> &shape) {
LOG(WARNING) << "The function of resetting the shape of the uninitialized "
"Tensor of the `reshape` method is deprecated since version "
"2.3, and will be removed in version 2.4, please use "
"`paddle::experimental::full` method to create a new Tensor "
"instead. "
"reason: `reshape` means changing the tensor shape without "
"touching underlying data, this requires the total size of "
"the tensor to remain constant.";
LOG_FIRST_N(WARNING, 1)
<< "The function of resetting the shape of the uninitialized "
"Tensor of the `reshape` method is deprecated since version "
"2.3, and will be removed in version 2.4, please use "
"`paddle::empty/full` method to create a new Tensor "
"instead. "
"reason: `reshape` means changing the tensor shape without "
"touching underlying data, this requires the total size of "
"the tensor to remain constant.";
if (is_dense_tensor()) {
std::dynamic_pointer_cast<phi::DenseTensor>(impl_)->Resize(
phi::make_ddim(shape));
static_cast<phi::DenseTensor *>(impl_.get())->Resize(phi::make_ddim(shape));
} else {
PADDLE_THROW(phi::errors::Unimplemented(
"Only support reshape operation on DenseTensor now."));
......@@ -146,42 +157,40 @@ bool Tensor::is_sparse_csr_tensor() const {
}
/* Part 3: Device and Backend methods */
PlaceType Tensor::place() const {
if (!impl_->initialized()) {
return place_;
} else {
return ConvertInnerPlaceToExtPlace(impl_->place());
}
}
paddle::platform::Place Tensor::inner_place() const {
Place Tensor::place() const {
PADDLE_ENFORCE_NOT_NULL(
impl_,
phi::errors::PermissionDenied(
"Null pointer error, the impl_ of Tensor should not be "
"Null when calling Tensor::inner_place()."));
"Null when calling Tensor::place()."));
return impl_->place();
}
bool Tensor::is_cpu() const {
return paddle::platform::is_cpu_place(inner_place());
}
bool Tensor::is_cpu() const { return paddle::platform::is_cpu_place(place()); }
bool Tensor::is_gpu() const {
return paddle::platform::is_gpu_place(inner_place());
}
bool Tensor::is_gpu() const { return paddle::platform::is_gpu_place(place()); }
bool Tensor::is_gpu_pinned() const {
return paddle::platform::is_cuda_pinned_place(inner_place());
return paddle::platform::is_cuda_pinned_place(place());
}
/* Part 4: Data Access methods */
template <typename T>
T *Tensor::mutable_data() {
LOG_FIRST_N(WARNING, 1)
<< "Allocating memory through `mutable_data` method is "
"deprecated since version 2.3, and `mutable_data` method "
"will be removed in version 2.4! Please use "
"`paddle::empty/full` method to create a new "
"Tensor with allocated memory, and use data<T>() method "
"to get the memory pointer of tensor instead. "
"Reason: When calling `mutable_data` to allocate memory, "
"the place, datatype, and data layout of tensor may be in "
"an illegal state.";
if (is_dense_tensor()) {
return std::dynamic_pointer_cast<phi::DenseTensor>(impl_)->mutable_data<T>(
ConvertExtPlaceToInnerPlace(place()));
return static_cast<phi::DenseTensor *>(impl_.get())
->mutable_data<T>(place());
}
return nullptr;
}
......@@ -202,51 +211,44 @@ template PADDLE_API phi::dtype::float16 *
Tensor::mutable_data<phi::dtype::float16>();
template <typename T>
T *Tensor::mutable_data(const PlaceType &place) {
auto inner_place = ConvertExtPlaceToInnerPlace(place);
if (impl_->initialized()) {
PADDLE_ENFORCE_EQ(
platform::is_same_place(inner_place, impl_->place()),
true,
phi::errors::Unimplemented("Modification of tensor place through "
"mutable_data is not supported now"));
}
T *Tensor::mutable_data(const Place &place) {
LOG_FIRST_N(WARNING, 1)
<< "Allocating memory through `mutable_data` method is "
"deprecated since version 2.3, and `mutable_data` method "
"will be removed in version 2.4! Please use "
"`paddle::empty/full` method to create a new "
"Tensor with allocated memory, and use data<T>() method "
"to get the memory pointer of tensor instead. "
"Reason: When calling `mutable_data` to allocate memory, "
"the datatype, and data layout of tensor may be in "
"an illegal state.";
if (is_dense_tensor()) {
return std::dynamic_pointer_cast<phi::DenseTensor>(impl_)->mutable_data<T>(
inner_place);
return static_cast<phi::DenseTensor *>(impl_.get())->mutable_data<T>(place);
}
return nullptr;
}
template PADDLE_API float *Tensor::mutable_data<float>(const PlaceType &place);
template PADDLE_API double *Tensor::mutable_data<double>(
const PlaceType &place);
template PADDLE_API int64_t *Tensor::mutable_data<int64_t>(
const PlaceType &place);
template PADDLE_API int32_t *Tensor::mutable_data<int32_t>(
const PlaceType &place);
template PADDLE_API uint8_t *Tensor::mutable_data<uint8_t>(
const PlaceType &place);
template PADDLE_API int8_t *Tensor::mutable_data<int8_t>(
const PlaceType &place);
template PADDLE_API int16_t *Tensor::mutable_data<int16_t>(
const PlaceType &place);
template PADDLE_API bool *Tensor::mutable_data<bool>(const PlaceType &place);
template PADDLE_API float *Tensor::mutable_data<float>(const Place &place);
template PADDLE_API double *Tensor::mutable_data<double>(const Place &place);
template PADDLE_API int64_t *Tensor::mutable_data<int64_t>(const Place &place);
template PADDLE_API int32_t *Tensor::mutable_data<int32_t>(const Place &place);
template PADDLE_API uint8_t *Tensor::mutable_data<uint8_t>(const Place &place);
template PADDLE_API int8_t *Tensor::mutable_data<int8_t>(const Place &place);
template PADDLE_API int16_t *Tensor::mutable_data<int16_t>(const Place &place);
template PADDLE_API bool *Tensor::mutable_data<bool>(const Place &place);
template PADDLE_API phi::dtype::complex<float>
*Tensor::mutable_data<phi::dtype::complex<float>>(const PlaceType &place);
*Tensor::mutable_data<phi::dtype::complex<float>>(const Place &place);
template PADDLE_API phi::dtype::complex<double>
*Tensor::mutable_data<phi::dtype::complex<double>>(const PlaceType &place);
*Tensor::mutable_data<phi::dtype::complex<double>>(const Place &place);
template PADDLE_API phi::dtype::float16 *
Tensor::mutable_data<phi::dtype::float16>(const PlaceType &place);
Tensor::mutable_data<phi::dtype::float16>(const Place &place);
template <typename T>
const T *Tensor::data() const {
if (is_dense_tensor()) {
return std::dynamic_pointer_cast<phi::DenseTensor>(impl_)->data<T>();
} else if (phi::SelectedRows::classof(impl_.get())) {
return std::dynamic_pointer_cast<phi::SelectedRows>(impl_)
->value()
.data<T>();
return static_cast<phi::DenseTensor *>(impl_.get())->data<T>();
} else if (is_selected_rows()) {
return static_cast<phi::SelectedRows *>(impl_.get())->value().data<T>();
}
return nullptr;
}
......@@ -271,9 +273,9 @@ Tensor::data<phi::dtype::bfloat16>() const;
template <typename T>
T *Tensor::data() {
if (is_dense_tensor()) {
return std::dynamic_pointer_cast<phi::DenseTensor>(impl_)->data<T>();
} else if (phi::SelectedRows::classof(impl_.get())) {
return std::dynamic_pointer_cast<phi::SelectedRows>(impl_)
return static_cast<phi::DenseTensor *>(impl_.get())->data<T>();
} else if (is_selected_rows()) {
return static_cast<phi::SelectedRows *>(impl_.get())
->mutable_value()
->data<T>();
}
......@@ -299,7 +301,7 @@ Tensor Tensor::slice(int64_t begin_idx, int64_t end_idx) const {
if (is_dense_tensor()) {
return Tensor(std::make_shared<phi::DenseTensor>(
std::move(phi::DenseTensorUtils::Slice(
*(std::dynamic_pointer_cast<phi::DenseTensor>(impl_).get()),
*(static_cast<phi::DenseTensor *>(impl_.get())),
begin_idx,
end_idx))));
} else {
......@@ -331,6 +333,10 @@ bool Tensor::defined() const { return impl_ != nullptr; }
bool Tensor::initialized() const { return defined() && impl_->initialized(); }
bool Tensor::is_initialized() const {
LOG_FIRST_N(WARNING, 1)
<< "The `is_initialized` method is deprecated since version "
"2.3, and will be removed in version 2.4! "
"Please use `initialized` method instead.";
return defined() && impl_->initialized();
}
......@@ -342,7 +348,6 @@ Tensor &Tensor::operator=(const Tensor &x) & {
impl_ = x.impl_;
autograd_meta_ = x.autograd_meta_;
name_ = x.name_;
place_ = x.place_;
return *this;
}
......@@ -350,7 +355,6 @@ Tensor &Tensor::operator=(Tensor &&x) & {
impl_ = std::move(x.impl_);
autograd_meta_ = std::move(x.autograd_meta_);
name_ = std::move(x.name_);
place_ = std::move(x.place_);
return *this;
}
......@@ -371,8 +375,7 @@ void Tensor::set_autograd_meta(
void Tensor::bump_inplace_version() {
if (is_dense_tensor()) {
auto &inplace_version_counter =
std::dynamic_pointer_cast<phi::DenseTensor>(impl_)
->InplaceVersionCounter();
static_cast<phi::DenseTensor *>(impl_.get())->InplaceVersionCounter();
inplace_version_counter.Bump();
} else {
PADDLE_THROW(phi::errors::Unimplemented(
......@@ -383,8 +386,7 @@ void Tensor::bump_inplace_version() {
uint32_t Tensor::current_inplace_version() {
if (is_dense_tensor()) {
auto &inplace_version_counter =
std::dynamic_pointer_cast<phi::DenseTensor>(impl_)
->InplaceVersionCounter();
static_cast<phi::DenseTensor *>(impl_.get())->InplaceVersionCounter();
return inplace_version_counter.CurrentVersion();
} else {
PADDLE_THROW(phi::errors::Unimplemented(
......@@ -397,8 +399,7 @@ void Tensor::reset_inplace_version(bool set_to_zero) {
if (set_to_zero) {
if (is_dense_tensor()) {
auto &inplace_version_counter =
std::dynamic_pointer_cast<phi::DenseTensor>(impl_)
->InplaceVersionCounter();
static_cast<phi::DenseTensor *>(impl_.get())->InplaceVersionCounter();
inplace_version_counter.SetInplaceVersionToZero();
}
}
......
......@@ -14,7 +14,6 @@ limitations under the License. */
#include "paddle/phi/api/include/tensor.h"
#include "paddle/phi/api/lib/ext_compat_utils.h"
#include "paddle/phi/common/int_array.h"
#include "paddle/phi/core/compat/convert_utils.h"
#include "paddle/phi/core/tensor_base.h"
......@@ -39,42 +38,43 @@ Tensor Tensor::copy_to(Place place, bool blocking) const {
}
template <typename T>
Tensor Tensor::copy_to(const PlaceType &target_place) const {
LOG(WARNING) << "The Tensor's `copy_to` method is deprecated since version "
"2.3, and will be removed in version 2.4, please use "
"`copy_to` method without template argument instead. "
"reason: copying a Tensor to another device does not need "
"to specify the data type template argument.";
return copy_to(ConvertExtPlaceToInnerPlace(target_place), /*blocking=*/false);
Tensor Tensor::copy_to(const Place &target_place) const {
LOG_FIRST_N(WARNING, 1)
<< "The Tensor's `copy_to` method is deprecated since version "
"2.3, and will be removed in version 2.4, please use "
"`copy_to` method without template argument instead. "
"reason: copying a Tensor to another device does not need "
"to specify the data type template argument.";
return copy_to(target_place, /*blocking=*/false);
}
template PADDLE_API Tensor
Tensor::copy_to<float>(const PlaceType &target_place) const;
Tensor::copy_to<float>(const Place &target_place) const;
template PADDLE_API Tensor
Tensor::copy_to<double>(const PlaceType &target_place) const;
Tensor::copy_to<double>(const Place &target_place) const;
template PADDLE_API Tensor
Tensor::copy_to<int64_t>(const PlaceType &target_place) const;
Tensor::copy_to<int64_t>(const Place &target_place) const;
template PADDLE_API Tensor
Tensor::copy_to<int32_t>(const PlaceType &target_place) const;
Tensor::copy_to<int32_t>(const Place &target_place) const;
template PADDLE_API Tensor
Tensor::copy_to<uint8_t>(const PlaceType &target_place) const;
Tensor::copy_to<uint8_t>(const Place &target_place) const;
template PADDLE_API Tensor
Tensor::copy_to<int8_t>(const PlaceType &target_place) const;
Tensor::copy_to<int8_t>(const Place &target_place) const;
template PADDLE_API Tensor
Tensor::copy_to<int16_t>(const PlaceType &target_place) const;
Tensor::copy_to<int16_t>(const Place &target_place) const;
template PADDLE_API Tensor
Tensor::copy_to<bool>(const PlaceType &target_place) const;
template PADDLE_API Tensor Tensor::copy_to<phi::dtype::complex<float>>(
const PlaceType &target_place) const;
template PADDLE_API Tensor Tensor::copy_to<phi::dtype::complex<double>>(
const PlaceType &target_place) const;
Tensor::copy_to<bool>(const Place &target_place) const;
template PADDLE_API Tensor
Tensor::copy_to<phi::dtype::float16>(const PlaceType &target_place) const;
Tensor::copy_to<phi::dtype::complex<float>>(const Place &target_place) const;
template PADDLE_API Tensor
Tensor::copy_to<phi::dtype::complex<double>>(const Place &target_place) const;
template PADDLE_API Tensor
Tensor::copy_to<phi::dtype::float16>(const Place &target_place) const;
void Tensor::copy_(const Tensor &src,
const phi::Place &target_place,
bool blocking) {
if (!src.is_initialized()) {
if (!src.initialized()) {
VLOG(8) << "Src is empty, skip copy";
return;
}
......@@ -82,7 +82,7 @@ void Tensor::copy_(const Tensor &src,
auto kernel_key_set = ParseKernelKeyByInputArgs(src);
KernelType kernel_type = ParseKernelTypeByInputArgs(src);
VLOG(3) << "Deep copy Tensor from " << src.name() << " to " << name();
if (is_initialized()) {
if (initialized()) {
PADDLE_ENFORCE_EQ(dtype(),
src.dtype(),
phi::errors::PreconditionNotMet(
......@@ -98,16 +98,15 @@ void Tensor::copy_(const Tensor &src,
name(),
src.name()));
PADDLE_ENFORCE_EQ(target_place,
inner_place(),
place(),
phi::errors::PreconditionNotMet(
"Place is different of dst tensor and args %s, which "
"current tensor holds %s "
"Copy cannot be performed!",
target_place,
inner_place()));
kernel_key_set.backend_set =
kernel_key_set.backend_set |
BackendSet(phi::TransToPhiBackend(inner_place()));
place()));
kernel_key_set.backend_set = kernel_key_set.backend_set |
BackendSet(phi::TransToPhiBackend(place()));
} else {
// Deep Copy AutoGrad info from src to self.
*autograd_meta_ = *(src.autograd_meta_);
......
......@@ -18,6 +18,8 @@ limitations under the License. */
#include <string>
#include <unordered_map>
#include "glog/logging.h"
#include "paddle/phi/api/ext/exception.h"
namespace phi {
......@@ -108,4 +110,34 @@ uint32_t Place::Hash::operator()(const Place &place) const {
return hash_value;
}
Place::Place(paddle::PlaceType type)
: device(0),
alloc_type_(static_cast<AllocationType>(type)),
device_type_id_(GetOrRegisterGlobalDeviceTypeId("")) {
LOG_FIRST_N(WARNING, 1)
<< "The `paddle::PlaceType::kCPU/kGPU` is deprecated since version "
"2.3, and will be removed in version 2.4! Please use "
"`paddle::CPUPlace()/GPUPlace()` to represent the place type.";
}
} // namespace phi
namespace paddle {
bool operator==(const Place &place, PlaceType place_type) {
LOG_FIRST_N(WARNING, 1)
<< "The `paddle::PlaceType::kCPU/kGPU` is deprecated since version "
"2.3, and will be removed in version 2.4! Please use "
"`Tensor::is_cpu()/is_gpu()` method to determine the type of place.";
return place.GetType() == static_cast<AllocationType>(place_type);
}
bool operator==(PlaceType place_type, const Place &place) {
LOG_FIRST_N(WARNING, 1)
<< "The `paddle::PlaceType::kCPU/kGPU` is deprecated since version "
"2.3, and will be removed in version 2.4! Please use "
"`Tensor::is_cpu()/is_gpu()` method to determine the type of place.";
return static_cast<AllocationType>(place_type) == place.GetType();
}
} // namespace paddle
......@@ -18,6 +18,10 @@ limitations under the License. */
#include "paddle/phi/api/include/dll_decl.h"
namespace paddle {
enum class PlaceType;
}
namespace phi {
enum class AllocationType : int8_t {
......@@ -57,6 +61,9 @@ class PADDLE_API Place {
alloc_type_(type),
device_type_id_(GetOrRegisterGlobalDeviceTypeId(dev_type)) {}
// See NOTE [ Why need to temporarily adapt to PlaceType? ]
Place(paddle::PlaceType type); // NOLINT
void Reset(AllocationType type,
int8_t device_id = 0,
const std::string& dev_type = "") noexcept {
......@@ -213,4 +220,43 @@ using GPUPinnedPlace = phi::GPUPinnedPlace;
using XPUPlace = phi::XPUPlace;
using NPUPlace = phi::NPUPlace;
} // namespace experimental
using AllocationType = phi::AllocationType;
using Place = phi::Place;
using CPUPlace = phi::CPUPlace;
using GPUPlace = phi::GPUPlace;
/* NOTE [ Why need to temporarily adapt to PlaceType? ]
`PlaceType` enum class is the place type used by custom operators since the
release of 2.0. Since 2.3, we have refactored the operator library and designed
a new external Place type. The original PlaceType is no longer suitable for use
as an internal type of the framework, but deleting PlaceType immediately would
break previously written custom operators, so it cannot be removed in the short
term. We had better delete this abandoned data type in 2.4.
Note: No new enumerators may be added to this type!!! It is only kept for
compatibility with historical code and will be removed in the future.
This type cannot be used inside the framework! It is only for custom operators!
The original PlaceType definition:
- enum class PlaceType { kUNK = -1, kCPU, kGPU };
The historical PlaceType usage:
- PD_CHECK(x.place() == paddle::PlaceType::kCPU)
- auto out = paddle::Tensor(paddle::PlaceType::kCPU, x.shape());
*/
enum class PlaceType {
kUNK = static_cast<int>(phi::AllocationType::UNDEFINED),
kCPU = static_cast<int>(phi::AllocationType::CPU),
kGPU = static_cast<int>(phi::AllocationType::GPU),
};
PADDLE_API bool operator==(const Place& place, PlaceType place_type);
PADDLE_API bool operator==(PlaceType place_type, const Place& place);
} // namespace paddle
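With the shims above, place comparisons written against the old enum keep compiling and merely log the one-time warning, while the warning text steers users toward the tensor predicates. A small hypothetical check showing both forms side by side:

bool InputOnCpu(const paddle::Tensor& x) {
  // Legacy custom-op style: resolved by the operator== overloads declared
  // above, and triggers the one-time deprecation warning.
  bool legacy = (x.place() == paddle::PlaceType::kCPU);
  // Preferred form named in the warning text.
  bool preferred = x.is_cpu();
  return legacy && preferred;
}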
......@@ -19,7 +19,24 @@ limitations under the License. */
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/compat/convert_utils.h"
// See Note [ Why still include the fluid headers? ]
/**
* [ Why still include the fluid headers? ]
*
* We hope to organize the basic implementation of Tensor and the logic related
* to Tensor computation into an independent library, which we call
* [Tensor Operation Library, phi], so we extract or rewrite the original
* Kernels.
*
* In the future, the training library, inference library and custom operators
* will link to this Tensor Operation library.
*
* However, if we directly split the link relation, we need to make too many
* changes, which will affect the stability of the framework, so here we still
* rely on the implementation of the framework, which is an intermediate state.
*
* In the future, the necessary components will be moved to this library,
* or the corresponding components will be re-implemented.
*/
#include "paddle/fluid/memory/malloc.h"
namespace phi {
......
if(WITH_ROCM)
if(WITH_GPU)
nv_test(test_phi_tensor SRCS test_pten_tensor.cc DEPS phi_tensor glog)
elseif(WITH_ROCM)
hip_test(test_phi_tensor SRCS test_pten_tensor.cc DEPS phi_tensor glog)
else()
cc_test(test_phi_tensor SRCS test_pten_tensor.cc DEPS phi_tensor glog)
......
......@@ -15,7 +15,6 @@
#include "glog/logging.h"
#include "gtest/gtest.h"
#include "paddle/phi/api/include/tensor.h"
#include "paddle/phi/api/lib/ext_compat_utils.h"
#include "paddle/phi/core/kernel_registry.h"
PD_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT);
......@@ -201,7 +200,7 @@ void GroupTestDtype() {
void TestInitilized() {
experimental::Tensor test_tensor(paddle::PlaceType::kCPU, {1, 1});
CHECK(test_tensor.is_initialized() == false);
CHECK(test_tensor.is_initialized() == true);
test_tensor.mutable_data<float>(paddle::PlaceType::kCPU);
CHECK(test_tensor.is_initialized() == true);
float* tensor_data = test_tensor.mutable_data<float>();
......
......@@ -108,7 +108,6 @@ std::vector<paddle::Tensor> relu_cuda_double_backward(
const paddle::Tensor& out, const paddle::Tensor& ddx);
std::vector<paddle::Tensor> ReluForward(const paddle::Tensor& x) {
// TODO(chenweihang): Check Input
if (x.place() == paddle::PlaceType::kCPU) {
return relu_cpu_forward(x);
} else if (x.place() == paddle::PlaceType::kGPU) {
......
......@@ -53,6 +53,7 @@ __global__ void relu_cuda_double_backward_kernel(const data_t* out_data,
}
std::vector<paddle::Tensor> relu_cuda_forward(const paddle::Tensor& x) {
CHECK_GPU_INPUT(x);
auto out = paddle::Tensor(paddle::PlaceType::kGPU, x.shape());
int numel = x.size();
......@@ -70,6 +71,9 @@ std::vector<paddle::Tensor> relu_cuda_forward(const paddle::Tensor& x) {
std::vector<paddle::Tensor> relu_cuda_backward(const paddle::Tensor& x,
const paddle::Tensor& out,
const paddle::Tensor& grad_out) {
CHECK_GPU_INPUT(x);
CHECK_GPU_INPUT(out);
CHECK_GPU_INPUT(grad_out);
auto grad_x = paddle::Tensor(paddle::PlaceType::kGPU, x.shape());
int numel = out.size();
......
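The relu example above still allocates its outputs with the deprecated `paddle::Tensor(place, shape)` constructor, which this change keeps working (with a warning). A hedged sketch of the non-deprecated allocation the warnings point to, leaving the rest of the example unchanged:

// Replacement for `auto out = paddle::Tensor(paddle::PlaceType::kGPU, x.shape());`
// as suggested by the deprecation warnings added in this change:
auto out = paddle::full(x.shape(), 0, x.dtype(), x.place());
// ...then launch the CUDA kernel over out as in the original example.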