diff --git a/paddle/phi/api/lib/tensor.cc b/paddle/phi/api/lib/tensor.cc
index cce90ea1e8eed8bc5ddd432fa91e7d8c6c6c0bd9..4e4e7e31cfbc7e47c365549e33901fcca6c61a75 100644
--- a/paddle/phi/api/lib/tensor.cc
+++ b/paddle/phi/api/lib/tensor.cc
@@ -39,24 +39,6 @@ limitations under the License. */
 namespace paddle {
 namespace experimental {
 
-namespace detail {
-static Place GetCorrectPlaceByPlaceType(const Place &place_type) {
-  auto alloc_type = place_type.GetType();
-  switch (alloc_type) {
-    case AllocationType::CPU:
-      return place_type;
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-    case AllocationType::GPU:
-      return phi::Place(AllocationType::GPU,
-                        phi::backends::gpu::GetCurrentDeviceId());
-#endif
-    default:
-      PADDLE_THROW(phi::errors::Unavailable(
-          "The PlaceType is a legacy design, only supports CPU and GPU, "
-          "and will not support other place types in the future."));
-  }
-}
-}  // namespace detail
 
 /////// Tensor Methods ////////
 
@@ -78,7 +60,7 @@ Tensor::Tensor(const Place &place) {
          "Reason: A legal tensor cannot be constructed only based on "
          "the `place`, and datatype, shape, layout, etc. is also "
          "required.";
-  DefaultAllocator alloc(detail::GetCorrectPlaceByPlaceType(place));
+  DefaultAllocator alloc(place);
   impl_ = std::move(std::make_shared<phi::DenseTensor>(
       &alloc,
       std::move(phi::DenseTensorMeta(
@@ -94,7 +76,7 @@ Tensor::Tensor(const Place &place, const std::vector<int64_t> &shape) {
          "Reason: A legal tensor cannot be constructed only based on "
         "the `place` and `shape`, and datatype, layout, etc. is also "
          "required.";
-  DefaultAllocator alloc(detail::GetCorrectPlaceByPlaceType(place));
+  DefaultAllocator alloc(place);
   impl_ = std::move(std::make_shared<phi::DenseTensor>(
       &alloc,
       std::move(phi::DenseTensorMeta(phi::DataType::FLOAT32,
diff --git a/paddle/phi/common/CMakeLists.txt b/paddle/phi/common/CMakeLists.txt
index d9266bd06d2789947e91fcf332c26e52293b69d2..b32e0f4bef5f7c7fda6eb87d03d2e97eec10fbc9 100644
--- a/paddle/phi/common/CMakeLists.txt
+++ b/paddle/phi/common/CMakeLists.txt
@@ -1,4 +1,17 @@
-cc_library(phi_place SRCS place.cc)
+if(WITH_GPU)
+  nv_library(
+    phi_place
+    SRCS place.cc
+    DEPS phi_gpu_info)
+elseif(WITH_ROCM)
+  hip_library(
+    phi_place
+    SRCS place.cc
+    DEPS phi_gpu_info)
+else()
+  cc_library(phi_place SRCS place.cc)
+endif()
+
 cc_library(
   scalar
   SRCS scalar.cc
diff --git a/paddle/phi/common/place.cc b/paddle/phi/common/place.cc
index c15a17651b18bebbde14ad213ff79f83165aa3bf..e9a388c8e9ecab9039f0106d408f0f64a3642c51 100644
--- a/paddle/phi/common/place.cc
+++ b/paddle/phi/common/place.cc
@@ -20,6 +20,7 @@ limitations under the License. */
 
 #include "glog/logging.h"
 #include "paddle/phi/api/ext/exception.h"
+#include "paddle/phi/backends/gpu/gpu_info.h"
 
 namespace phi {
 
@@ -109,14 +110,32 @@ uint32_t Place::Hash::operator()(const Place &place) const {
   return hash_value;
 }
 
+namespace detail {
+static int8_t GetCorrectDeviceIdByPlaceType(
+    const paddle::PlaceType &place_type) {
+  switch (place_type) {
+    case paddle::PlaceType::kCPU:
+      return 0;
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+    case paddle::PlaceType::kGPU:
+      return phi::backends::gpu::GetCurrentDeviceId();
+#endif
+    default:
+      PD_THROW(
+          "The PlaceType is a legacy design, only supports CPU and GPU, "
+          "and will not support other place types in the future.");
+  }
+}
+}  // namespace detail
+
 Place::Place(paddle::PlaceType type)
-    : device(0),
+    : device(detail::GetCorrectDeviceIdByPlaceType(type)),
       alloc_type_(static_cast<AllocationType>(type)),
       device_type_id_(GetOrRegisterGlobalDeviceTypeId("")) {
   LOG_FIRST_N(WARNING, 1)
       << "The `paddle::PlaceType::kCPU/kGPU` is deprecated since version "
          "2.3, and will be removed in version 2.4! Please use "
-         "`paddle::CPUPlace()/GPUPlace()` to represent the place type.";
+         "`paddle::CPUPlace()/DefaultGPUPlace()` to represent the place type.";
 }
 
 }  // namespace phi
@@ -139,4 +158,13 @@ bool operator==(PlaceType place_type, const Place &place) {
   return static_cast<AllocationType>(place_type) == place.GetType();
 }
 
+GPUPlace DefaultGPUPlace() {
+  return GPUPlace(
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+      phi::backends::gpu::GetCurrentDeviceId());
+#else
+      0);
+#endif
+}
+
 }  // namespace paddle
diff --git a/paddle/phi/common/place.h b/paddle/phi/common/place.h
index 199ee81f27200e18de1d9831ff0ffa057fd089e4..cbc1faf94f07c84c7701c6b6d6b3d6d5532a1791 100644
--- a/paddle/phi/common/place.h
+++ b/paddle/phi/common/place.h
@@ -256,4 +256,6 @@ enum class PlaceType {
 PADDLE_API bool operator==(const Place& place, PlaceType place_type);
 PADDLE_API bool operator==(PlaceType place_type, const Place& place);
 
+PADDLE_API GPUPlace DefaultGPUPlace();
+
 }  // namespace paddle
diff --git a/python/paddle/fluid/tests/custom_op/custom_relu_op.cu b/python/paddle/fluid/tests/custom_op/custom_relu_op.cu
index f9314ea4b10663182e114d784db295eb6f532c27..e791ea8cb7600eb78b54b80f8af6265261b1bc66 100644
--- a/python/paddle/fluid/tests/custom_op/custom_relu_op.cu
+++ b/python/paddle/fluid/tests/custom_op/custom_relu_op.cu
@@ -55,6 +55,8 @@ std::vector<paddle::Tensor> relu_cuda_forward(const paddle::Tensor& x) {
   CHECK_GPU_INPUT(x);
   auto out = paddle::empty_like(x);
 
+  PD_CHECK(x.place() == paddle::DefaultGPUPlace());
+
   int64_t numel = x.numel();
   int64_t block = 512;
   int64_t grid = (numel + block - 1) / block;
@@ -75,6 +77,8 @@ std::vector<paddle::Tensor> relu_cuda_backward(const paddle::Tensor& x,
   CHECK_GPU_INPUT(grad_out);
   auto grad_x = paddle::empty_like(x);
 
+  PD_CHECK(x.place() == paddle::DefaultGPUPlace());
+
   int64_t numel = out.numel();
   int64_t block = 512;
   int64_t grid = (numel + block - 1) / block;
@@ -101,12 +105,12 @@ std::vector<paddle::Tensor> relu_cuda_double_backward(
   int64_t grid = (numel + block - 1) / block;
   PD_DISPATCH_FLOATING_AND_HALF_TYPES(
       out.type(), "relu_cuda_double_backward_kernel", ([&] {
-        relu_cuda_double_backward_kernel<
-            data_t><<<grid, block, 0, out.stream()>>>(
-            out.data<data_t>(),
-            ddx.data<data_t>(),
-            ddout.mutable_data<data_t>(out.place()),
-            numel);
+        relu_cuda_double_backward_kernel<data_t>
+            <<<grid, block, 0, out.stream()>>>(
+                out.data<data_t>(),
+                ddx.data<data_t>(),
+                ddout.mutable_data<data_t>(out.place()),
+                numel);
       }));
 
   std::cout << "Debug info: run relu gpu double backward success." << std::endl;
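
Note (not part of the patch): the hunks above make `Place(paddle::PlaceType)` resolve the current device id instead of hard-coding device 0, and expose the same behavior to extension authors through the new `paddle::DefaultGPUPlace()` helper. Below is a minimal sketch of how a custom operator might use it, mirroring the `PD_CHECK` calls added to `custom_relu_op.cu`; the op name `scale_cuda_forward` and the error message are illustrative, not part of this diff.

```cpp
// Sketch only, assuming the standard custom-op workflow.
#include "paddle/extension.h"

std::vector<paddle::Tensor> scale_cuda_forward(const paddle::Tensor& x) {
  // After this change, a tensor allocated on the current CUDA device
  // compares equal to paddle::DefaultGPUPlace() even when the current
  // device id is not 0.
  PD_CHECK(x.place() == paddle::DefaultGPUPlace(),
           "Input must live on the current GPU device.");

  auto out = paddle::empty_like(x);  // same place and dtype as x
  // ... launch a CUDA kernel on x.stream() here ...
  return {out};
}
```

On a multi-GPU machine this check now passes for tensors on whatever device is current, which is exactly the case the old hard-coded `device(0)` default handled incorrectly.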