From 9ffdb2b79ac074b6c07e9a1f7f2ed323e483bf78 Mon Sep 17 00:00:00 2001
From: RedContritio <RedContritio@qq.com>
Date: Thu, 9 Mar 2023 10:36:52 +0800
Subject: [PATCH] =?UTF-8?q?=E3=80=90Hackathon=20No.67=E3=80=91remove=20ref?=
 =?UTF-8?q?erence=20to=20operator.h=20in=20phi=20[part=201]=20(#50624)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add visit_place to phi/core/utils

* remove reference to operator.h in phi/kernels/funcs/math_function.h

* update data type from framework.proto to phi

* fix enforce error in fluid
---
 .../fluid/framework/data_layout_transform.cc  |   1 +
 paddle/fluid/imperative/tests/test_group.cc   |   4 +
 .../operators/detection/anchor_generator_op.h |   1 +
 .../fluid/operators/detection/prior_box_op.h  |   1 +
 paddle/fluid/operators/index_select_op.h      |   1 +
 paddle/fluid/operators/interpolate_op.h       |   1 +
 paddle/fluid/operators/math/prelu.h           |   1 +
 .../fluid/operators/math/sequence_pooling.cc  |   1 +
 paddle/fluid/operators/math/tree2col.cu       |   1 +
 .../reduce_ops/check_reduce_rank_test.cu      |   5 +-
 .../sequence_ops/sequence_expand_op.h         |   1 +
 paddle/phi/core/utils/visit_place.h           | 112 ++++++++++++++++++
 .../cpu/repeat_interleave_grad_kernel.cc      |  18 ++-
 .../cpu/sparse_weight_embedding_kernel.cc     |   5 +-
 .../cpu/take_along_axis_grad_kernel.cc        |   7 +-
 .../kernels/cpu/unique_consecutive_kernel.cc  |   4 +-
 paddle/phi/kernels/cpu/unique_kernel.cc       |   2 +
 paddle/phi/kernels/funcs/math_function.cc     |   3 +-
 paddle/phi/kernels/funcs/math_function.h      |   4 +-
 paddle/phi/kernels/funcs/math_function_impl.h |   1 +
 paddle/phi/kernels/funcs/segment_pooling.cu   |   1 +
 paddle/phi/kernels/funcs/unique_functor.h     |   2 +
 paddle/phi/kernels/gpu/bincount_kernel.cu     |   3 -
 .../kernels/gpu/class_center_sample_kernel.cu |   4 +-
 .../phi/kernels/gpu/edit_distance_kernel.cu   |   1 +
 .../gpu/margin_cross_entropy_grad_kernel.cu   |  10 +-
 .../gpu/margin_cross_entropy_kernel.cu        |  51 ++++----
 .../phi/kernels/gpu/sync_batch_norm_kernel.cu |   3 +-
 .../impl/repeat_interleave_kernel_impl.h      |  20 ++--
 .../kernels/impl/set_value_grad_kernel_impl.h |  22 ++--
 .../xpu/distribute_fpn_proposals_kernel.cc    |  12 +-
 .../phi/kernels/xpu/set_value_grad_kernel.cc  |  22 ++--
 32 files changed, 218 insertions(+), 107 deletions(-)
 create mode 100644 paddle/phi/core/utils/visit_place.h

diff --git a/paddle/fluid/framework/data_layout_transform.cc b/paddle/fluid/framework/data_layout_transform.cc
index 3b7d5fb4d8c..8e94a04ab16 100644
--- a/paddle/fluid/framework/data_layout_transform.cc
+++ b/paddle/fluid/framework/data_layout_transform.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "paddle/fluid/framework/data_layout_transform.h"
+#include "paddle/fluid/framework/op_kernel_type.h"
 
 #include "paddle/phi/core/utils/data_type.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
diff --git a/paddle/fluid/imperative/tests/test_group.cc b/paddle/fluid/imperative/tests/test_group.cc
index fef8c346f4b..d5f09868b89 100644
--- a/paddle/fluid/imperative/tests/test_group.cc
+++ b/paddle/fluid/imperative/tests/test_group.cc
@@ -18,6 +18,10 @@
 #include "gtest/gtest.h"
 #include "paddle/fluid/imperative/reducer.h"
 
+#include "paddle/fluid/framework/convert_utils.h"
+#include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/phi/core/utils/data_type.h"
+
 namespace paddle {
 namespace imperative {
 
diff --git a/paddle/fluid/operators/detection/anchor_generator_op.h b/paddle/fluid/operators/detection/anchor_generator_op.h
index 70194a0abcb..726b65fb1f4 100644
--- a/paddle/fluid/operators/detection/anchor_generator_op.h
+++ b/paddle/fluid/operators/detection/anchor_generator_op.h
@@ -18,6 +18,7 @@ limitations under the License. */
 
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/phi/common/transform.h"
+#include "paddle/phi/kernels/funcs/eigen/common.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 
 namespace paddle {
diff --git a/paddle/fluid/operators/detection/prior_box_op.h b/paddle/fluid/operators/detection/prior_box_op.h
index 4c5249ec56f..b49841399c7 100644
--- a/paddle/fluid/operators/detection/prior_box_op.h
+++ b/paddle/fluid/operators/detection/prior_box_op.h
@@ -19,6 +19,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/phi/common/transform.h"
 #include "paddle/phi/core/visit_type.h"
+#include "paddle/phi/kernels/funcs/eigen/common.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 
 namespace paddle {
diff --git a/paddle/fluid/operators/index_select_op.h b/paddle/fluid/operators/index_select_op.h
index 6bb91f325f9..ad1542666fd 100644
--- a/paddle/fluid/operators/index_select_op.h
+++ b/paddle/fluid/operators/index_select_op.h
@@ -17,6 +17,7 @@
 
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"
+#include "paddle/phi/kernels/funcs/eigen/common.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 
 namespace paddle {
diff --git a/paddle/fluid/operators/interpolate_op.h b/paddle/fluid/operators/interpolate_op.h
index ad67efc4b78..12378a5f1f1 100644
--- a/paddle/fluid/operators/interpolate_op.h
+++ b/paddle/fluid/operators/interpolate_op.h
@@ -16,6 +16,7 @@
 
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/phi/core/hostdevice.h"
+#include "paddle/phi/kernels/funcs/eigen/common.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 
 namespace paddle {
diff --git a/paddle/fluid/operators/math/prelu.h b/paddle/fluid/operators/math/prelu.h
index 5596a9fefed..00ff1fbcbc3 100644
--- a/paddle/fluid/operators/math/prelu.h
+++ b/paddle/fluid/operators/math/prelu.h
@@ -16,6 +16,7 @@ limitations under the License. */
 #include <vector>
 
 #include "paddle/fluid/platform/device/gpu/gpu_dnn.h"
+#include "paddle/phi/api/include/tensor.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 
 namespace paddle {
diff --git a/paddle/fluid/operators/math/sequence_pooling.cc b/paddle/fluid/operators/math/sequence_pooling.cc
index eed7b64a3c8..8dbeff2bce1 100644
--- a/paddle/fluid/operators/math/sequence_pooling.cc
+++ b/paddle/fluid/operators/math/sequence_pooling.cc
@@ -17,6 +17,7 @@ limitations under the License. */
 #include <string>
 
 #include "paddle/phi/kernels/funcs/blas/blas.h"
+#include "paddle/phi/kernels/funcs/eigen/common.h"
 #include "paddle/phi/kernels/funcs/jit/kernels.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 
diff --git a/paddle/fluid/operators/math/tree2col.cu b/paddle/fluid/operators/math/tree2col.cu
index 22bdc48768d..abaf5d3f3bb 100644
--- a/paddle/fluid/operators/math/tree2col.cu
+++ b/paddle/fluid/operators/math/tree2col.cu
@@ -14,6 +14,7 @@
 
 #include <stack>
 
+#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/operators/math/tree2col.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 
diff --git a/paddle/fluid/operators/reduce_ops/check_reduce_rank_test.cu b/paddle/fluid/operators/reduce_ops/check_reduce_rank_test.cu
index a724524716b..b94a78f898f 100644
--- a/paddle/fluid/operators/reduce_ops/check_reduce_rank_test.cu
+++ b/paddle/fluid/operators/reduce_ops/check_reduce_rank_test.cu
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "gtest/gtest.h"
+#include "paddle/phi/core/enforce.h"
 #include "paddle/phi/kernels/gpu/reduce.h"
 
 namespace paddle {
@@ -20,7 +21,7 @@ namespace operators {
 namespace details {
 
 TEST(test_reduce_rank_check, all) {
-  using EnforceNotMet = paddle::platform::EnforceNotMet;
+  using EnforceNotMet = phi::EnforceNotMet;
   constexpr int kMaxRank = framework::DDim::kMaxRank;
 
   for (int rank = 0; rank < kMaxRank; rank++) {
@@ -42,7 +43,7 @@ TEST(test_reduce_rank_check, all) {
         phi::funcs::details::CheckReduceRank(reduce_rank, rank);
       } else {
         ASSERT_THROW(phi::funcs::details::CheckReduceRank(reduce_rank, rank),
-                     paddle::platform::EnforceNotMet);
+                     EnforceNotMet);
       }
     }
   }
diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_op.h b/paddle/fluid/operators/sequence_ops/sequence_expand_op.h
index 7a7a6f7b3e7..9270b97cfc3 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_expand_op.h
+++ b/paddle/fluid/operators/sequence_ops/sequence_expand_op.h
@@ -17,6 +17,7 @@ limitations under the License. */
 
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/memory/memcpy.h"
+#include "paddle/phi/kernels/funcs/eigen/common.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 
 namespace paddle {
diff --git a/paddle/phi/core/utils/visit_place.h b/paddle/phi/core/utils/visit_place.h
new file mode 100644
index 00000000000..e2e2ffec1bf
--- /dev/null
+++ b/paddle/phi/core/utils/visit_place.h
@@ -0,0 +1,112 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/phi/common/place.h"
+#include "paddle/phi/core/enforce.h"
+
+namespace phi {
+
+// Calls visitor with the typed place; callers must depend on phi_place.
+template <typename Visitor>
+typename Visitor::result_type VisitPlace(const phi::Place& place,
+                                         const Visitor& visitor) {
+  switch (place.GetType()) {  // dispatch on the runtime allocation type
+    case phi::AllocationType::GPU: {
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+      phi::GPUPlace p(place.GetDeviceId());
+      return visitor(p);
+#else
+      PADDLE_THROW(phi::errors::Unavailable(
+          ("Paddle is not compiled with CUDA. Cannot visit gpu device")));
+      return typename Visitor::result_type();
+#endif  // PADDLE_WITH_CUDA || PADDLE_WITH_HIP
+    }
+    case phi::AllocationType::GPUPINNED: {
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+      phi::GPUPinnedPlace p;
+      return visitor(p);
+#else
+      PADDLE_THROW(phi::errors::Unavailable(
+          ("Paddle is not compiled with CUDA. Cannot visit cuda_pinned")));
+      return typename Visitor::result_type();
+#endif  // PADDLE_WITH_CUDA || PADDLE_WITH_HIP
+    }
+    case phi::AllocationType::XPU: {
+#ifdef PADDLE_WITH_XPU
+      phi::XPUPlace p(place.GetDeviceId());
+      return visitor(p);
+#else
+      PADDLE_THROW(phi::errors::Unavailable(
+          ("Paddle is not compiled with XPU. Cannot visit xpu device")));
+      return typename Visitor::result_type();
+#endif  // PADDLE_WITH_XPU
+    }
+    case phi::AllocationType::NPU: {
+#ifdef PADDLE_WITH_ASCEND_CL
+      phi::NPUPlace p(place.GetDeviceId());
+      return visitor(p);
+#else
+      PADDLE_THROW(phi::errors::Unavailable(
+          ("Paddle is not compiled with NPU. Cannot visit npu device")));
+      return typename Visitor::result_type();
+#endif  // PADDLE_WITH_ASCEND_CL
+    }
+    case phi::AllocationType::NPUPINNED: {
+#ifdef PADDLE_WITH_ASCEND_CL
+      phi::NPUPinnedPlace p;
+      return visitor(p);
+#else
+      PADDLE_THROW(phi::errors::Unavailable(
+          ("Paddle is not compiled with NPU. Cannot visit npu_pinned")));
+      return typename Visitor::result_type();
+#endif  // PADDLE_WITH_ASCEND_CL
+    }
+    case phi::AllocationType::IPU: {
+#ifdef PADDLE_WITH_IPU
+      phi::IPUPlace p(place.GetDeviceId());
+      return visitor(p);
+#else
+      PADDLE_THROW(phi::errors::Unavailable(
+          ("Paddle is not compiled with IPU. Cannot visit ipu device")));
+      return typename Visitor::result_type();
+#endif  // PADDLE_WITH_IPU
+    }
+    case phi::AllocationType::MLU: {
+#ifdef PADDLE_WITH_MLU
+      phi::MLUPlace p(place.GetDeviceId());
+      return visitor(p);
+#else
+      PADDLE_THROW(phi::errors::Unavailable(
+          ("Paddle is not compiled with MLU. Cannot visit mlu device")));
+#endif  // PADDLE_WITH_MLU
+    }
+    case phi::AllocationType::CUSTOM: {
+#ifdef PADDLE_WITH_CUSTOM_DEVICE
+      phi::CustomPlace p(place.GetDeviceType(), place.GetDeviceId());
+      return visitor(p);
+#else
+      PADDLE_THROW(phi::errors::Unavailable(
+          ("Paddle is not compiled with CUSTOM. Cannot visit custom device")));
+#endif  // PADDLE_WITH_CUSTOM_DEVICE
+    }
+    default: {  // every other allocation type is visited as a CPUPlace
+      phi::CPUPlace p;
+      return visitor(p);
+    }
+  }
+}
+
+}  // namespace phi
diff --git a/paddle/phi/kernels/cpu/repeat_interleave_grad_kernel.cc b/paddle/phi/kernels/cpu/repeat_interleave_grad_kernel.cc
index 521d620eb73..75875f81bee 100644
--- a/paddle/phi/kernels/cpu/repeat_interleave_grad_kernel.cc
+++ b/paddle/phi/kernels/cpu/repeat_interleave_grad_kernel.cc
@@ -45,29 +45,25 @@ void RepeatInterleaveWithTensorIndexGradKernel(
                         repeats_tensor.dims()[0],
                         x_grad->dims()[dim]));
 
-  const auto& index_type =
-      paddle::framework::TransToProtoVarType(repeats_tensor.dtype());
+  const auto& index_type = repeats_tensor.dtype();
 
   bool index_type_match =
-      index_type == paddle::framework::proto::VarType::INT32 ||
-      index_type == paddle::framework::proto::VarType::INT64;
+      index_type == phi::DataType::INT32 || index_type == phi::DataType::INT64;
   PADDLE_ENFORCE_EQ(index_type_match,
                     true,
                     phi::errors::InvalidArgument(
                         "Input(Repeats) holds the wrong type, it holds %s, but "
                         "desires to be %s or %s",
-                        paddle::framework::DataTypeToString(index_type),
-                        paddle::framework::DataTypeToString(
-                            paddle::framework::proto::VarType::INT32),
-                        paddle::framework::DataTypeToString(
-                            paddle::framework::proto::VarType::INT64)));
+                        phi::DataTypeToString(index_type),
+                        phi::DataTypeToString(phi::DataType::INT32),
+                        phi::DataTypeToString(phi::DataType::INT64)));
 
   phi::DeviceContextPool::Instance().Get(repeats_tensor.place());
-  if (index_type == paddle::framework::proto::VarType::INT32) {
+  if (index_type == phi::DataType::INT32) {
     phi::funcs::RepeatsTensor2IndexTensor<Context, int>(
         ctx, repeats_tensor, &index);
     IndexSelectGradInner<Context, T, int>(ctx, out_grad, index, x_grad, dim);
-  } else if (index_type == paddle::framework::proto::VarType::INT64) {
+  } else if (index_type == phi::DataType::INT64) {
     phi::funcs::RepeatsTensor2IndexTensor<Context, int64_t>(
         ctx, repeats_tensor, &index);
     IndexSelectGradInner<Context, T, int64_t>(
diff --git a/paddle/phi/kernels/cpu/sparse_weight_embedding_kernel.cc b/paddle/phi/kernels/cpu/sparse_weight_embedding_kernel.cc
index cfdccb5c8d9..175b4a750a8 100644
--- a/paddle/phi/kernels/cpu/sparse_weight_embedding_kernel.cc
+++ b/paddle/phi/kernels/cpu/sparse_weight_embedding_kernel.cc
@@ -45,8 +45,7 @@ struct EmbeddingCPUSparseFunctor {
     int64_t row_width = table_t.value().dims()[1];
     const auto* table = table_t.value().template data<T>();
     auto* output = dev_ctx_.template Alloc<T>(output_t);
-    auto input_data_type =
-        paddle::framework::TransToProtoVarType(table_t.value().dtype());
+    auto input_data_type = table_t.value().dtype();
 
     for (int64_t i = 0; i < ids_numel; ++i) {
       if (padding_idx_ != kNoPadding && ids[i] == padding_idx_) {
@@ -66,7 +65,7 @@ struct EmbeddingCPUSparseFunctor {
             phi::errors::InvalidArgument(
                 "the input key should be exists. But received %d.", id_index));
 
-        if (input_data_type == paddle::framework::proto::VarType::BF16) {
+        if (input_data_type == phi::DataType::BFLOAT16) {
           memcpy(output + i * row_width,
                  table + id_index * row_width,
                  row_width * sizeof(T));
diff --git a/paddle/phi/kernels/cpu/take_along_axis_grad_kernel.cc b/paddle/phi/kernels/cpu/take_along_axis_grad_kernel.cc
index acfc0d4c29d..8a7238203ec 100644
--- a/paddle/phi/kernels/cpu/take_along_axis_grad_kernel.cc
+++ b/paddle/phi/kernels/cpu/take_along_axis_grad_kernel.cc
@@ -43,16 +43,15 @@ void TakeAlongAxisGradKernel(const Context& dev_ctx,
   phi::funcs::SetConstant<Context, T> functor;
   functor(dev_ctx, x_grad, static_cast<T>(0));
 
-  const auto& index_type =
-      paddle::framework::TransToProtoVarType(index.dtype());
-  if (index_type == paddle::framework::proto::VarType::INT32) {
+  const auto& index_type = index.dtype();
+  if (index_type == phi::DataType::INT32) {
     phi::funcs::cpu_scatter_add_kernel<T, int32_t>(
         *x_grad,
         axis,
         index,
         out_grad,
         dev_ctx);  // the gradient of gather is scatter
-  } else if (index_type == paddle::framework::proto::VarType::INT64) {
+  } else if (index_type == phi::DataType::INT64) {
     phi::funcs::cpu_scatter_add_kernel<T, int64_t>(
         *x_grad, axis, index, out_grad, dev_ctx);
   }
diff --git a/paddle/phi/kernels/cpu/unique_consecutive_kernel.cc b/paddle/phi/kernels/cpu/unique_consecutive_kernel.cc
index 560578ed228..f2e05749914 100644
--- a/paddle/phi/kernels/cpu/unique_consecutive_kernel.cc
+++ b/paddle/phi/kernels/cpu/unique_consecutive_kernel.cc
@@ -12,8 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/phi/kernels/unique_consecutive_kernel.h"
+#include <climits>
+
 #include "paddle/phi/kernels/cpu/unique_consecutive_functor.h"
+#include "paddle/phi/kernels/unique_consecutive_kernel.h"
 
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/core/errors.h"
diff --git a/paddle/phi/kernels/cpu/unique_kernel.cc b/paddle/phi/kernels/cpu/unique_kernel.cc
index 15c19b24444..3b742fbd1df 100644
--- a/paddle/phi/kernels/cpu/unique_kernel.cc
+++ b/paddle/phi/kernels/cpu/unique_kernel.cc
@@ -12,6 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <climits>
+
 #include "paddle/phi/kernels/unique_kernel.h"
 
 #include "paddle/phi/backends/cpu/cpu_context.h"
diff --git a/paddle/phi/kernels/funcs/math_function.cc b/paddle/phi/kernels/funcs/math_function.cc
index 8e051623c4d..e8bd17efc7d 100644
--- a/paddle/phi/kernels/funcs/math_function.cc
+++ b/paddle/phi/kernels/funcs/math_function.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/phi/kernels/funcs/math_function.h"
+#include "paddle/phi/core/utils/visit_place.h"
 
 #ifdef PADDLE_WITH_MKLML
 #include "paddle/phi/backends/dynload/mklml.h"
@@ -236,7 +237,7 @@ void set_constant(const phi::DeviceContext& context,
 #endif
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   // tensor->place().apply_visitor(func);
-  paddle::platform::VisitPlace(tensor->place(), func);
+  phi::VisitPlace(tensor->place(), func);
 #elif defined(PADDLE_WITH_XPU)
   func(phi::XPUPlace());
 #else
diff --git a/paddle/phi/kernels/funcs/math_function.h b/paddle/phi/kernels/funcs/math_function.h
index 7e69402c350..d2de413dad5 100644
--- a/paddle/phi/kernels/funcs/math_function.h
+++ b/paddle/phi/kernels/funcs/math_function.h
@@ -17,12 +17,10 @@ limitations under the License. */
 #include <memory>
 #include <vector>
 
-#include "paddle/fluid/framework/operator.h"
 #include "paddle/phi/backends/all_context.h"
+#include "paddle/phi/common/memory_utils.h"
 #include "paddle/phi/core/dense_tensor.h"
-#include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/utils/data_type.h"
-#include "paddle/phi/kernels/funcs/eigen/common.h"
 
 namespace phi {
 namespace funcs {
diff --git a/paddle/phi/kernels/funcs/math_function_impl.h b/paddle/phi/kernels/funcs/math_function_impl.h
index 4e540a19d6c..ed8e0669ab7 100644
--- a/paddle/phi/kernels/funcs/math_function_impl.h
+++ b/paddle/phi/kernels/funcs/math_function_impl.h
@@ -17,6 +17,7 @@ limitations under the License. */
 #include <vector>
 
 #include "paddle/phi/common/data_type.h"
+#include "paddle/phi/kernels/funcs/eigen/common.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 
 namespace phi {
diff --git a/paddle/phi/kernels/funcs/segment_pooling.cu b/paddle/phi/kernels/funcs/segment_pooling.cu
index f776a5d1905..2624b5850e1 100644
--- a/paddle/phi/kernels/funcs/segment_pooling.cu
+++ b/paddle/phi/kernels/funcs/segment_pooling.cu
@@ -17,6 +17,7 @@ limitations under the License. */
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_launch_config.h"
 #include "paddle/phi/backends/gpu/gpu_primitives.h"
+#include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/kernels/funcs/gather.cu.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 #include "paddle/phi/kernels/funcs/segment_pooling.h"
diff --git a/paddle/phi/kernels/funcs/unique_functor.h b/paddle/phi/kernels/funcs/unique_functor.h
index 510236e278d..913ee1afb9f 100644
--- a/paddle/phi/kernels/funcs/unique_functor.h
+++ b/paddle/phi/kernels/funcs/unique_functor.h
@@ -13,6 +13,8 @@
 // limitations under the License.
 
 #pragma once
+#include <set>
+
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/utils/data_type.h"
 #include "paddle/phi/kernels/funcs/concat_and_split_functor.h"
diff --git a/paddle/phi/kernels/gpu/bincount_kernel.cu b/paddle/phi/kernels/gpu/bincount_kernel.cu
index 1308d435bba..b1000dac6f7 100644
--- a/paddle/phi/kernels/gpu/bincount_kernel.cu
+++ b/paddle/phi/kernels/gpu/bincount_kernel.cu
@@ -109,9 +109,6 @@ void BincountCUDAInner(const Context& dev_ctx,
         <<<GET_BLOCKS(input_numel), PADDLE_CUDA_NUM_THREADS, 0, stream>>>(
             input_data, input_numel, has_weights, weights_data, output_data);
   } else {
-    const auto& weights_type =
-        paddle::framework::TransToProtoVarType(weights->dtype());
-
     if (weights->dtype() == DataType::FLOAT32) {
       float* output_data = dev_ctx.template Alloc<float>(output);
       phi::funcs::SetConstant<Context, float>()(
diff --git a/paddle/phi/kernels/gpu/class_center_sample_kernel.cu b/paddle/phi/kernels/gpu/class_center_sample_kernel.cu
index da5624e2d9d..f63baadbde5 100644
--- a/paddle/phi/kernels/gpu/class_center_sample_kernel.cu
+++ b/paddle/phi/kernels/gpu/class_center_sample_kernel.cu
@@ -375,9 +375,7 @@ void ClassCenterSampleKernel(const Context& dev_ctx,
           num_classes_per_device_ptr,
           num_classes_per_device_ptr,
           num_classes_per_device.numel(),
-          paddle::platform::ToNCCLDataType(
-              paddle::framework::TransToProtoVarType(
-                  num_classes_per_device.dtype())),
+          phi::ToNCCLDataType(num_classes_per_device.dtype()),
           ncclSum,
           comm->comm(),
           calcu_stream));
diff --git a/paddle/phi/kernels/gpu/edit_distance_kernel.cu b/paddle/phi/kernels/gpu/edit_distance_kernel.cu
index d4d8433fdc0..cb5b096ba3f 100644
--- a/paddle/phi/kernels/gpu/edit_distance_kernel.cu
+++ b/paddle/phi/kernels/gpu/edit_distance_kernel.cu
@@ -21,6 +21,7 @@
 #include "paddle/phi/backends/gpu/gpu_primitives.h"
 #include "paddle/phi/common/memory_utils.h"
 #include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/core/mixed_vector.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 
 namespace phi {
diff --git a/paddle/phi/kernels/gpu/margin_cross_entropy_grad_kernel.cu b/paddle/phi/kernels/gpu/margin_cross_entropy_grad_kernel.cu
index 87faf0aad58..51f5e28b032 100644
--- a/paddle/phi/kernels/gpu/margin_cross_entropy_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/margin_cross_entropy_grad_kernel.cu
@@ -96,8 +96,7 @@ void GetClassInterval(const gpuStream_t& stream,
         num_classes_per_device_ptr,
         num_classes_per_device_ptr,
         num_classes_per_device.numel(),
-        paddle::platform::ToNCCLDataType(paddle::framework::TransToProtoVarType(
-            num_classes_per_device.dtype())),
+        phi::ToNCCLDataType(num_classes_per_device.dtype()),
         ncclSum,
         comm->comm(),
         calcu_stream));
@@ -188,8 +187,7 @@ void MarginCrossEntropyGradKernel(const Context& dev_ctx,
 
   int blocks = NumBlocks(N * D);
   int threads = kNumCUDAThreads;
-  const auto& label_type =
-      paddle::framework::TransToProtoVarType(label.dtype());
+  const auto& label_type = label.dtype();
 
   DenseTensor class_interval;
   GetClassInterval<T, Context>(dev_ctx.stream(),
@@ -201,7 +199,7 @@ void MarginCrossEntropyGradKernel(const Context& dev_ctx,
                                D,
                                &class_interval);
 
-  if (label_type == paddle::framework::proto::VarType::INT32) {
+  if (label_type == phi::DataType::INT32) {
     typedef int32_t LabelT;
     CalculateGrad<T, LabelT>
         <<<blocks, threads, 0, dev_ctx.stream()>>>(logits_grad->data<T>(),
@@ -215,7 +213,7 @@ void MarginCrossEntropyGradKernel(const Context& dev_ctx,
                                                    N,
                                                    D,
                                                    class_interval.data<int>());
-  } else if (label_type == paddle::framework::proto::VarType::INT64) {
+  } else if (label_type == phi::DataType::INT64) {
     typedef int64_t LabelT;
     CalculateGrad<T, LabelT>
         <<<blocks, threads, 0, dev_ctx.stream()>>>(logits_grad->data<T>(),
diff --git a/paddle/phi/kernels/gpu/margin_cross_entropy_kernel.cu b/paddle/phi/kernels/gpu/margin_cross_entropy_kernel.cu
index 0bc442058ac..5cbb21c45b7 100644
--- a/paddle/phi/kernels/gpu/margin_cross_entropy_kernel.cu
+++ b/paddle/phi/kernels/gpu/margin_cross_entropy_kernel.cu
@@ -92,8 +92,7 @@ void GetClassInterval(const gpuStream_t& stream,
         num_classes_per_device_ptr,
         num_classes_per_device_ptr,
         num_classes_per_device.numel(),
-        paddle::platform::ToNCCLDataType(paddle::framework::TransToProtoVarType(
-            num_classes_per_device.dtype())),
+        phi::ToNCCLDataType(num_classes_per_device.dtype()),
         ncclSum,
         comm->comm(),
         calcu_stream));
@@ -265,8 +264,7 @@ void MarginCrossEntropyKernel(const Context& dev_ctx,
 
   int blocks = NumBlocks(N);
   int threads = kNumCUDAThreads;
-  const auto& label_type =
-      paddle::framework::TransToProtoVarType(labels.dtype());
+  const auto& label_type = labels.dtype();
 
   // copy logits to softmax variable since we can't modify logits,
   // and it also be used when calculate grad
@@ -291,7 +289,7 @@ void MarginCrossEntropyKernel(const Context& dev_ctx,
   // theta = acos(x_i)
   // (cos(m1 * theta + m2) - m3)
   // save match_logits, used for gradient computation.
-  if (label_type == paddle::framework::proto::VarType::INT32) {
+  if (label_type == phi::DataType::INT32) {
     typedef int32_t LabelT;
     AddMarginToPositiveLogitsKernel<T>
         <<<NumBlocks(N), threads, 0, dev_ctx.stream()>>>(
@@ -305,7 +303,7 @@ void MarginCrossEntropyKernel(const Context& dev_ctx,
             N,
             D,
             class_interval.data<int>());
-  } else if (label_type == paddle::framework::proto::VarType::INT64) {
+  } else if (label_type == phi::DataType::INT64) {
     typedef int64_t LabelT;
     AddMarginToPositiveLogitsKernel<T>
         <<<NumBlocks(N), threads, 0, dev_ctx.stream()>>>(
@@ -357,15 +355,14 @@ void MarginCrossEntropyKernel(const Context& dev_ctx,
       auto task = pg->AllReduce(in_tensor, out_tensor, opts);
       task->Wait();
     } else {
-      PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::ncclAllReduce(
-          logits_max_buff,
-          logits_max_buff,
-          logits_max.numel(),
-          paddle::platform::ToNCCLDataType(
-              paddle::framework::TransToProtoVarType(logits_max.dtype())),
-          ncclMax,
-          comm->comm(),
-          stream));
+      PADDLE_ENFORCE_GPU_SUCCESS(
+          phi::dynload::ncclAllReduce(logits_max_buff,
+                                      logits_max_buff,
+                                      logits_max.numel(),
+                                      phi::ToNCCLDataType(logits_max.dtype()),
+                                      ncclMax,
+                                      comm->comm(),
+                                      stream));
     }
   }
 #endif
@@ -403,8 +400,7 @@ void MarginCrossEntropyKernel(const Context& dev_ctx,
           sum_exp_logits_buff,
           sum_exp_logits_buff,
           sum_exp_logits.numel(),
-          paddle::platform::ToNCCLDataType(
-              paddle::framework::TransToProtoVarType(sum_exp_logits.dtype())),
+          phi::ToNCCLDataType(sum_exp_logits.dtype()),
           ncclSum,
           comm->comm(),
           stream));
@@ -423,7 +419,7 @@ void MarginCrossEntropyKernel(const Context& dev_ctx,
 
   phi::funcs::SetConstant<Context, T> functor;
   functor(dev_ctx, loss, static_cast<T>(0.0));
-  if (label_type == paddle::framework::proto::VarType::INT32) {
+  if (label_type == phi::DataType::INT32) {
     typedef int32_t LabelT;
     HardLabelSoftmaxWithCrossEntropyKernel<T, LabelT>
         <<<blocks, threads, 0, dev_ctx.stream()>>>(loss_ptr,
@@ -433,7 +429,7 @@ void MarginCrossEntropyKernel(const Context& dev_ctx,
                                                    N,
                                                    D,
                                                    class_interval.data<int>());
-  } else if (label_type == paddle::framework::proto::VarType::INT64) {
+  } else if (label_type == phi::DataType::INT64) {
     typedef int64_t LabelT;
     HardLabelSoftmaxWithCrossEntropyKernel<T, LabelT>
         <<<blocks, threads, 0, dev_ctx.stream()>>>(loss_ptr,
@@ -458,15 +454,14 @@ void MarginCrossEntropyKernel(const Context& dev_ctx,
       auto task = pg->AllReduce(in_tensor, out_tensor, opts);
       task->Wait();
     } else {
-      PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::ncclAllReduce(
-          loss_ptr,
-          loss_ptr,
-          loss->numel(),
-          paddle::platform::ToNCCLDataType(
-              paddle::framework::TransToProtoVarType(loss->dtype())),
-          ncclSum,
-          comm->comm(),
-          stream));
+      PADDLE_ENFORCE_GPU_SUCCESS(
+          phi::dynload::ncclAllReduce(loss_ptr,
+                                      loss_ptr,
+                                      loss->numel(),
+                                      phi::ToNCCLDataType(loss->dtype()),
+                                      ncclSum,
+                                      comm->comm(),
+                                      stream));
     }
   }
 #endif
diff --git a/paddle/phi/kernels/gpu/sync_batch_norm_kernel.cu b/paddle/phi/kernels/gpu/sync_batch_norm_kernel.cu
index 448004fc4b8..19b9f5845bf 100644
--- a/paddle/phi/kernels/gpu/sync_batch_norm_kernel.cu
+++ b/paddle/phi/kernels/gpu/sync_batch_norm_kernel.cu
@@ -108,8 +108,7 @@ void SyncBatchNormKernel(const Context &ctx,
     }
 
     if (comm) {
-      int dtype = paddle::platform::ToNCCLDataType(
-          paddle::framework::TransToProtoVarType(mean_out->dtype()));
+      int dtype = phi::ToNCCLDataType(mean_out->dtype());
       // In-place operation
       PADDLE_ENFORCE_GPU_SUCCESS(
           phi::dynload::ncclAllReduce(stats,
diff --git a/paddle/phi/kernels/impl/repeat_interleave_kernel_impl.h b/paddle/phi/kernels/impl/repeat_interleave_kernel_impl.h
index d8a65afaf2c..ff413c7b61a 100644
--- a/paddle/phi/kernels/impl/repeat_interleave_kernel_impl.h
+++ b/paddle/phi/kernels/impl/repeat_interleave_kernel_impl.h
@@ -131,32 +131,28 @@ void RepeatInterleaveWithTensorIndexKernel(const Context& ctx,
                         "But received: [%s], required: [%d].",
                         repeats_tensor.dims()[0],
                         x.dims()[dim]));
-  const auto& index_type =
-      paddle::framework::TransToProtoVarType(repeats_tensor.dtype());
+  const auto& index_type = repeats_tensor.dtype();
   bool index_type_match =
-      index_type == paddle::framework::proto::VarType::INT32 ||
-      index_type == paddle::framework::proto::VarType::INT64;
+      index_type == phi::DataType::INT32 || index_type == phi::DataType::INT64;
   PADDLE_ENFORCE_EQ(
       index_type_match,
       true,
       phi::errors::InvalidArgument(
           "Input(RepeatsTensor) holds the wrong type, it holds %s, but "
           "desires to be %s or %s",
-          paddle::framework::DataTypeToString(index_type),
-          paddle::framework::DataTypeToString(
-              paddle::framework::proto::VarType::INT32),
-          paddle::framework::DataTypeToString(
-              paddle::framework::proto::VarType::INT64)));
+          phi::DataTypeToString(index_type),
+          phi::DataTypeToString(phi::DataType::INT32),
+          phi::DataTypeToString(phi::DataType::INT64)));
   if (place == cpu_place) {
     auto x_copy = x;
-    if (index_type == paddle::framework::proto::VarType::INT32) {
+    if (index_type == phi::DataType::INT32) {
       phi::funcs::RepeatsTensor2IndexTensor<Context, int>(
           ctx, repeats_tensor, &index);
       auto output_dim = phi::vectorize(x.dims());
       output_dim[dim] = index.dims()[0];
       out->Resize(phi::make_ddim(output_dim));
       IndexSelectInner<Context, T, int>(ctx, &x_copy, index, out, dim);
-    } else if (index_type == paddle::framework::proto::VarType::INT64) {
+    } else if (index_type == phi::DataType::INT64) {
       phi::funcs::RepeatsTensor2IndexTensor<Context, int64_t>(
           ctx, repeats_tensor, &index);
       auto output_dim = phi::vectorize(x.dims());
@@ -170,7 +166,7 @@ void RepeatInterleaveWithTensorIndexKernel(const Context& ctx,
     int64_t stride = stride_dim[dim];
     auto stream = ctx.stream();
     auto* in_data = x.data<T>();
-    if (index_type == paddle::framework::proto::VarType::INT64) {
+    if (index_type == phi::DataType::INT64) {
       phi::funcs::RepeatsTensor2IndexTensor<Context, int64_t>(
           ctx, repeats_tensor, &index);
 
diff --git a/paddle/phi/kernels/impl/set_value_grad_kernel_impl.h b/paddle/phi/kernels/impl/set_value_grad_kernel_impl.h
index de930734be6..02e5323c5b6 100644
--- a/paddle/phi/kernels/impl/set_value_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/set_value_grad_kernel_impl.h
@@ -188,21 +188,21 @@ void SetValueGradImpl(const Context& dev_ctx,
                    (value_grad_dims_size + decrease_axis_size - num_decrease));
           fake_value_grad_dims[i] = value_grad_dims[index_grad];
 
-          PADDLE_ENFORCE_EQ((out_dims[i] == value_grad_dims[index_grad]) ||
-                                (value_grad_dims[index_grad] == 1),
-                            true,
-                            errors::InvalidArgument(
-                                "An error occurred while calculating %s: "
-                                "[%s] can not be accumulated into [%s].",
-                                paddle::framework::GradVarName("ValueTensor"),
-                                out_dims,
-                                value_grad_dims));
+          PADDLE_ENFORCE_EQ(
+              (out_dims[i] == value_grad_dims[index_grad]) ||
+                  (value_grad_dims[index_grad] == 1),
+              true,
+              errors::InvalidArgument("An error occurred while calculating %s: "
+                                      "[%s] can not be accumulated into [%s].",
+                                      "ValueTensor@GRAD",
+                                      out_dims,
+                                      value_grad_dims));
         }
       }
 
       VLOG(3) << "Dimensions of "
-              << paddle::framework::GradVarName("ValueTensor") << "(["
-              << value_grad_dims << "])is broadcasted into ["
+              << "ValueTensor@GRAD"
+              << "([" << value_grad_dims << "])is broadcasted into ["
               << fake_value_grad_dims << "].";
 
       auto extent = Eigen::DSizes<Eigen::DenseIndex, RANK>();
diff --git a/paddle/phi/kernels/xpu/distribute_fpn_proposals_kernel.cc b/paddle/phi/kernels/xpu/distribute_fpn_proposals_kernel.cc
index 82efcd2959c..e23b1052d18 100644
--- a/paddle/phi/kernels/xpu/distribute_fpn_proposals_kernel.cc
+++ b/paddle/phi/kernels/xpu/distribute_fpn_proposals_kernel.cc
@@ -32,11 +32,11 @@ static void Sort(const XPUContext& dev_ctx,
   scores_slice_cpu.Resize({value.numel()});
   T* scores_slice_cpu_data = dev_ctx.template HostAlloc<T>(&scores_slice_cpu);
 
-  paddle::memory::Copy(cpu_place,
-                       scores_slice_cpu_data,
-                       place,
-                       value_data,
-                       sizeof(T) * value.numel());
+  memory_utils::Copy(cpu_place,
+                     scores_slice_cpu_data,
+                     place,
+                     value_data,
+                     sizeof(T) * value.numel());
   // Sort index
   DenseTensor index_t;
   index_t.Resize({value.numel()});
@@ -52,7 +52,7 @@ static void Sort(const XPUContext& dev_ctx,
   std::sort(index, index + value.numel(), compare);
   index_out->Resize({index_t.numel()});
   int* idx_out = dev_ctx.template Alloc<int>(index_out);
-  paddle::memory::Copy(
+  memory_utils::Copy(
       place, idx_out, cpu_place, index, sizeof(T) * index_t.numel());
 }
 
diff --git a/paddle/phi/kernels/xpu/set_value_grad_kernel.cc b/paddle/phi/kernels/xpu/set_value_grad_kernel.cc
index 26ba5e93087..affc6b0fe94 100644
--- a/paddle/phi/kernels/xpu/set_value_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/set_value_grad_kernel.cc
@@ -222,21 +222,21 @@ void SetValueGradImpl(const Context& dev_ctx,
                    (value_grad_dims_size + decrease_axis_size - num_decrease));
           fake_value_grad_dims[i] = value_grad_dims[index_grad];
 
-          PADDLE_ENFORCE_EQ((out_dims[i] == value_grad_dims[index_grad]) ||
-                                (value_grad_dims[index_grad] == 1),
-                            true,
-                            errors::InvalidArgument(
-                                "An error occurred while calculating %s: "
-                                "[%s] can not be accumulated into [%s].",
-                                paddle::framework::GradVarName("ValueTensor"),
-                                out_dims,
-                                value_grad_dims));
+          PADDLE_ENFORCE_EQ(
+              (out_dims[i] == value_grad_dims[index_grad]) ||
+                  (value_grad_dims[index_grad] == 1),
+              true,
+              errors::InvalidArgument("An error occurred while calculating %s: "
+                                      "[%s] can not be accumulated into [%s].",
+                                      "ValueTensor@GRAD",
+                                      out_dims,
+                                      value_grad_dims));
         }
       }
 
       VLOG(3) << "Dimensions of "
-              << paddle::framework::GradVarName("ValueTensor") << "(["
-              << value_grad_dims << "])is broadcasted into ["
+              << "ValueTensor@GRAD"
+              << "([" << value_grad_dims << "])is broadcasted into ["
               << fake_value_grad_dims << "].";
 
       std::vector<int64_t> slice_end(RANK, 0);
-- 
GitLab