From d09962a1403f6ed832b0c6a7b378b22b5f2bab52 Mon Sep 17 00:00:00 2001
From: engineer1109
Date: Mon, 6 Feb 2023 10:50:23 +0800
Subject: [PATCH] phi move ReshapeToMatrix & GetValue (#50139)

---
 paddle/fluid/framework/tensor_test.cc         | 12 ++--
 paddle/fluid/framework/tensor_util.h          | 21 -------
 paddle/fluid/operators/bpr_loss_op.h          |  7 ++-
 paddle/fluid/operators/cross_entropy_op.h     |  7 ++-
 .../operators/fused/multihead_matmul_op.cu    |  5 +-
 paddle/fluid/operators/isfinite_op.h          |  6 +-
 paddle/phi/core/tensor_utils.cc               | 56 +++++++++++++++++++
 paddle/phi/core/tensor_utils.h                | 18 ++++++
 .../fusion/onednn/fused_matmul_kernel.cc      |  3 +-
 paddle/phi/kernels/gpu/arange_kernel.cu       | 13 -----
 paddle/phi/kernels/gpu/linspace_kernel.cu     | 13 -----
 .../kernels/impl/matmul_grad_kernel_impl.h    | 56 ++++++++-----------
 paddle/phi/kernels/impl/matmul_kernel_impl.h  |  8 +--
 .../phi/kernels/onednn/matmul_grad_kernel.cc  |  6 +-
 paddle/phi/kernels/onednn/matmul_kernel.cc    |  2 +-
 paddle/phi/kernels/xpu/arange_kernel.cc       | 21 +------
 paddle/phi/kernels/xpu/matmul_grad_kernel.cc  | 11 ++--
 paddle/phi/kernels/xpu/matmul_kernel.cc       |  9 +--
 18 files changed, 131 insertions(+), 143 deletions(-)

diff --git a/paddle/fluid/framework/tensor_test.cc b/paddle/fluid/framework/tensor_test.cc
index 852b6a3cc1c..42690c071bc 100644
--- a/paddle/fluid/framework/tensor_test.cc
+++ b/paddle/fluid/framework/tensor_test.cc
@@ -12,8 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/phi/core/tensor_utils.h"
 
 #include <gtest/gtest.h>
@@ -25,7 +25,7 @@ namespace platform = paddle::platform;
 TEST(DenseTensor, Dims) {
   phi::DenseTensor tt;
   tt.Resize({2, 3, 4});
-  framework::DDim dims = tt.dims();
+  phi::DDim dims = tt.dims();
   ASSERT_EQ(arity(dims), 3);
   for (int i = 0; i < 3; ++i) {
     EXPECT_EQ(i + 2, dims[i]);
@@ -225,7 +225,7 @@ TEST(DenseTensor, Slice) {
     src_tensor.mutable_data<int>(phi::make_ddim({5, 3, 4}),
                                  platform::CPUPlace());
     phi::DenseTensor slice_tensor = src_tensor.Slice(1, 3);
-    framework::DDim slice_dims = slice_tensor.dims();
+    phi::DDim slice_dims = slice_tensor.dims();
     ASSERT_EQ(arity(slice_dims), 3);
     EXPECT_EQ(slice_dims[0], 2);
     EXPECT_EQ(slice_dims[1], 3);
@@ -251,7 +251,7 @@ TEST(DenseTensor, Slice) {
     src_tensor.mutable_data<double>(phi::make_ddim({6, 9}),
                                     platform::CUDAPlace(0));
     phi::DenseTensor slice_tensor = src_tensor.Slice(2, 6);
-    framework::DDim slice_dims = slice_tensor.dims();
+    phi::DDim slice_dims = slice_tensor.dims();
     ASSERT_EQ(arity(slice_dims), 2);
     EXPECT_EQ(slice_dims[0], 4);
     EXPECT_EQ(slice_dims[1], 9);
@@ -278,7 +278,7 @@ TEST(DenseTensor, Slice) {
     src_tensor.mutable_data<double>(phi::make_ddim({6, 9}),
                                     platform::NPUPlace(0));
     phi::DenseTensor slice_tensor = src_tensor.Slice(2, 6);
-    framework::DDim slice_dims = slice_tensor.dims();
+    phi::DDim slice_dims = slice_tensor.dims();
     ASSERT_EQ(arity(slice_dims), 2);
     EXPECT_EQ(slice_dims[0], 4);
     EXPECT_EQ(slice_dims[1], 9);
@@ -306,7 +306,7 @@ TEST(DenseTensor, ReshapeToMatrix) {
   for (int i = 0; i < 2 * 3 * 4 * 9; ++i) {
     src_ptr[i] = i;
   }
-  phi::DenseTensor res = framework::ReshapeToMatrix(src, 2);
+  phi::DenseTensor res = phi::ReshapeToMatrix(src, 2);
   ASSERT_EQ(res.dims()[0], 2 * 3);
   ASSERT_EQ(res.dims()[1], 4 * 9);
 }
diff --git a/paddle/fluid/framework/tensor_util.h b/paddle/fluid/framework/tensor_util.h
index d61c062ac86..35a612678cb 100644
--- a/paddle/fluid/framework/tensor_util.h
+++ b/paddle/fluid/framework/tensor_util.h
@@ -560,27 +560,6 @@ inline void TensorToVector(const phi::DenseTensor& src,
 std::ostream& operator<<(std::ostream& os, const LoD& lod);
 
-inline phi::DenseTensor ReshapeToMatrix(const phi::DenseTensor& src,
-                                        int num_col_dims) {
-  int rank = src.dims().size();
-  PADDLE_ENFORCE_GE(
-      rank,
-      2,
-      platform::errors::InvalidArgument(
-          "'ReshapeToMatrix()' is only used for flatten high rank "
-          "tensors to matrixs. The dimensions of phi::DenseTensor must be "
-          "greater or equal than 2. "
-          "But received dimensions of phi::DenseTensor is %d",
-          rank));
-  if (rank == 2) {
-    return src;
-  }
-  phi::DenseTensor res;
-  res.ShareDataWith(src);
-  res.Resize(phi::flatten_to_2d(src.dims(), num_col_dims));
-  return res;
-}
-
 template <typename T>
 inline T GetValue(const phi::DenseTensor* x) {
   T value = static_cast<T>(0);
diff --git a/paddle/fluid/operators/bpr_loss_op.h b/paddle/fluid/operators/bpr_loss_op.h
index 2e1d62dddd2..679f2687849 100644
--- a/paddle/fluid/operators/bpr_loss_op.h
+++ b/paddle/fluid/operators/bpr_loss_op.h
@@ -16,6 +16,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/platform/for_range.h"
+#include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 
 namespace paddle {
@@ -44,9 +45,9 @@ class BprLossOpKernel : public framework::OpKernel<T> {
     y->mutable_data<T>(ctx.GetPlace());
 
     int rank = x->dims().size();
-    phi::DenseTensor x_2d = framework::ReshapeToMatrix(*x, rank - 1);
-    phi::DenseTensor labels_2d = framework::ReshapeToMatrix(*label, rank - 1);
-    phi::DenseTensor y_2d = framework::ReshapeToMatrix(*y, rank - 1);
+    phi::DenseTensor x_2d = phi::ReshapeToMatrix(*x, rank - 1);
+    phi::DenseTensor labels_2d = phi::ReshapeToMatrix(*label, rank - 1);
+    phi::DenseTensor y_2d = phi::ReshapeToMatrix(*y, rank - 1);
 
     const phi::DenseTensor* logits = &x_2d;
     const phi::DenseTensor* labels = &labels_2d;
diff --git a/paddle/fluid/operators/cross_entropy_op.h b/paddle/fluid/operators/cross_entropy_op.h
index c581d33091c..83aaf70d1c4 100644
--- a/paddle/fluid/operators/cross_entropy_op.h
+++ b/paddle/fluid/operators/cross_entropy_op.h
@@ -16,6 +16,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/platform/for_range.h"
+#include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/kernels/funcs/cross_entropy.h"
 #include "paddle/phi/kernels/funcs/math.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
@@ -34,7 +35,7 @@ class CrossEntropyOpKernel : public framework::OpKernel<T> {
     int rank = x->dims().size();
     auto label_dims = labels->dims();
-    phi::DenseTensor x_2d = framework::ReshapeToMatrix(*x, rank - 1);
+    phi::DenseTensor x_2d = phi::ReshapeToMatrix(*x, rank - 1);
     phi::DenseTensor labels_2d, y_2d;
     if (label_dims.size() < rank) {
       labels_2d.ShareDataWith(*labels);
@@ -44,8 +45,8 @@ class CrossEntropyOpKernel : public framework::OpKernel<T> {
       y_2d.Resize({phi::product(y->dims()), 1});
     } else {
-      labels_2d = framework::ReshapeToMatrix(*labels, rank - 1);
-      y_2d = framework::ReshapeToMatrix(*y, rank - 1);
+      labels_2d = phi::ReshapeToMatrix(*labels, rank - 1);
+      y_2d = phi::ReshapeToMatrix(*y, rank - 1);
     }
 
     int axis_dim = x->dims()[rank - 1];
diff --git a/paddle/fluid/operators/fused/multihead_matmul_op.cu b/paddle/fluid/operators/fused/multihead_matmul_op.cu
index ba2b71ff6ff..0b9f23657a2 100644
--- a/paddle/fluid/operators/fused/multihead_matmul_op.cu
+++ b/paddle/fluid/operators/fused/multihead_matmul_op.cu
@@ -21,6 +21,7 @@
 #include "paddle/fluid/memory/malloc.h"
 #include "paddle/fluid/operators/math/bert_encoder_functor.h"
 #include "paddle/fluid/platform/float16.h"
+#include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"
 
 namespace paddle {
@@ -343,10 +344,10 @@ class MultiHeadMatMulV2Kernel : public framework::OpKernel<T> {
 
     // (B*S, hidden)
     const phi::DenseTensor input_matrix =
-        framework::ReshapeToMatrix(*input, 2 /*x_num_col_dims */);
+        phi::ReshapeToMatrix(*input, 2 /*x_num_col_dims */);
     // (hidden, 3 * all_head_size)
     const phi::DenseTensor w_matrix =
-        framework::ReshapeToMatrix(*w, 1 /*y_num_col_dims*/);
+        phi::ReshapeToMatrix(*w, 1 /*y_num_col_dims*/);
 
     phi::DenseTensor temp_out_tensor;
     auto temp_out_dims =
diff --git a/paddle/fluid/operators/isfinite_op.h b/paddle/fluid/operators/isfinite_op.h
index cd802567889..431d446daa7 100644
--- a/paddle/fluid/operators/isfinite_op.h
+++ b/paddle/fluid/operators/isfinite_op.h
@@ -129,17 +129,17 @@ inline void TensorIsfinite(const phi::DenseTensor& tensor,
 inline bool TensorContainsNAN(const phi::DenseTensor& tensor) {
   phi::DenseTensor out;
   TensorContainsNAN(tensor, &out);
-  return GetValue<bool>(&out);
+  return paddle::framework::GetValue<bool>(&out);
 }
 inline bool TensorContainsInf(const phi::DenseTensor& tensor) {
   phi::DenseTensor out;
   TensorContainsInf(tensor, &out);
-  return GetValue<bool>(&out);
+  return paddle::framework::GetValue<bool>(&out);
 }
 inline bool TensorIsfinite(const phi::DenseTensor& tensor) {
   phi::DenseTensor out;
   TensorIsfinite(tensor, &out);
-  return GetValue<bool>(&out);
+  return paddle::framework::GetValue<bool>(&out);
 }
 }  // namespace framework
 namespace operators {
diff --git a/paddle/phi/core/tensor_utils.cc b/paddle/phi/core/tensor_utils.cc
index e9ed973e0dd..379558b0b5d 100644
--- a/paddle/phi/core/tensor_utils.cc
+++ b/paddle/phi/core/tensor_utils.cc
@@ -867,4 +867,60 @@ template void TensorToVector(const phi::DenseTensor& src,
 template void TensorToVector(const phi::DenseTensor& src,
                              std::vector<phi::dtype::complex<double>>* dst);
 
+phi::DenseTensor ReshapeToMatrix(const phi::DenseTensor& src,
+                                 int num_col_dims) {
+  int rank = src.dims().size();
+  PADDLE_ENFORCE_GE(
+      rank,
+      2,
+      phi::errors::InvalidArgument(
"'ReshapeToMatrix()' is only used for flatten high rank " + "tensors to matrixs. The dimensions of phi::DenseTensor must be " + "greater or equal than 2. " + "But received dimensions of phi::DenseTensor is %d", + rank)); + if (rank == 2) { + return src; + } + phi::DenseTensor res; + res.ShareDataWith(src); + res.Resize(phi::flatten_to_2d(src.dims(), num_col_dims)); + return res; +} + +template +T GetValue(const phi::DenseTensor* x) { + T value = static_cast(0); + if (!paddle::platform::is_cpu_place(x->place())) { + phi::DenseTensor cpu_x{}; + phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance(); + phi::DeviceContext* dev_ctx = pool.Get(x->place()); + phi::Copy(*dev_ctx, *x, phi::CPUPlace(), true, &cpu_x); + value = cpu_x.data()[0]; + } else { + value = x->data()[0]; + } + return value; +} + +template bool GetValue(const phi::DenseTensor* x); + +template int16_t GetValue(const phi::DenseTensor* x); + +template int GetValue(const phi::DenseTensor* x); + +template int64_t GetValue(const phi::DenseTensor* x); + +template float GetValue(const phi::DenseTensor* x); + +template double GetValue(const phi::DenseTensor* x); + +template phi::dtype::bfloat16 GetValue(const phi::DenseTensor* x); + +template phi::dtype::float16 GetValue(const phi::DenseTensor* x); + +template phi::dtype::complex GetValue(const phi::DenseTensor* x); + +template phi::dtype::complex GetValue(const phi::DenseTensor* x); + } // namespace phi diff --git a/paddle/phi/core/tensor_utils.h b/paddle/phi/core/tensor_utils.h index fe0393c7919..df6db077f03 100644 --- a/paddle/phi/core/tensor_utils.h +++ b/paddle/phi/core/tensor_utils.h @@ -126,4 +126,22 @@ void TensorToVector(const phi::DenseTensor& src, const phi::DeviceContext& ctx, std::vector* dst); +phi::DenseTensor ReshapeToMatrix(const phi::DenseTensor& src, int num_col_dims); + +template +T GetValue(const phi::DenseTensor* x); + +template +inline T GetValue(const Context& dev_ctx, const DenseTensor& x) { + T value = static_cast(0); + if (x.place() != CPUPlace()) { + DenseTensor cpu_x; + Copy(dev_ctx, x, CPUPlace(), true, &cpu_x); + value = cpu_x.data()[0]; + } else { + value = x.data()[0]; + } + return value; +} + } // namespace phi diff --git a/paddle/phi/kernels/fusion/onednn/fused_matmul_kernel.cc b/paddle/phi/kernels/fusion/onednn/fused_matmul_kernel.cc index f54db963b09..f4f4b259451 100644 --- a/paddle/phi/kernels/fusion/onednn/fused_matmul_kernel.cc +++ b/paddle/phi/kernels/fusion/onednn/fused_matmul_kernel.cc @@ -16,13 +16,14 @@ #include "paddle/phi/backends/onednn/onednn_reuse.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/tensor_utils.h" using dnnl::engine; using dnnl::inner_product_forward; using dnnl::memory; using dnnl::prop_kind; using dnnl::stream; -using paddle::framework::ReshapeToMatrix; +using phi::ReshapeToMatrix; namespace phi { diff --git a/paddle/phi/kernels/gpu/arange_kernel.cu b/paddle/phi/kernels/gpu/arange_kernel.cu index 4cd8eb3ceca..4fafda857dc 100644 --- a/paddle/phi/kernels/gpu/arange_kernel.cu +++ b/paddle/phi/kernels/gpu/arange_kernel.cu @@ -23,19 +23,6 @@ namespace phi { -template -inline T GetValue(const Context& dev_ctx, const DenseTensor& x) { - T value = static_cast(0); - if (x.place() != CPUPlace()) { - DenseTensor cpu_x; - Copy(dev_ctx, x, CPUPlace(), true, &cpu_x); - value = cpu_x.data()[0]; - } else { - value = x.data()[0]; - } - return value; -} - template __global__ void Range(T start, T step, int64_t size, T* out) { CUDA_KERNEL_LOOP(index, size) { out[index] = start + step * index; } diff 
diff --git a/paddle/phi/kernels/gpu/linspace_kernel.cu b/paddle/phi/kernels/gpu/linspace_kernel.cu
index eaf2955010c..57a16b0cea7 100644
--- a/paddle/phi/kernels/gpu/linspace_kernel.cu
+++ b/paddle/phi/kernels/gpu/linspace_kernel.cu
@@ -41,19 +41,6 @@ __global__ void LinspaceSpecialKernel(T start, T* out) {
   out[0] = static_cast<T>(start);
 }
 
-template <typename T, typename Context>
-T GetValue(const Context& ctx, const DenseTensor& x) {
-  T value = static_cast<T>(0);
-  if (x.place() != CPUPlace()) {
-    DenseTensor cpu_x;
-    Copy(ctx, x, CPUPlace(), true, &cpu_x);
-    value = cpu_x.data<T>()[0];
-  } else {
-    value = x.data<T>()[0];
-  }
-  return value;
-}
-
 template <typename T, typename Context>
 T GetValueOfExpectedType(const Context& ctx, const DenseTensor& x) {
   switch (x.dtype()) {
diff --git a/paddle/phi/kernels/impl/matmul_grad_kernel_impl.h b/paddle/phi/kernels/impl/matmul_grad_kernel_impl.h
index a9dac3e493f..b840a803b6d 100644
--- a/paddle/phi/kernels/impl/matmul_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/matmul_grad_kernel_impl.h
@@ -1872,12 +1872,10 @@ void MatmulWithFlattenGradKernel(const Context& dev_ctx,
                                  int y_num_col_dims,
                                  DenseTensor* x_grad,
                                  DenseTensor* y_grad) {
-  auto x_matrix = x.dims().size() > 2
-                      ? paddle::framework::ReshapeToMatrix(x, x_num_col_dims)
-                      : x;
-  auto y_matrix = y.dims().size() > 2
-                      ? paddle::framework::ReshapeToMatrix(y, y_num_col_dims)
-                      : y;
+  auto x_matrix =
+      x.dims().size() > 2 ? phi::ReshapeToMatrix(x, x_num_col_dims) : x;
+  auto y_matrix =
+      y.dims().size() > 2 ? phi::ReshapeToMatrix(y, y_num_col_dims) : y;
   auto* dout = &out_grad;
 
   DenseTensor dout_mat(*dout);
@@ -1898,9 +1896,7 @@ void MatmulWithFlattenGradKernel(const Context& dev_ctx,
   if (dx) {
     dev_ctx.template Alloc<T>(dx);
     DenseTensor dx_matrix =
-        dx->dims().size() > 2
-            ? paddle::framework::ReshapeToMatrix(*dx, x_num_col_dims)
-            : *dx;
+        dx->dims().size() > 2 ? phi::ReshapeToMatrix(*dx, x_num_col_dims) : *dx;
 
     // dx = dout * y'. dx: M x K, dout : M x N, y : K x N
     blas.MatMul(dout_mat, false, y_matrix, true, &dx_matrix);
@@ -1908,9 +1904,7 @@ void MatmulWithFlattenGradKernel(const Context& dev_ctx,
   if (dy) {
     dev_ctx.template Alloc<T>(dy);
     DenseTensor dy_matrix =
-        dy->dims().size() > 2
-            ? paddle::framework::ReshapeToMatrix(*dy, y_num_col_dims)
-            : *dy;
+        dy->dims().size() > 2 ? phi::ReshapeToMatrix(*dy, y_num_col_dims) : *dy;
     // dy = x' * dout. dy K x N, dout : M x N, x : M x K
     blas.MatMul(x_matrix, true, dout_mat, false, &dy_matrix);
   }
@@ -1929,12 +1923,10 @@ void MatmulWithFlattenDoubleGradKernel(
     DenseTensor* x_grad,
     DenseTensor* y_grad,
     DenseTensor* out_grad_grad) {
-  auto x_mat = x.dims().size() > 2
-                   ? paddle::framework::ReshapeToMatrix(x, x_num_col_dims)
-                   : x;
-  auto y_mat = y.dims().size() > 2
-                   ? paddle::framework::ReshapeToMatrix(y, y_num_col_dims)
-                   : y;
+  auto x_mat =
+      x.dims().size() > 2 ? phi::ReshapeToMatrix(x, x_num_col_dims) : x;
+  auto y_mat =
+      y.dims().size() > 2 ? phi::ReshapeToMatrix(y, y_num_col_dims) : y;
 
   const int m = phi::flatten_to_2d(x.dims(), x_num_col_dims)[0];
   const int n = phi::flatten_to_2d(y.dims(), y_num_col_dims)[1];
@@ -1965,20 +1957,18 @@ void MatmulWithFlattenDoubleGradKernel(
   // true, MatMul beta should be 1 to add result to ddout.
   bool ddout_flag = false;
   if (ddx) {
-    auto ddx_mat =
-        ddx->dims().size() > 2
-            ? paddle::framework::ReshapeToMatrix(*ddx, x_num_col_dims)
-            : static_cast<const DenseTensor&>(*ddx);
+    auto ddx_mat = ddx->dims().size() > 2
+                       ? phi::ReshapeToMatrix(*ddx, x_num_col_dims)
+                       : static_cast<const DenseTensor&>(*ddx);
 
     // dy = ddx' * dout. dy : K x M, ddx' : K x M, dout : M x N
     if (dy) {
       dy->set_lod(y.lod());
       // allocate and reshape dy
       dev_ctx.template Alloc<T>(dy);
-      DenseTensor dy_mat =
-          dy->dims().size() > 2
-              ? paddle::framework::ReshapeToMatrix(*dy, y_num_col_dims)
-              : *dy;
+      DenseTensor dy_mat = dy->dims().size() > 2
+                               ? phi::ReshapeToMatrix(*dy, y_num_col_dims)
+                               : *dy;
       blas.MatMul(ddx_mat, true, dout_mat, false, &dy_mat);
     }
     // ddout1 = ddx * y. ddx : M x K, y : K x N, ddout1 : M x N
@@ -1994,19 +1984,17 @@ void MatmulWithFlattenDoubleGradKernel(
     }
   }
   if (ddy) {
-    auto ddy_mat =
-        ddy->dims().size() > 2
-            ? paddle::framework::ReshapeToMatrix(*ddy, y_num_col_dims)
-            : static_cast<const DenseTensor&>(*ddy);
+    auto ddy_mat = ddy->dims().size() > 2
+                       ? phi::ReshapeToMatrix(*ddy, y_num_col_dims)
+                       : static_cast<const DenseTensor&>(*ddy);
 
     // dx = dout * ddy'. dout : M x N, ddy' : N x K, dx : M x K
     if (dx) {
       dx->set_lod(x.lod());
       // allocate and reshape dx
       dev_ctx.template Alloc<T>(dx);
-      DenseTensor dx_mat =
-          dx->dims().size() > 2
-              ? paddle::framework::ReshapeToMatrix(*dx, x_num_col_dims)
-              : *dx;
+      DenseTensor dx_mat = dx->dims().size() > 2
+                               ? phi::ReshapeToMatrix(*dx, x_num_col_dims)
+                               : *dx;
       blas.MatMul(dout_mat, false, ddy_mat, true, &dx_mat);
     }
     // ddout2 = x * ddy. x : M x K, ddy : K x N, ddout2 : M x N
diff --git a/paddle/phi/kernels/impl/matmul_kernel_impl.h b/paddle/phi/kernels/impl/matmul_kernel_impl.h
index 99257ce4a6a..83855f7296b 100644
--- a/paddle/phi/kernels/impl/matmul_kernel_impl.h
+++ b/paddle/phi/kernels/impl/matmul_kernel_impl.h
@@ -513,13 +513,9 @@ void MatmulWithFlattenKernel(const Context& dev_ctx,
                              int y_num_col_dims,
                              DenseTensor* out) {
   const DenseTensor x_matrix =
-      x.dims().size() > 2
-          ? paddle::framework::ReshapeToMatrix(x, x_num_col_dims)
-          : x;
+      x.dims().size() > 2 ? phi::ReshapeToMatrix(x, x_num_col_dims) : x;
   const DenseTensor y_matrix =
-      y.dims().size() > 2
-          ? paddle::framework::ReshapeToMatrix(y, y_num_col_dims)
-          : y;
+      y.dims().size() > 2 ? phi::ReshapeToMatrix(y, y_num_col_dims) : y;
 
   dev_ctx.template Alloc<T>(out);
   auto z_dim = out->dims();
diff --git a/paddle/phi/kernels/onednn/matmul_grad_kernel.cc b/paddle/phi/kernels/onednn/matmul_grad_kernel.cc
index f9b45d4bc44..f5bef428d67 100644
--- a/paddle/phi/kernels/onednn/matmul_grad_kernel.cc
+++ b/paddle/phi/kernels/onednn/matmul_grad_kernel.cc
@@ -196,10 +196,8 @@ void MatmulWithFlattenGradKernel(const Context &dev_ctx,
                                  int y_num_col_dims,
                                  DenseTensor *x_grad,
                                  DenseTensor *y_grad) {
-  const DenseTensor reshaped_y =
-      paddle::framework::ReshapeToMatrix(y, y_num_col_dims);
-  const DenseTensor reshaped_x =
-      paddle::framework::ReshapeToMatrix(x, x_num_col_dims);
+  const DenseTensor reshaped_y = phi::ReshapeToMatrix(y, y_num_col_dims);
+  const DenseTensor reshaped_x = phi::ReshapeToMatrix(x, x_num_col_dims);
   const DenseTensor x_matrix = x.dims().size() > 2 ? reshaped_x : x;
   const DenseTensor y_matrix = y.dims().size() > 2 ? reshaped_y : y;
diff --git a/paddle/phi/kernels/onednn/matmul_kernel.cc b/paddle/phi/kernels/onednn/matmul_kernel.cc
index 8f9baec3668..12a45c280cf 100644
--- a/paddle/phi/kernels/onednn/matmul_kernel.cc
+++ b/paddle/phi/kernels/onednn/matmul_kernel.cc
@@ -24,7 +24,7 @@ using dnnl::inner_product_forward;
 using dnnl::memory;
 using dnnl::prop_kind;
 using dnnl::stream;
-using paddle::framework::ReshapeToMatrix;
+using phi::ReshapeToMatrix;
 
 namespace phi {
 
diff --git a/paddle/phi/kernels/xpu/arange_kernel.cc b/paddle/phi/kernels/xpu/arange_kernel.cc
index 84896ca1993..0ae1007e91d 100644
--- a/paddle/phi/kernels/xpu/arange_kernel.cc
+++ b/paddle/phi/kernels/xpu/arange_kernel.cc
@@ -14,33 +14,17 @@ limitations under the License. */
 
 #include "paddle/phi/kernels/arange_kernel.h"
 
-#include "paddle/fluid/memory/memcpy.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/range_function.h"
 
 namespace phi {
 
-template <typename T, typename Context>
-inline T GetValue(const Context& dev_ctx, const DenseTensor& x) {
-  T value = static_cast<T>(0);
-  if (x.place() != CPUPlace()) {
-    DenseTensor cpu_x;
-    Copy(dev_ctx, x, CPUPlace(), true, &cpu_x);
-    value = cpu_x.data<T>()[0];
-  } else {
-    value = x.data<T>()[0];
-  }
-  return value;
-}
-
 template <typename T, typename Context>
 void ArangeKernel(const Context& dev_ctx,
                   const DenseTensor& start,
                   const DenseTensor& end,
                   const DenseTensor& step,
                   DenseTensor* out) {
-  auto place = dev_ctx.GetPlace();
-  auto cpu_place = phi::CPUPlace();
-
   T start_value = GetValue<T, Context>(dev_ctx, start);
   T end_value = GetValue<T, Context>(dev_ctx, end);
   T step_value = GetValue<T, Context>(dev_ctx, step);
@@ -48,7 +32,7 @@ void ArangeKernel(const Context& dev_ctx,
   int64_t size = 0;
   phi::funcs::GetSize(start_value, end_value, step_value, &size);
   out->Resize(phi::make_ddim({size}));
-  T* out_data = dev_ctx.template Alloc<T>(out);
+  dev_ctx.template Alloc<T>(out);
 
   DenseTensor out_cpu;
   out_cpu.Resize({out->numel()});
@@ -60,8 +44,7 @@ void ArangeKernel(const Context& dev_ctx,
     out_cpu_data[i] = value;
     value += step_value;
   }
-  paddle::memory::Copy(
-      place, out_data, cpu_place, out_cpu_data, out->numel() * sizeof(T));
+  phi::Copy(dev_ctx, out_cpu, out->place(), true, out);
 }
 
 }  // namespace phi
diff --git a/paddle/phi/kernels/xpu/matmul_grad_kernel.cc b/paddle/phi/kernels/xpu/matmul_grad_kernel.cc
index 07f93dc2d6a..c4fb311cbe5 100644
--- a/paddle/phi/kernels/xpu/matmul_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/matmul_grad_kernel.cc
@@ -13,7 +13,6 @@
 // limitations under the License.
 
 #include "paddle/phi/kernels/matmul_grad_kernel.h"
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "paddle/phi/backends/xpu/xpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
@@ -110,12 +109,10 @@ void MatmulWithFlattenGradKernel(const Context& dev_ctx,
                                  DenseTensor* y_grad) {
   using XPUType = typename XPUTypeTrait<T>::Type;
 
-  auto x_matrix = x.dims().size() > 2
-                      ? paddle::framework::ReshapeToMatrix(x, x_num_col_dims)
-                      : static_cast<const DenseTensor&>(x);
-  auto y_matrix = y.dims().size() > 2
-                      ? paddle::framework::ReshapeToMatrix(y, y_num_col_dims)
-                      : static_cast<const DenseTensor&>(y);
+  auto x_matrix = x.dims().size() > 2 ? phi::ReshapeToMatrix(x, x_num_col_dims)
+                                      : static_cast<const DenseTensor&>(x);
+  auto y_matrix = y.dims().size() > 2 ? phi::ReshapeToMatrix(y, y_num_col_dims)
+                                      : static_cast<const DenseTensor&>(y);
   DenseTensor dout_mat;
   dout_mat.Resize({phi::flatten_to_2d(x.dims(), x_num_col_dims)[0],
                    phi::flatten_to_2d(y.dims(), y_num_col_dims)[1]});
diff --git a/paddle/phi/kernels/xpu/matmul_kernel.cc b/paddle/phi/kernels/xpu/matmul_kernel.cc
index 50ace476703..d703f10a248 100644
--- a/paddle/phi/kernels/xpu/matmul_kernel.cc
+++ b/paddle/phi/kernels/xpu/matmul_kernel.cc
@@ -13,7 +13,6 @@
 // limitations under the License.
 
 #include "paddle/phi/kernels/matmul_kernel.h"
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "paddle/phi/backends/xpu/xpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
@@ -52,13 +51,9 @@ void MatmulWithFlattenKernel(const Context& dev_ctx,
                              DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
   const DenseTensor x_matrix =
-      x.dims().size() > 2
-          ? paddle::framework::ReshapeToMatrix(x, x_num_col_dims)
-          : x;
+      x.dims().size() > 2 ? phi::ReshapeToMatrix(x, x_num_col_dims) : x;
   const DenseTensor y_matrix =
-      y.dims().size() > 2
-          ? paddle::framework::ReshapeToMatrix(y, y_num_col_dims)
-          : y;
+      y.dims().size() > 2 ? phi::ReshapeToMatrix(y, y_num_col_dims) : y;
 
   dev_ctx.template Alloc<T>(out);
 
   const XPUType* x_ptr = reinterpret_cast<const XPUType*>(x_matrix.data<T>());
-- 
GitLab
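
Usage note (placed after the signature delimiter, which `git am` ignores): a
minimal sketch of how the relocated helpers are called after this patch,
mirroring the updated tensor_test.cc above. The test name and the CPU-only
setup are illustrative assumptions, not code from this PR.

#include <gtest/gtest.h>

#include "paddle/phi/core/tensor_utils.h"

TEST(DenseTensor, PhiReshapeToMatrixAndGetValue) {
  // Fill a rank-4 CPU tensor, as TEST(DenseTensor, ReshapeToMatrix) does.
  phi::DenseTensor src;
  int* src_ptr =
      src.mutable_data<int>(phi::make_ddim({2, 3, 4, 9}), phi::CPUPlace());
  src_ptr[0] = 42;

  // phi::ReshapeToMatrix keeps the first num_col_dims axes as rows and
  // flattens the rest into columns: {2, 3, 4, 9} -> {2 * 3, 4 * 9}.
  // The result shares storage with `src` (ShareDataWith); no copy is made.
  phi::DenseTensor mat = phi::ReshapeToMatrix(src, 2 /*num_col_dims*/);
  EXPECT_EQ(mat.dims()[0], 2 * 3);
  EXPECT_EQ(mat.dims()[1], 4 * 9);

  // phi::GetValue<T> reads element 0; for non-CPU tensors it first performs
  // a blocking phi::Copy back to the host.
  EXPECT_EQ(phi::GetValue<int>(&mat), 42);
}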