phi move ReshapeToMatrix & GetValue (#50139)

d09962a1 · engineer1109 · GitHub · 1274e738 · d09962a1 · d09962a1
18 changed files
--- a/paddle/fluid/framework/tensor_test.cc
+++ b/paddle/fluid/framework/tensor_test.cc
@@ -12,8 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/phi/core/tensor_utils.h"

 #include <gtest/gtest.h>

@@ -25,7 +25,7 @@ namespace platform = paddle::platform;
 TEST(DenseTensor, Dims) {
  phi::DenseTensor tt;
  tt.Resize({2, 3, 4});
-  framework::DDim dims = tt.dims();
+  phi::DDim dims = tt.dims();
  ASSERT_EQ(arity(dims), 3);
  for (int i = 0; i < 3; ++i) {
    EXPECT_EQ(i + 2, dims[i]);
@@ -225,7 +225,7 @@ TEST(DenseTensor, Slice) {
    src_tensor.mutable_data<int>(phi::make_ddim({5, 3, 4}),
                                 platform::CPUPlace());
    phi::DenseTensor slice_tensor = src_tensor.Slice(1, 3);
-    framework::DDim slice_dims = slice_tensor.dims();
+    phi::DDim slice_dims = slice_tensor.dims();
    ASSERT_EQ(arity(slice_dims), 3);
    EXPECT_EQ(slice_dims[0], 2);
    EXPECT_EQ(slice_dims[1], 3);
@@ -251,7 +251,7 @@ TEST(DenseTensor, Slice) {
    src_tensor.mutable_data<double>(phi::make_ddim({6, 9}),
                                    platform::CUDAPlace(0));
    phi::DenseTensor slice_tensor = src_tensor.Slice(2, 6);
-    framework::DDim slice_dims = slice_tensor.dims();
+    phi::DDim slice_dims = slice_tensor.dims();
    ASSERT_EQ(arity(slice_dims), 2);
    EXPECT_EQ(slice_dims[0], 4);
    EXPECT_EQ(slice_dims[1], 9);
@@ -278,7 +278,7 @@ TEST(DenseTensor, Slice) {
    src_tensor.mutable_data<double>(phi::make_ddim({6, 9}),
                                    platform::NPUPlace(0));
    phi::DenseTensor slice_tensor = src_tensor.Slice(2, 6);
-    framework::DDim slice_dims = slice_tensor.dims();
+    phi::DDim slice_dims = slice_tensor.dims();
    ASSERT_EQ(arity(slice_dims), 2);
    EXPECT_EQ(slice_dims[0], 4);
    EXPECT_EQ(slice_dims[1], 9);
@@ -306,7 +306,7 @@ TEST(DenseTensor, ReshapeToMatrix) {
  for (int i = 0; i < 2 * 3 * 4 * 9; ++i) {
    src_ptr[i] = i;
  }
-  phi::DenseTensor res = framework::ReshapeToMatrix(src, 2);
+  phi::DenseTensor res = phi::ReshapeToMatrix(src, 2);
  ASSERT_EQ(res.dims()[0], 2 * 3);
  ASSERT_EQ(res.dims()[1], 4 * 9);
 }

--- a/paddle/fluid/framework/tensor_util.h
+++ b/paddle/fluid/framework/tensor_util.h
@@ -560,27 +560,6 @@ inline void TensorToVector(const phi::DenseTensor& src,

 std::ostream& operator<<(std::ostream& os, const LoD& lod);

-inline phi::DenseTensor ReshapeToMatrix(const phi::DenseTensor& src,
-                                        int num_col_dims) {
-  int rank = src.dims().size();
-  PADDLE_ENFORCE_GE(
-      rank,
-      2,
-      platform::errors::InvalidArgument(
-          "'ReshapeToMatrix()' is only used for flatten high rank "
-          "tensors to matrixs. The dimensions of phi::DenseTensor must be "
-          "greater or equal than 2. "
-          "But received dimensions of phi::DenseTensor is %d",
-          rank));
-  if (rank == 2) {
-    return src;
-  }
-  phi::DenseTensor res;
-  res.ShareDataWith(src);
-  res.Resize(phi::flatten_to_2d(src.dims(), num_col_dims));
-  return res;
-}
-
 template <typename T>
 inline T GetValue(const phi::DenseTensor* x) {
  T value = static_cast<T>(0);

--- a/paddle/fluid/operators/bpr_loss_op.h
+++ b/paddle/fluid/operators/bpr_loss_op.h
@@ -16,6 +16,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/platform/for_range.h"
+#include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/kernels/funcs/math_function.h"

 namespace paddle {
@@ -44,9 +45,9 @@ class BprLossOpKernel : public framework::OpKernel<T> {
    y->mutable_data<T>(ctx.GetPlace());
    int rank = x->dims().size();

-    phi::DenseTensor x_2d = framework::ReshapeToMatrix(*x, rank - 1);
-    phi::DenseTensor labels_2d = framework::ReshapeToMatrix(*label, rank - 1);
-    phi::DenseTensor y_2d = framework::ReshapeToMatrix(*y, rank - 1);
+    phi::DenseTensor x_2d = phi::ReshapeToMatrix(*x, rank - 1);
+    phi::DenseTensor labels_2d = phi::ReshapeToMatrix(*label, rank - 1);
+    phi::DenseTensor y_2d = phi::ReshapeToMatrix(*y, rank - 1);

    const phi::DenseTensor* logits = &x_2d;
    const phi::DenseTensor* labels = &labels_2d;

--- a/paddle/fluid/operators/cross_entropy_op.h
+++ b/paddle/fluid/operators/cross_entropy_op.h
@@ -16,6 +16,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/platform/for_range.h"
+#include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/kernels/funcs/cross_entropy.h"
 #include "paddle/phi/kernels/funcs/math.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
@@ -34,7 +35,7 @@ class CrossEntropyOpKernel : public framework::OpKernel<T> {

    int rank = x->dims().size();
    auto label_dims = labels->dims();
-    phi::DenseTensor x_2d = framework::ReshapeToMatrix(*x, rank - 1);
+    phi::DenseTensor x_2d = phi::ReshapeToMatrix(*x, rank - 1);
    phi::DenseTensor labels_2d, y_2d;
    if (label_dims.size() < rank) {
      labels_2d.ShareDataWith(*labels);
@@ -44,8 +45,8 @@ class CrossEntropyOpKernel : public framework::OpKernel<T> {
      y_2d.Resize({phi::product(y->dims()), 1});

    } else {
-      labels_2d = framework::ReshapeToMatrix(*labels, rank - 1);
-      y_2d = framework::ReshapeToMatrix(*y, rank - 1);
+      labels_2d = phi::ReshapeToMatrix(*labels, rank - 1);
+      y_2d = phi::ReshapeToMatrix(*y, rank - 1);
    }

    int axis_dim = x->dims()[rank - 1];

--- a/paddle/fluid/operators/fused/multihead_matmul_op.cu
+++ b/paddle/fluid/operators/fused/multihead_matmul_op.cu
@@ -21,6 +21,7 @@
 #include "paddle/fluid/memory/malloc.h"
 #include "paddle/fluid/operators/math/bert_encoder_functor.h"
 #include "paddle/fluid/platform/float16.h"
+#include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"

 namespace paddle {
@@ -343,10 +344,10 @@ class MultiHeadMatMulV2Kernel : public framework::OpKernel<T> {

    // (B*S, hidden)
    const phi::DenseTensor input_matrix =
-        framework::ReshapeToMatrix(*input, 2 /*x_num_col_dims */);
+        phi::ReshapeToMatrix(*input, 2 /*x_num_col_dims */);
    // (hidden, 3 * all_head_size)
    const phi::DenseTensor w_matrix =
-        framework::ReshapeToMatrix(*w, 1 /*y_num_col_dims*/);
+        phi::ReshapeToMatrix(*w, 1 /*y_num_col_dims*/);

    phi::DenseTensor temp_out_tensor;
    auto temp_out_dims =

--- a/paddle/fluid/operators/isfinite_op.h
+++ b/paddle/fluid/operators/isfinite_op.h
@@ -129,17 +129,17 @@ inline void TensorIsfinite(const phi::DenseTensor& tensor,
 inline bool TensorContainsNAN(const phi::DenseTensor& tensor) {
  phi::DenseTensor out;
  TensorContainsNAN(tensor, &out);
-  return GetValue<bool>(&out);
+  return paddle::framework::GetValue<bool>(&out);
 }
 inline bool TensorContainsInf(const phi::DenseTensor& tensor) {
  phi::DenseTensor out;
  TensorContainsInf(tensor, &out);
-  return GetValue<bool>(&out);
+  return paddle::framework::GetValue<bool>(&out);
 }
 inline bool TensorIsfinite(const phi::DenseTensor& tensor) {
  phi::DenseTensor out;
  TensorIsfinite(tensor, &out);
-  return GetValue<bool>(&out);
+  return paddle::framework::GetValue<bool>(&out);
 }
 }  // namespace framework
 namespace operators {

--- a/paddle/phi/core/tensor_utils.cc
+++ b/paddle/phi/core/tensor_utils.cc
@@ -867,4 +867,60 @@ template void TensorToVector(const phi::DenseTensor& src,
 template void TensorToVector(const phi::DenseTensor& src,
                             std::vector<phi::dtype::complex<double>>* dst);

+// Views `src` as a 2-D tensor: rank-2 input is returned as is; higher ranks
+// share src's data and take the shape phi::flatten_to_2d(dims, num_col_dims).
+phi::DenseTensor ReshapeToMatrix(const phi::DenseTensor& src,
+                                 int num_col_dims) {
+  const int src_rank = src.dims().size();
+  PADDLE_ENFORCE_GE(
+      src_rank,
+      2,
+      phi::errors::InvalidArgument(
+          "'ReshapeToMatrix()' is only used for flatten high rank "
+          "tensors to matrixs. The dimensions of phi::DenseTensor must be "
+          "greater or equal than 2. "
+          "But received dimensions of phi::DenseTensor is %d",
+          src_rank));
+  if (src_rank == 2) return src;
+  phi::DenseTensor flattened;
+  flattened.ShareDataWith(src);
+  flattened.Resize(phi::flatten_to_2d(src.dims(), num_col_dims));
+  return flattened;
+}
+
+// Returns element 0 of `*x` as T, copying the tensor to CPU first when its
+// place is not CPUPlace(). `x` must be non-null.
+template <typename T>
+T GetValue(const phi::DenseTensor* x) {
+  // Same place test as the Context overload; avoids paddle::platform here.
+  if (x->place() != phi::CPUPlace()) {
+    phi::DenseTensor cpu_x{};
+    phi::DeviceContext* dev_ctx =
+        phi::DeviceContextPool::Instance().Get(x->place());
+    phi::Copy(*dev_ctx, *x, phi::CPUPlace(), true, &cpu_x);
+    return cpu_x.data<T>()[0];
+  }
+  return x->data<T>()[0];
+}
+
+template bool GetValue(const phi::DenseTensor* x);
+
+template int16_t GetValue(const phi::DenseTensor* x);
+
+template int GetValue(const phi::DenseTensor* x);
+
+template int64_t GetValue(const phi::DenseTensor* x);
+
+template float GetValue(const phi::DenseTensor* x);
+
+template double GetValue(const phi::DenseTensor* x);
+
+template phi::dtype::bfloat16 GetValue(const phi::DenseTensor* x);
+
+template phi::dtype::float16 GetValue(const phi::DenseTensor* x);
+
+template phi::dtype::complex<float> GetValue(const phi::DenseTensor* x);
+
+template phi::dtype::complex<double> GetValue(const phi::DenseTensor* x);
+
 }  // namespace phi
--- a/paddle/phi/core/tensor_utils.h
+++ b/paddle/phi/core/tensor_utils.h
@@ -126,4 +126,22 @@ void TensorToVector(const phi::DenseTensor& src,
                    const phi::DeviceContext& ctx,
                    std::vector<T>* dst);

+phi::DenseTensor ReshapeToMatrix(const phi::DenseTensor& src, int num_col_dims);
+
+template <typename T>
+T GetValue(const phi::DenseTensor* x);
+
+// Reads element 0 of `x` as T. A tensor whose place is not CPUPlace() is
+// first copied to CPU through `dev_ctx`; otherwise it is read in place.
+template <typename T, typename Context>
+inline T GetValue(const Context& dev_ctx, const DenseTensor& x) {
+  if (x.place() != CPUPlace()) {
+    DenseTensor host_x;
+    Copy(dev_ctx, x, CPUPlace(), true, &host_x);
+    return host_x.data<T>()[0];
+  }
+  // Already on CPU: no staging copy needed.
+  return x.data<T>()[0];
+}
+
 }  // namespace phi
--- a/paddle/phi/kernels/fusion/onednn/fused_matmul_kernel.cc
+++ b/paddle/phi/kernels/fusion/onednn/fused_matmul_kernel.cc
@@ -16,13 +16,14 @@

 #include "paddle/phi/backends/onednn/onednn_reuse.h"
 #include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/core/tensor_utils.h"

 using dnnl::engine;
 using dnnl::inner_product_forward;
 using dnnl::memory;
 using dnnl::prop_kind;
 using dnnl::stream;
-using paddle::framework::ReshapeToMatrix;
+using phi::ReshapeToMatrix;

 namespace phi {


--- a/paddle/phi/kernels/gpu/arange_kernel.cu
+++ b/paddle/phi/kernels/gpu/arange_kernel.cu
@@ -23,19 +23,6 @@

 namespace phi {

-template <typename T, typename Context>
-inline T GetValue(const Context& dev_ctx, const DenseTensor& x) {
-  T value = static_cast<T>(0);
-  if (x.place() != CPUPlace()) {
-    DenseTensor cpu_x;
-    Copy(dev_ctx, x, CPUPlace(), true, &cpu_x);
-    value = cpu_x.data<T>()[0];
-  } else {
-    value = x.data<T>()[0];
-  }
-  return value;
-}
-
 template <typename T>
 __global__ void Range(T start, T step, int64_t size, T* out) {
  CUDA_KERNEL_LOOP(index, size) { out[index] = start + step * index; }

--- a/paddle/phi/kernels/gpu/linspace_kernel.cu
+++ b/paddle/phi/kernels/gpu/linspace_kernel.cu
@@ -41,19 +41,6 @@ __global__ void LinspaceSpecialKernel(T start, T* out) {
  out[0] = static_cast<T>(start);
 }

-template <typename T, typename Context>
-T GetValue(const Context& ctx, const DenseTensor& x) {
-  T value = static_cast<T>(0);
-  if (x.place() != CPUPlace()) {
-    DenseTensor cpu_x;
-    Copy(ctx, x, CPUPlace(), true, &cpu_x);
-    value = cpu_x.data<T>()[0];
-  } else {
-    value = x.data<T>()[0];
-  }
-  return value;
-}
-
 template <typename T, typename Context>
 T GetValueOfExpectedType(const Context& ctx, const DenseTensor& x) {
  switch (x.dtype()) {

--- a/paddle/phi/kernels/impl/matmul_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/matmul_grad_kernel_impl.h
@@ -1872,12 +1872,10 @@ void MatmulWithFlattenGradKernel(const Context& dev_ctx,
                                 int y_num_col_dims,
                                 DenseTensor* x_grad,
                                 DenseTensor* y_grad) {
-  auto x_matrix = x.dims().size() > 2
-                      ? paddle::framework::ReshapeToMatrix(x, x_num_col_dims)
-                      : x;
-  auto y_matrix = y.dims().size() > 2
-                      ? paddle::framework::ReshapeToMatrix(y, y_num_col_dims)
-                      : y;
+  auto x_matrix =
+      x.dims().size() > 2 ? phi::ReshapeToMatrix(x, x_num_col_dims) : x;
+  auto y_matrix =
+      y.dims().size() > 2 ? phi::ReshapeToMatrix(y, y_num_col_dims) : y;
  auto* dout = &out_grad;

  DenseTensor dout_mat(*dout);
@@ -1898,9 +1896,7 @@ void MatmulWithFlattenGradKernel(const Context& dev_ctx,
  if (dx) {
    dev_ctx.template Alloc<T>(dx);
    DenseTensor dx_matrix =
-        dx->dims().size() > 2
-            ? paddle::framework::ReshapeToMatrix(*dx, x_num_col_dims)
-            : *dx;
+        dx->dims().size() > 2 ? phi::ReshapeToMatrix(*dx, x_num_col_dims) : *dx;

    // dx = dout * y'. dx: M x K, dout : M x N, y : K x N
    blas.MatMul(dout_mat, false, y_matrix, true, &dx_matrix);
@@ -1908,9 +1904,7 @@ void MatmulWithFlattenGradKernel(const Context& dev_ctx,
  if (dy) {
    dev_ctx.template Alloc<T>(dy);
    DenseTensor dy_matrix =
-        dy->dims().size() > 2
-            ? paddle::framework::ReshapeToMatrix(*dy, y_num_col_dims)
-            : *dy;
+        dy->dims().size() > 2 ? phi::ReshapeToMatrix(*dy, y_num_col_dims) : *dy;
    // dy = x' * dout. dy K x N, dout : M x N, x : M x K
    blas.MatMul(x_matrix, true, dout_mat, false, &dy_matrix);
  }
@@ -1929,12 +1923,10 @@ void MatmulWithFlattenDoubleGradKernel(
    DenseTensor* x_grad,
    DenseTensor* y_grad,
    DenseTensor* out_grad_grad) {
-  auto x_mat = x.dims().size() > 2
-                   ? paddle::framework::ReshapeToMatrix(x, x_num_col_dims)
-                   : x;
-  auto y_mat = y.dims().size() > 2
-                   ? paddle::framework::ReshapeToMatrix(y, y_num_col_dims)
-                   : y;
+  auto x_mat =
+      x.dims().size() > 2 ? phi::ReshapeToMatrix(x, x_num_col_dims) : x;
+  auto y_mat =
+      y.dims().size() > 2 ? phi::ReshapeToMatrix(y, y_num_col_dims) : y;

  const int m = phi::flatten_to_2d(x.dims(), x_num_col_dims)[0];
  const int n = phi::flatten_to_2d(y.dims(), y_num_col_dims)[1];
@@ -1965,20 +1957,18 @@ void MatmulWithFlattenDoubleGradKernel(
  // true, MatMul beta should be 1 to add result to ddout.
  bool ddout_flag = false;
  if (ddx) {
-    auto ddx_mat =
-        ddx->dims().size() > 2
-            ? paddle::framework::ReshapeToMatrix(*ddx, x_num_col_dims)
-            : static_cast<const DenseTensor&>(*ddx);
+    auto ddx_mat = ddx->dims().size() > 2
+                       ? phi::ReshapeToMatrix(*ddx, x_num_col_dims)
+                       : static_cast<const DenseTensor&>(*ddx);

    // dy = ddx' * dout. dy : K x M, ddx' : K x M, dout : M x N
    if (dy) {
      dy->set_lod(y.lod());
      // allocate and reshape dy
      dev_ctx.template Alloc<T>(dy);
-      DenseTensor dy_mat =
-          dy->dims().size() > 2
-              ? paddle::framework::ReshapeToMatrix(*dy, y_num_col_dims)
-              : *dy;
+      DenseTensor dy_mat = dy->dims().size() > 2
+                               ? phi::ReshapeToMatrix(*dy, y_num_col_dims)
+                               : *dy;
      blas.MatMul(ddx_mat, true, dout_mat, false, &dy_mat);
    }
    // ddout1 = ddx * y. ddx : M x K, y : K x N, ddout1 : M x N
@@ -1994,19 +1984,17 @@ void MatmulWithFlattenDoubleGradKernel(
    }
  }
  if (ddy) {
-    auto ddy_mat =
-        ddy->dims().size() > 2
-            ? paddle::framework::ReshapeToMatrix(*ddy, y_num_col_dims)
-            : static_cast<const DenseTensor&>(*ddy);
+    auto ddy_mat = ddy->dims().size() > 2
+                       ? phi::ReshapeToMatrix(*ddy, y_num_col_dims)
+                       : static_cast<const DenseTensor&>(*ddy);
    // dx = dout * ddy'. dout : M x N, ddy' : N x K, dx : M x K
    if (dx) {
      dx->set_lod(x.lod());
      // allocate and reshape dx
      dev_ctx.template Alloc<T>(dx);
-      DenseTensor dx_mat =
-          dx->dims().size() > 2
-              ? paddle::framework::ReshapeToMatrix(*dx, x_num_col_dims)
-              : *dx;
+      DenseTensor dx_mat = dx->dims().size() > 2
+                               ? phi::ReshapeToMatrix(*dx, x_num_col_dims)
+                               : *dx;
      blas.MatMul(dout_mat, false, ddy_mat, true, &dx_mat);
    }
    // ddout2 = x * ddy. x : M x K, ddy : K x N, ddout2 : M x N

--- a/paddle/phi/kernels/impl/matmul_kernel_impl.h
+++ b/paddle/phi/kernels/impl/matmul_kernel_impl.h
@@ -513,13 +513,9 @@ void MatmulWithFlattenKernel(const Context& dev_ctx,
                             int y_num_col_dims,
                             DenseTensor* out) {
  const DenseTensor x_matrix =
-      x.dims().size() > 2
-          ? paddle::framework::ReshapeToMatrix(x, x_num_col_dims)
-          : x;
+      x.dims().size() > 2 ? phi::ReshapeToMatrix(x, x_num_col_dims) : x;
  const DenseTensor y_matrix =
-      y.dims().size() > 2
-          ? paddle::framework::ReshapeToMatrix(y, y_num_col_dims)
-          : y;
+      y.dims().size() > 2 ? phi::ReshapeToMatrix(y, y_num_col_dims) : y;

  dev_ctx.template Alloc<T>(out);
  auto z_dim = out->dims();

--- a/paddle/phi/kernels/onednn/matmul_grad_kernel.cc
+++ b/paddle/phi/kernels/onednn/matmul_grad_kernel.cc
@@ -196,10 +196,8 @@ void MatmulWithFlattenGradKernel(const Context &dev_ctx,
                                 int y_num_col_dims,
                                 DenseTensor *x_grad,
                                 DenseTensor *y_grad) {
-  const DenseTensor reshaped_y =
-      paddle::framework::ReshapeToMatrix(y, y_num_col_dims);
-  const DenseTensor reshaped_x =
-      paddle::framework::ReshapeToMatrix(x, x_num_col_dims);
+  const DenseTensor reshaped_y = phi::ReshapeToMatrix(y, y_num_col_dims);
+  const DenseTensor reshaped_x = phi::ReshapeToMatrix(x, x_num_col_dims);
  const DenseTensor x_matrix = x.dims().size() > 2 ? reshaped_x : x;
  const DenseTensor y_matrix = y.dims().size() > 2 ? reshaped_y : y;


--- a/paddle/phi/kernels/onednn/matmul_kernel.cc
+++ b/paddle/phi/kernels/onednn/matmul_kernel.cc
@@ -24,7 +24,7 @@ using dnnl::inner_product_forward;
 using dnnl::memory;
 using dnnl::prop_kind;
 using dnnl::stream;
-using paddle::framework::ReshapeToMatrix;
+using phi::ReshapeToMatrix;

 namespace phi {


--- a/paddle/phi/kernels/xpu/arange_kernel.cc
+++ b/paddle/phi/kernels/xpu/arange_kernel.cc
@@ -14,33 +14,17 @@ limitations under the License. */

 #include "paddle/phi/kernels/arange_kernel.h"

-#include "paddle/fluid/memory/memcpy.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/range_function.h"

 namespace phi {

-template <typename T, typename Context>
-inline T GetValue(const Context& dev_ctx, const DenseTensor& x) {
-  T value = static_cast<T>(0);
-  if (x.place() != CPUPlace()) {
-    DenseTensor cpu_x;
-    Copy(dev_ctx, x, CPUPlace(), true, &cpu_x);
-    value = cpu_x.data<T>()[0];
-  } else {
-    value = x.data<T>()[0];
-  }
-  return value;
-}
 template <typename T, typename Context>
 void ArangeKernel(const Context& dev_ctx,
                  const DenseTensor& start,
                  const DenseTensor& end,
                  const DenseTensor& step,
                  DenseTensor* out) {
-  auto place = dev_ctx.GetPlace();
-  auto cpu_place = phi::CPUPlace();
-
  T start_value = GetValue<T, Context>(dev_ctx, start);
  T end_value = GetValue<T, Context>(dev_ctx, end);
  T step_value = GetValue<T, Context>(dev_ctx, step);
@@ -48,7 +32,7 @@ void ArangeKernel(const Context& dev_ctx,
  int64_t size = 0;
  phi::funcs::GetSize(start_value, end_value, step_value, &size);
  out->Resize(phi::make_ddim({size}));
-  T* out_data = dev_ctx.template Alloc<T>(out);
+  dev_ctx.template Alloc<T>(out);

  DenseTensor out_cpu;
  out_cpu.Resize({out->numel()});
@@ -60,8 +44,7 @@ void ArangeKernel(const Context& dev_ctx,
    out_cpu_data[i] = value;
    value += step_value;
  }
-  paddle::memory::Copy(
-      place, out_data, cpu_place, out_cpu_data, out->numel() * sizeof(T));
+  phi::Copy(dev_ctx, out_cpu, out->place(), true, out);
 }

 }  // namespace phi

--- a/paddle/phi/kernels/xpu/matmul_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/matmul_grad_kernel.cc
@@ -13,7 +13,6 @@
 // limitations under the License.

 #include "paddle/phi/kernels/matmul_grad_kernel.h"
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "paddle/phi/backends/xpu/xpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
@@ -110,12 +109,10 @@ void MatmulWithFlattenGradKernel(const Context& dev_ctx,
                                 DenseTensor* y_grad) {
  using XPUType = typename XPUTypeTrait<T>::Type;

-  auto x_matrix = x.dims().size() > 2
-                      ? paddle::framework::ReshapeToMatrix(x, x_num_col_dims)
-                      : static_cast<const DenseTensor&>(x);
-  auto y_matrix = y.dims().size() > 2
-                      ? paddle::framework::ReshapeToMatrix(y, y_num_col_dims)
-                      : static_cast<const DenseTensor&>(y);
+  auto x_matrix = x.dims().size() > 2 ? phi::ReshapeToMatrix(x, x_num_col_dims)
+                                      : static_cast<const DenseTensor&>(x);
+  auto y_matrix = y.dims().size() > 2 ? phi::ReshapeToMatrix(y, y_num_col_dims)
+                                      : static_cast<const DenseTensor&>(y);
  DenseTensor dout_mat;
  dout_mat.Resize({phi::flatten_to_2d(x.dims(), x_num_col_dims)[0],
                   phi::flatten_to_2d(y.dims(), y_num_col_dims)[1]});

--- a/paddle/phi/kernels/xpu/matmul_kernel.cc
+++ b/paddle/phi/kernels/xpu/matmul_kernel.cc
@@ -13,7 +13,6 @@
 // limitations under the License.

 #include "paddle/phi/kernels/matmul_kernel.h"
-#include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "paddle/phi/backends/xpu/xpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
@@ -52,13 +51,9 @@ void MatmulWithFlattenKernel(const Context& dev_ctx,
                             DenseTensor* out) {
  using XPUType = typename XPUTypeTrait<T>::Type;
  const DenseTensor x_matrix =
-      x.dims().size() > 2
-          ? paddle::framework::ReshapeToMatrix(x, x_num_col_dims)
-          : x;
+      x.dims().size() > 2 ? phi::ReshapeToMatrix(x, x_num_col_dims) : x;
  const DenseTensor y_matrix =
-      y.dims().size() > 2
-          ? paddle::framework::ReshapeToMatrix(y, y_num_col_dims)
-          : y;
+      y.dims().size() > 2 ? phi::ReshapeToMatrix(y, y_num_col_dims) : y;
  dev_ctx.template Alloc<T>(out);

  const XPUType* x_ptr = reinterpret_cast<const XPUType*>(x_matrix.data<T>());