Unverified commit b46e49de, authored by Chen Weihang, committed by GitHub

[Phi] Remove storage deps of empty (#40136)

* remove storage deps of empty

* remove invalid empty method

* remove error empty using

* fix test_sparse_utils_dev_api

* revert some sparse change

* add memset for conv grad

* resolve conflict

* resolve conflict

* resolve conflict
Parent commit: 6a0d60d2
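The recurring change in this diff replaces the context-only helper `phi::Empty<T, Context>(dev_ctx)` (which attached a `SharedStorage` up front) with a default-constructed `DenseTensor` whose meta is filled in by the matching InferMeta call, leaving allocation to the kernel through `dev_ctx`. A minimal sketch of the new pattern, adapted from the `Cast` helper changed below; the exact header list is an assumption:

```cpp
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/meta_tensor.h"
#include "paddle/phi/infermeta/unary.h"
#include "paddle/phi/kernels/cast_kernel.h"

namespace phi {

template <typename T, typename Context>
DenseTensor Cast(const Context& dev_ctx, const DenseTensor& x, DataType out_dtype) {
  DenseTensor dense_out;                   // no storage attached yet
  MetaTensor meta_out(&dense_out);
  CastInferMeta(x, out_dtype, &meta_out);  // fills dims/dtype on dense_out
  CastKernel<T, Context>(dev_ctx, x, out_dtype, &dense_out);  // kernel allocates via dev_ctx and writes
  return dense_out;
}

}  // namespace phi
```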
@@ -16,6 +16,7 @@ limitations under the License. */
 #include "paddle/phi/api/ext/dispatch.h"
 #include "paddle/phi/api/lib/kernel_dispatch.h"
+#include "paddle/phi/api/lib/utils/storage.h"
 #include "paddle/phi/backends/all_context.h"
 #include "paddle/phi/kernels/cast_kernel.h"
 #include "paddle/phi/kernels/transfer_layout_kernel.h"
......
@@ -9,13 +9,22 @@ add_subdirectory(funcs)
 # phi depends all phi kernel targets
 set_property(GLOBAL PROPERTY PHI_KERNELS "")
+# [ 1. Common kernel compilation dependencies ]
 set(COMMON_KERNEL_DEPS dense_tensor sparse_coo_tensor sparse_csr_tensor kernel_context kernel_factory arg_map_context convert_utils lod_utils)
 set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} eigen_function blas math_function im2col vol2col concat_and_split_functor softmax)
 # remove this dep after removing fluid deps on tensor creation
 set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} phi_api_utils)
 set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} infermeta)
-# NOTE: Some kernels depend on some targets that are not commonly used.
+# [ 2. Kernels that most kernels depend on ]
+# There are a few kernels that are very basic operations, and most of the
+# kernels depend on these kernels.
+set(COMMON_BAISC_KERNELS empty_kernel full_kernel)
+kernel_library(empty_kernel DEPS ${COMMON_KERNEL_DEPS})
+kernel_library(full_kernel DEPS ${COMMON_KERNEL_DEPS} empty_kernel)
+# [ 3. Kernels with special dependencies ]
+# Some kernels depend on some targets that are not commonly used.
 # These targets are not suitable for common dependencies.
 # In this case, you need to manually generate them here.
 set(MANUAL_BUILD_KERNELS math_kernel softmax_kernel softmax_grad_kernel triangular_solve_grad_kernel)

@@ -24,8 +33,8 @@ kernel_library(softmax_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
 kernel_library(softmax_grad_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
 kernel_library(triangular_solve_grad_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_reduce)
-# auto parse and build kernel targets by cmake
+# 4. auto parse and build kernel targets by cmake
-register_kernels(EXCLUDES ${MANUAL_BUILD_KERNELS} DEPS ${COMMON_KERNEL_DEPS})
+register_kernels(EXCLUDES ${COMMON_BAISC_KERNELS} ${MANUAL_BUILD_KERNELS} DEPS ${COMMON_KERNEL_DEPS} ${COMMON_BAISC_KERNELS} )
 # phi sparse kernels
 add_subdirectory(sparse)
......
@@ -29,7 +29,7 @@ template <typename T, typename Context>
 DenseTensor Cast(const Context& dev_ctx,
                  const DenseTensor& x,
                  DataType out_dtype) {
-  auto dense_out = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   CastInferMeta(x, out_dtype, &meta_out);
   CastKernel<T, Context>(dev_ctx, x, out_dtype, &dense_out);
......
@@ -38,7 +38,7 @@ template <
               std::is_same<T, phi::dtype::complex<double>>::value,
           bool> = true>
 DenseTensor Conj(const Context& dev_ctx, const DenseTensor& x) {
-  auto dense_out = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   UnchangedInferMeta(x, &meta_out);
   ConjKernel<T>(dev_ctx, x, &dense_out);

@@ -64,7 +64,7 @@ template <
               std::is_same<T, phi::dtype::complex<double>>::value,
           bool> = true>
 DenseTensor Real(const Context& dev_ctx, const DenseTensor& x) {
-  auto dense_out = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   RealAndImagInferMeta(x, &meta_out);
   RealKernel<T>(dev_ctx, x, &dense_out);

@@ -90,7 +90,7 @@ template <
               std::is_same<T, phi::dtype::complex<double>>::value,
           bool> = true>
 DenseTensor Imag(const Context& dev_ctx, const DenseTensor& x) {
-  auto dense_out = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   RealAndImagInferMeta(x, &meta_out);
   ImagKernel<T>(dev_ctx, x, &dense_out);
......
@@ -38,7 +38,7 @@ DenseTensor Concat(const Context& dev_ctx,
     meta_x_ptr.push_back(&meta_x.back());
   }
-  auto dense_out = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   ConcatInferMeta(meta_x_ptr, axis.to<int>(), &meta_out, /*is_runtime=*/true);
   ConcatKernel<T, Context>(dev_ctx, x, axis, &dense_out);
......
@@ -29,7 +29,7 @@ template <typename T, typename Context>
 DenseTensor Dot(const Context& dev_ctx,
                 const DenseTensor& x,
                 const DenseTensor& y) {
-  auto dense_out = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   DotInferMeta(x, y, &meta_out);
   DotKernel<T, Context>(dev_ctx, x, y, &dense_out);
......
@@ -14,9 +14,9 @@
 #pragma once
-#include "paddle/phi/api/lib/utils/storage.h"
 #include "paddle/phi/common/scalar_array.h"
 #include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/device_context.h"
 #include "paddle/phi/infermeta/nullary.h"
 #include "paddle/phi/infermeta/unary.h"

@@ -34,28 +34,17 @@ void EmptyLikeKernel(const Context& dev_ctx,
                      DataType dtype,
                      DenseTensor* out);
-// TODO(chenweihang): the tensor creation method need to be replaced later,
-// all kernel api call Empty here instead of making tensor self
 template <typename Context>
 DenseTensor Empty(const Context& dev_ctx, DenseTensorMeta&& meta) {
-  phi::DenseTensor dense_out(
-      phi::make_intrusive<paddle::experimental::SharedStorage>(
-          dev_ctx.GetPlace()),
-      std::move(meta));
+  phi::DenseTensor dense_out;
+  dense_out.set_meta(meta);
+  dev_ctx.Alloc(&dense_out, dense_out.dtype());
   return dense_out;
 }
-template <typename T, typename Context>
-DenseTensor Empty(const Context& dev_ctx) {
-  return Empty(dev_ctx,
-               {paddle::experimental::CppTypeToDataType<T>::Type(),
-                {-1},
-                DataLayout::NCHW});
-}
 template <typename T, typename Context>
 DenseTensor Empty(const Context& dev_ctx, const ScalarArray& shape) {
-  auto dense_out = Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   DataType dtype = paddle::experimental::CppTypeToDataType<T>::Type();
   CreateInferMeta(shape, dtype, &meta_out);

@@ -65,7 +54,7 @@ DenseTensor Empty(const Context& dev_ctx, const ScalarArray& shape) {
 template <typename T, typename Context>
 DenseTensor EmptyLike(const Context& dev_ctx, const DenseTensor& x) {
-  auto dense_out = Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   DataType dtype = paddle::experimental::CppTypeToDataType<T>::Type();
   CreateLikeInferMeta(x, dtype, &meta_out);
......
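After this change, `Empty(dev_ctx, meta)` simply sets the meta on a default-constructed tensor and allocates through the device context, while the typed shape overload routes through `CreateInferMeta` and the `EmptyKernel`. A short usage sketch of the two surviving overloads; the dtype, shape, and the already-initialized `dev_ctx` are illustrative assumptions, not code from this diff:

```cpp
#include <utility>

#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/empty_kernel.h"

// Assumes dev_ctx has been initialized and given an allocator (as in the
// test changes later in this diff).
void EmptyUsageSketch(const phi::CPUContext& dev_ctx) {
  // Meta-based overload: set_meta + dev_ctx.Alloc, no InferMeta involved.
  phi::DenseTensorMeta meta(phi::DataType::FLOAT32, phi::make_ddim({2, 3}));
  phi::DenseTensor a = phi::Empty(dev_ctx, std::move(meta));

  // Shape-based overload: CreateInferMeta fills the meta, EmptyKernel allocates.
  phi::DenseTensor b = phi::Empty<float>(dev_ctx, {2, 3});
}
```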
@@ -40,7 +40,7 @@ DenseTensor Flatten(const Context& dev_ctx,
                     const DenseTensor& x,
                     int start_axis,
                     int stop_axis) {
-  auto dense_out = Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   FlattenInferMeta(x, start_axis, stop_axis, &meta_out);
   FlattenKernel<T, Context>(dev_ctx, x, start_axis, stop_axis, &dense_out);
......
@@ -41,7 +41,7 @@ template <typename T, typename Context>
 DenseTensor Full(const Context& dev_ctx,
                  const ScalarArray& shape,
                  const Scalar& val) {
-  auto dense_out = Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   DataType dtype = paddle::experimental::CppTypeToDataType<T>::Type();
   CreateInferMeta(shape, dtype, &meta_out);

@@ -53,7 +53,7 @@ template <typename T, typename Context>
 DenseTensor FullLike(const Context& dev_ctx,
                      const DenseTensor& x,
                      const Scalar& val) {
-  auto dense_out = Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   DataType dtype = paddle::experimental::CppTypeToDataType<T>::Type();
   CreateLikeInferMeta(x, dtype, &meta_out);
......
@@ -344,9 +344,8 @@ struct ReduceConfig {
                      const phi::GPUContext& dev_ctx,
                      phi::DenseTensor* tmp) {
    if (should_reduce_again) {
-      tmp->ResizeAndAllocate(phi::make_ddim(
+      tmp->Resize(phi::make_ddim(
          {static_cast<int64_t>(left_num * grid.z * grid.y * sizeof(Ty))}));
      output_data = dev_ctx.Alloc<Ty>(tmp);
    } else {
      output_data = y_data;

@@ -1053,8 +1052,8 @@ CubTensorReduceImpl(const Tx* x_data,
                                 reducer,
                                 reducer.initial(),
                                 stream);
-  phi::DenseTensor tmp =
-      phi::Empty<uint8_t>(dev_ctx, {static_cast<int64_t>(temp_storage_bytes)});
+  phi::DenseTensor tmp = phi::Empty<uint8_t, phi::GPUContext>(
+      dev_ctx, {static_cast<int64_t>(temp_storage_bytes)});
   auto* temp_storage = dev_ctx.Alloc<uint8_t>(&tmp);

@@ -1106,7 +1105,7 @@ void TensorReduceImpl(const phi::GPUContext& dev_ctx,
   //   y_data;
   phi::DDim tmp_ddim;
-  phi::DenseTensor tmp = phi::Empty<Ty>(dev_ctx);
+  phi::DenseTensor tmp;
   auto x_data = x.data<Tx>();
   auto y_data = y->data<Ty>();
......
@@ -329,8 +329,8 @@ void MatmulGradKernel(const Context& dev_ctx,
    x_conj = Conj<T>(dev_ctx, x);
    y_conj = Conj<T>(dev_ctx, y);
-    DenseTensor dx_help = Empty<T, Context>(dev_ctx);
-    DenseTensor dy_help = Empty<T, Context>(dev_ctx);
+    DenseTensor dx_help;
+    DenseTensor dy_help;
    if (transpose_x) {
      if (transpose_y) {

@@ -686,8 +686,8 @@ void MatmulDoubleGradKernel(const Context& dev_ctx,
      y_conj = Conj<T>(dev_ctx, y);
    }
-    DenseTensor dx_help = Empty<T>(dev_ctx);
-    DenseTensor dy_help = Empty<T>(dev_ctx);
+    DenseTensor dx_help;
+    DenseTensor dy_help;
    if (transpose_x) {
      if (transpose_y) {

@@ -1373,10 +1373,10 @@ void MatmulTripleGradKernel(const Context& dev_ctx,
    VLOG(3) << "It need cost much time to reduce sum for the broadcast and "
               "wastes the memory. So we should avoid the case in reality";
-    DenseTensor out_dx_help = Empty<T>(dev_ctx);
-    DenseTensor out_dy_help = Empty<T>(dev_ctx);
-    DenseTensor out_d_ddx_help = Empty<T>(dev_ctx);
-    DenseTensor out_d_ddy_help = Empty<T>(dev_ctx);
+    DenseTensor out_dx_help;
+    DenseTensor out_dy_help;
+    DenseTensor out_d_ddx_help;
+    DenseTensor out_d_ddy_help;
    if (out_d_dout) {
      ddx_conj = Conj<T>(dev_ctx, ddx);
......
@@ -49,7 +49,7 @@ void TriangularSolveGradKernel(const Context& dev_ctx,
   DenseTensor dy_bst = phi::Empty<T, Context>(dev_ctx, y_bst_dims_array);
   if (dy) {
     // calculate x's conjugate for complex
-    DenseTensor x_conj = phi::Empty<T, Context>(dev_ctx);
+    DenseTensor x_conj;
     x_conj.Resize(x.dims());
     phi::funcs::ForRange<Context> x_for_range(dev_ctx, x.numel());

@@ -76,7 +76,7 @@ void TriangularSolveGradKernel(const Context& dev_ctx,
   DenseTensor dx_bst = phi::Empty<T, Context>(dev_ctx, x_bst_dims_array);
   if (dx) {
     // calculate x's conjugate for complex
-    DenseTensor out_conj = phi::Empty<T, Context>(dev_ctx);
+    DenseTensor out_conj;
     out_conj.Resize(out.dims());
     phi::funcs::ForRange<Context> out_for_range(dev_ctx, out.numel());
......
@@ -109,7 +109,7 @@ template <typename T, typename Context>
 DenseTensor Add(const Context& dev_ctx,
                 const DenseTensor& x,
                 const DenseTensor& y) {
-  auto dense_out = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   ElementwiseInferMeta(x, y, &meta_out);
   AddKernel<T, Context>(dev_ctx, x, y, &dense_out);

@@ -120,7 +120,7 @@ template <typename T, typename Context>
 DenseTensor Subtract(const Context& dev_ctx,
                      const DenseTensor& x,
                      const DenseTensor& y) {
-  auto dense_out = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   ElementwiseInferMeta(x, y, &meta_out);
   SubtractKernel<T, Context>(dev_ctx, x, y, &dense_out);

@@ -131,7 +131,7 @@ template <typename T, typename Context>
 DenseTensor Divide(const Context& dev_ctx,
                    const DenseTensor& x,
                    const DenseTensor& y) {
-  auto dense_out = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   ElementwiseInferMeta(x, y, &meta_out);
   DivideKernel<T, Context>(dev_ctx, x, y, &dense_out);

@@ -142,7 +142,7 @@ template <typename T, typename Context>
 DenseTensor Multiply(const Context& dev_ctx,
                      const DenseTensor& x,
                      const DenseTensor& y) {
-  auto dense_out = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   ElementwiseInferMeta(x, y, &meta_out);
   MultiplyKernel<T, Context>(dev_ctx, x, y, &dense_out);

@@ -154,7 +154,7 @@ DenseTensor Mean(const Context& dev_ctx,
                  const DenseTensor& x,
                  const std::vector<int64_t>& axis,
                  bool keep_dim) {
-  auto dense_out = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   ReduceInferMetaBase(x, axis, keep_dim, false, x.dtype(), &meta_out);
   MeanKernel<T, Context>(dev_ctx, x, axis, keep_dim, &dense_out);

@@ -167,7 +167,7 @@ DenseTensor Sum(const Context& dev_ctx,
                 const std::vector<int64_t>& axis,
                 DataType dtype,
                 bool keep_dim) {
-  auto dense_out = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   SumInferMeta(x, axis, dtype, keep_dim, &meta_out);
   SumKernel<T, Context>(dev_ctx, x, axis, dtype, keep_dim, &dense_out);
......
@@ -35,7 +35,7 @@ DenseTensor Matmul(const Context& dev_ctx,
                    const DenseTensor& y,
                    bool transpose_x = false,
                    bool transpose_y = false) {
-  auto dense_out = Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   MatmulInferMeta(x, y, transpose_x, transpose_y, &meta_out);
   MatmulKernel<T, Context>(dev_ctx, x, y, transpose_x, transpose_y, &dense_out);
......
@@ -38,7 +38,7 @@ template <typename T, typename Context>
 DenseTensor Reshape(const Context& dev_ctx,
                     const DenseTensor& x,
                     const std::vector<int64_t>& shape) {
-  auto dense_out = Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   InferMetaFromVecValue(x, shape, &meta_out);
   ReshapeKernel<Context>(dev_ctx, x, ScalarArray(shape), &dense_out);
......
@@ -34,7 +34,7 @@ DenseTensor Scale(const Context& dev_ctx,
                   const Scalar& scale,
                   float bias,
                   bool bias_after_scale) {
-  auto dense_out = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   UnchangedInferMeta(x, &meta_out);
   ScaleKernel<T, Context>(
......
@@ -25,7 +25,7 @@ void SignKernel(const Context& dev_ctx, const DenseTensor& x, DenseTensor* out);
 template <typename T, typename Context>
 DenseTensor Sign(const Context& dev_ctx, const DenseTensor& x) {
-  auto dense_out = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   UnchangedInferMeta(x, &meta_out);
   SignKernel<T, Context>(dev_ctx, x, &dense_out);
......
@@ -13,9 +13,11 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/sparse_coo_tensor.h"
 #include "paddle/phi/kernels/empty_kernel.h"
+#include "paddle/phi/kernels/sparse/convolution_kernel.h"
 namespace phi {
 namespace sparse {

@@ -45,6 +47,7 @@ std::vector<DenseTensor> Conv3dGrad(const Context& dev_ctx,
                                     const int groups) {
   DenseTensor x_grad = phi::Empty<T, Context>(dev_ctx);
   DenseTensor kernel_grad = phi::Empty<T, Context>(dev_ctx);
+  // TODO(zhangkaihuo): call InferMeta func here
   Conv3dGradKernel<T, Context>(dev_ctx,
                                x,
                                rulebook,
......
@@ -14,11 +14,24 @@ limitations under the License. */
 #pragma once
+#include "paddle/phi/api/lib/utils/storage.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/sparse_coo_tensor.h"
 #include "paddle/phi/kernels/empty_kernel.h"
 namespace phi {
+template <typename T, typename Context>
+DenseTensor Empty(const Context& dev_ctx) {
+  phi::DenseTensor dense_out(
+      phi::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      {paddle::experimental::CppTypeToDataType<T>::Type(),
+       {-1},
+       DataLayout::NCHW});
+  return dense_out;
+}
 namespace sparse {
 struct Dims4D {
......
@@ -74,6 +74,7 @@ void Conv3dGradKernel(const Context& dev_ctx,
   dev_ctx.Alloc(
       kernel_grad, kernel_grad->dtype(), kernel_grad->numel() * sizeof(T));
   T* d_kernel_ptr = kernel_grad->data<T>();
+  memset(d_kernel_ptr, 0, sizeof(T) * kernel_grad->numel());
   Gather<T>(x.non_zero_elements().data<T>(),
             rulebook_ptr + rulebook_len,
......
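The `memset` added above matters because the kernel-gradient buffer is now raw, uninitialized memory obtained from `dev_ctx.Alloc`, and the CPU gradient kernel appears to accumulate partial results into it rather than fully overwrite it. A hedged sketch of that allocate-then-zero pattern; the function and parameter names are illustrative, not code from this diff:

```cpp
#include <cstring>  // std::memset

#include "paddle/phi/core/dense_tensor.h"

// Illustrative only: zero a freshly allocated dense tensor before a kernel
// accumulates into it. Host memory only; memset is not valid on device buffers.
template <typename T, typename Context>
void AllocAndZero(const Context& dev_ctx, phi::DenseTensor* grad) {
  T* ptr = dev_ctx.template Alloc<T>(grad);        // uninitialized host memory
  std::memset(ptr, 0, sizeof(T) * grad->numel());  // start accumulation from zero
}
```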
@@ -14,6 +14,7 @@ limitations under the License. */
 #pragma once
+#include "paddle/phi/api/lib/utils/storage.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/sparse_coo_tensor.h"
 #include "paddle/phi/core/sparse_csr_tensor.h"

@@ -63,8 +64,8 @@ template <typename T, typename Context>
 SparseCooTensor DenseToSparseCoo(const Context& dev_ctx,
                                  const DenseTensor& x,
                                  const int64_t sparse_dim) {
-  DenseTensor indices = phi::Empty<T, Context>(dev_ctx);
-  DenseTensor values = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor indices;
+  DenseTensor values;
   SparseCooTensor coo(indices, values, x.dims());
   DenseToSparseCooKernel<T, Context>(dev_ctx, x, sparse_dim, &coo);
   return coo;

@@ -78,8 +79,8 @@ void SparseCsrToCooKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 SparseCooTensor SparseCsrToCoo(const Context& dev_ctx,
                                const SparseCsrTensor& x) {
-  DenseTensor indices = phi::Empty<T, Context>(dev_ctx);
-  DenseTensor values = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor indices;
+  DenseTensor values;
   SparseCooTensor coo(indices, values, x.dims());
   SparseCsrToCooKernel<T, Context>(dev_ctx, x, &coo);
   return coo;

@@ -93,9 +94,9 @@ void SparseCooToCsrKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 SparseCsrTensor SparseCooToCsr(const Context& dev_ctx,
                                const SparseCooTensor& x) {
-  DenseTensor non_zero_crows = phi::Empty<int64_t, Context>(dev_ctx);
-  DenseTensor non_zero_cols = phi::Empty<int64_t, Context>(dev_ctx);
-  DenseTensor non_zero_elements = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor non_zero_crows;
+  DenseTensor non_zero_cols;
+  DenseTensor non_zero_elements;
   SparseCsrTensor csr(
       non_zero_crows, non_zero_cols, non_zero_elements, x.dims());
   SparseCooToCsrKernel<T, Context>(dev_ctx, x, &csr);

@@ -113,8 +114,8 @@ void DenseToSparseCsrKernel(const Context& dev_ctx,
                     phi::errors::InvalidArgument(
                         "SparseCsrTensor only support 2-D or 3-D Tensor."));
   const int64_t sparse_dim = x_dims.size() == 2 ? 2 : 3;
-  DenseTensor indices = phi::Empty<T, Context>(dev_ctx);
-  DenseTensor values = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor indices;
+  DenseTensor values;
   SparseCooTensor coo(indices, values, x.dims());
   DenseToSparseCooKernel<T, Context>(dev_ctx, x, sparse_dim, &coo);
   SparseCooToCsrKernel<T, Context>(dev_ctx, coo, out);

@@ -122,9 +123,9 @@ void DenseToSparseCsrKernel(const Context& dev_ctx,
 template <typename T, typename Context>
 SparseCsrTensor DenseToSparseCsr(const Context& dev_ctx, const DenseTensor& x) {
-  DenseTensor non_zero_crows = phi::Empty<int64_t, Context>(dev_ctx);
-  DenseTensor non_zero_cols = phi::Empty<int64_t, Context>(dev_ctx);
-  DenseTensor non_zero_elements = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor non_zero_crows;
+  DenseTensor non_zero_cols;
+  DenseTensor non_zero_elements;
   SparseCsrTensor csr(
       non_zero_crows, non_zero_cols, non_zero_elements, x.dims());
   DenseToSparseCsrKernel<T, Context>(dev_ctx, x, &csr);

@@ -148,8 +149,8 @@ template <typename T, typename Context>
 void SparseCsrToDenseKernel(const Context& dev_ctx,
                             const SparseCsrTensor& x,
                             DenseTensor* out) {
-  DenseTensor indices = phi::Empty<T, Context>(dev_ctx);
-  DenseTensor values = phi::Empty<T, Context>(dev_ctx);
+  DenseTensor indices;
+  DenseTensor values;
   SparseCooTensor coo(indices, values, x.dims());
   SparseCsrToCooKernel<T, Context>(dev_ctx, x, &coo);
   SparseCooToDenseKernel<T, Context>(dev_ctx, coo, out);
......
@@ -50,7 +50,7 @@ std::vector<DenseTensor> Split(const Context& dev_ctx,
   result.reserve(out_number);
   for (size_t i = 0; i < out_number; ++i) {
-    result.emplace_back(phi::Empty<T, Context>(dev_ctx));
+    result.emplace_back(DenseTensor());
     out_meta.emplace_back(&result.back());
     out_meta_ptr.push_back(&out_meta.back());
   }
......
@@ -32,7 +32,7 @@ template <typename T, typename Context>
 DenseTensor Transpose(const Context& dev_ctx,
                       const DenseTensor& x,
                       const std::vector<int>& axis) {
-  auto dense_out = Empty<T, Context>(dev_ctx);
+  DenseTensor dense_out;
   MetaTensor meta_out(&dense_out);
   TransposeInferMeta(x, axis, &meta_out);
   TransposeKernel<T, Context>(dev_ctx, x, axis, &dense_out);
......
@@ -20,6 +20,7 @@
 #include "paddle/phi/api/lib/api_registry.h"
 #include "paddle/phi/api/lib/kernel_dispatch.h"
 #include "paddle/phi/api/lib/utils/allocator.h"
+#include "paddle/phi/api/lib/utils/storage.h"
 #include "paddle/phi/common/scalar.h"
 #include "paddle/phi/common/scalar_array.h"
 #include "paddle/phi/core/kernel_registry.h"
......
@@ -90,6 +90,10 @@ void TestDenseToSparseCoo(const DenseTensor& dense_x,
   phi::CPUContext dev_ctx_cpu;
   dev_ctx_cpu.Init();
+  dev_ctx_cpu.SetAllocator(
+      paddle::memory::allocation::AllocatorFacade::Instance()
+          .GetAllocator(phi::CPUPlace())
+          .get());
   // 1. test cpu
   auto cpu_sparse_out =

@@ -300,6 +304,11 @@ void TestSparseCsrToCoo(const DDim& dense_dims,
   // 1. test cpu
   phi::CPUContext dev_ctx_cpu;
+  dev_ctx_cpu.Init();
+  dev_ctx_cpu.SetAllocator(
+      paddle::memory::allocation::AllocatorFacade::Instance()
+          .GetAllocator(phi::CPUPlace())
+          .get());
   auto cpu_sparse_out = sparse::SparseCsrToCoo<T>(dev_ctx_cpu, csr);
   CheckResult<T, int64_t>(&dev_ctx_cpu,
                           cpu_sparse_out,

@@ -473,6 +482,11 @@ void TestCooToCsr(const DDim& dense_dims,
   // 1. test cpu
   phi::CPUContext dev_ctx_cpu;
+  dev_ctx_cpu.Init();
+  dev_ctx_cpu.SetAllocator(
+      paddle::memory::allocation::AllocatorFacade::Instance()
+          .GetAllocator(phi::CPUPlace())
+          .get());
   auto cpu_sparse_out = sparse::SparseCooToCsr<T>(dev_ctx_cpu, coo);
   CheckCsrResult<T, int64_t>(&dev_ctx_cpu,
                              cpu_sparse_out,

@@ -563,6 +577,11 @@ void TestDenseToSparseCsr(const DenseTensor& dense_x,
   const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
       paddle::platform::CPUPlace());
   phi::CPUContext dev_ctx_cpu;
+  dev_ctx_cpu.Init();
+  dev_ctx_cpu.SetAllocator(
+      paddle::memory::allocation::AllocatorFacade::Instance()
+          .GetAllocator(phi::CPUPlace())
+          .get());
   // 1. test cpu
   auto cpu_sparse_out = sparse::DenseToSparseCsr<T>(dev_ctx_cpu, dense_x);

@@ -667,6 +686,11 @@ void TestSparseCooToDense(const DDim& dense_dims,
                           const int64_t non_zero_num,
                           const int64_t sparse_dim) {
   phi::CPUContext dev_ctx_cpu;
+  dev_ctx_cpu.Init();
+  dev_ctx_cpu.SetAllocator(
+      paddle::memory::allocation::AllocatorFacade::Instance()
+          .GetAllocator(phi::CPUPlace())
+          .get());
   const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
       paddle::platform::CPUPlace());

@@ -836,6 +860,11 @@ void TestSparseCsrToDense(const DDim& dense_dims,
   // 1. test cpu
   phi::CPUContext dev_ctx_cpu;
+  dev_ctx_cpu.Init();
+  dev_ctx_cpu.SetAllocator(
+      paddle::memory::allocation::AllocatorFacade::Instance()
+          .GetAllocator(phi::CPUPlace())
+          .get());
   DenseTensor cpu_sparse_out = sparse::SparseCsrToDense<T>(dev_ctx_cpu, csr);
   int cmp_cpu = memcmp(cpu_sparse_out.data<T>(),
                        dense_data.data(),
......
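All of the test changes above follow one fixture pattern: initialize the `CPUContext` and hand it a host allocator before any kernel runs, since kernels now allocate their outputs through `dev_ctx.Alloc` instead of through tensor-owned storage. A condensed sketch of that setup; the helper name and the `allocator_facade` header path are assumptions based on the lines above:

```cpp
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/common/place.h"

// Hypothetical helper mirroring the repeated test setup above.
void InitTestCPUContext(phi::CPUContext* dev_ctx_cpu) {
  dev_ctx_cpu->Init();
  // Kernels call dev_ctx.Alloc for their outputs, so the context needs a
  // host allocator wired in before the first kernel runs.
  dev_ctx_cpu->SetAllocator(
      paddle::memory::allocation::AllocatorFacade::Instance()
          .GetAllocator(phi::CPUPlace())
          .get());
}
```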