Unverified commit c334405f, authored by Chen Weihang and committed by GitHub

clean useless api tests in phi (#47321)

Parent 1cb12ff5
@@ -21,42 +21,6 @@ cc_test(
DEPS gtest)
set(COMMON_API_TEST_DEPS phi_tensor phi_api phi_api_utils)
cc_test(
test_mean_api
SRCS test_mean_api.cc
DEPS ${COMMON_API_TEST_DEPS})
cc_test(
test_dot_api
SRCS test_dot_api.cc
DEPS ${COMMON_API_TEST_DEPS})
cc_test(
test_matmul_api
SRCS test_matmul_api.cc
DEPS ${COMMON_API_TEST_DEPS})
cc_test(
test_empty_api
SRCS test_empty_api.cc
DEPS ${COMMON_API_TEST_DEPS})
cc_test(
test_fill_api
SRCS test_fill_api.cc
DEPS ${COMMON_API_TEST_DEPS} api_scalar)
cc_test(
test_elementwise_api
SRCS test_elementwise_api.cc
DEPS ${COMMON_API_TEST_DEPS})
cc_test(
test_embedding_api
SRCS test_embedding_api.cc
DEPS ${COMMON_API_TEST_DEPS})
cc_test(
test_cast_api
SRCS test_cast_api.cc
DEPS ${COMMON_API_TEST_DEPS})
cc_test(
test_reshape_api
SRCS test_reshape_api.cc
DEPS ${COMMON_API_TEST_DEPS})
cc_test(
test_to_api
SRCS test_to_api.cc
@@ -65,42 +29,14 @@ cc_test(
test_slice_api
SRCS test_slice_api.cc
DEPS ${COMMON_API_TEST_DEPS})
cc_test(
test_sum_api
SRCS test_sum_api.cc
DEPS ${COMMON_API_TEST_DEPS})
cc_test(
test_scale_api
SRCS test_scale_api.cc
DEPS ${COMMON_API_TEST_DEPS} api_scalar)
cc_test(
test_scale_benchmark
SRCS test_scale_benchmark.cc
DEPS ${COMMON_API_TEST_DEPS})
cc_test(
test_conj_api
SRCS test_conj_api.cc
DEPS ${COMMON_API_TEST_DEPS})
cc_test(
test_concat_api
SRCS test_concat_api.cc
DEPS ${COMMON_API_TEST_DEPS})
cc_test(
test_split_api
SRCS test_split_api.cc
DEPS ${COMMON_API_TEST_DEPS})
cc_test(
test_data_transform
SRCS test_data_transform.cc
DEPS ${COMMON_API_TEST_DEPS})
cc_test(
test_sparse_utils_api
SRCS test_sparse_utils_api.cc
DEPS ${COMMON_API_TEST_DEPS})
cc_test(
test_sparse_conv_api
SRCS test_sparse_conv_api.cc
DEPS ${COMMON_API_TEST_DEPS})
cc_test(
test_strings_empty_api
SRCS test_strings_empty_api.cc
@@ -109,7 +45,3 @@ cc_test(
test_strings_lower_upper_api
SRCS test_strings_lower_upper_api.cc
DEPS ${COMMON_API_TEST_DEPS})
cc_test(
test_add_n_api
SRCS test_add_n_api.cc
DEPS ${COMMON_API_TEST_DEPS})
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/phi/api/include/tensor.h"
#include "paddle/phi/api/lib/api_custom_impl.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/selected_rows.h"
PD_DECLARE_KERNEL(add_n_sr, CPU, ALL_LAYOUT);
namespace paddle {
namespace tests {
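// add_n over SelectedRows inputs: builds two 7x12 SelectedRows tensors, sums
// them through the add_n_impl custom API, and checks the output row count and
// the element-wise sums.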
TEST(API, add_n) {
// 1. create tensor
std::vector<int64_t> rows = {0, 1, 2, 3, 4, 5, 6};
int64_t row_numel = 12;
auto x_sr = std::make_shared<phi::SelectedRows>(rows, 10);
auto x_meta = phi::DenseTensorMeta(
phi::DataType::FLOAT32,
phi::make_ddim({static_cast<int64_t>(rows.size()), row_numel}),
phi::DataLayout::NCHW);
x_sr->mutable_value()->set_meta(x_meta);
x_sr->AllocateFrom(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get(),
phi::DataType::FLOAT32);
auto* dense_x_data = x_sr->mutable_value()->data<float>();
auto y_sr = std::make_shared<phi::SelectedRows>(rows, 10);
y_sr->mutable_value()->set_meta(x_meta);
y_sr->AllocateFrom(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get(),
phi::DataType::FLOAT32);
auto* dense_y_data = y_sr->mutable_value()->data<float>();
float sum[84] = {0.0};
for (size_t i = 0; i < 7; ++i) {
for (size_t j = 0; j < 12; ++j) {
dense_x_data[i * 12 + j] = (i * 4 + j);
dense_y_data[i * 12 + j] = (i * 4 + j);
sum[i * 12 + j] += (i * 4 + j) * 2;
}
}
paddle::experimental::Tensor x(x_sr);
paddle::experimental::Tensor y(y_sr);
auto out = paddle::experimental::add_n_impl({x, y});
// check result
ASSERT_EQ(
static_cast<int>(std::dynamic_pointer_cast<phi::SelectedRows>(out.impl())
->rows()
.size()),
7);
for (int64_t i = 0; i < 84; ++i) {
ASSERT_EQ(sum[i],
std::dynamic_pointer_cast<phi::SelectedRows>(out.impl())
->value()
.data<float>()[i]);
}
}
} // namespace tests
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/phi/api/include/api.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(cast, CPU, ALL_LAYOUT);
namespace paddle {
namespace tests {
namespace framework = paddle::framework;
using DDim = phi::DDim;
// TODO(chenweihang): Remove this test after the API is used in the dygraph
TEST(API, cast) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
auto dense_x = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 4}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
for (int i = 0; i < dense_x->numel(); i++) {
dense_x_data[i] = i;
}
paddle::experimental::Tensor x(dense_x);
phi::DataType out_dtype = phi::DataType::FLOAT64;
// 2. test API
auto out = paddle::experimental::cast(x, out_dtype);
// 3. check result
std::vector<int> expect_shape = {3, 4};
ASSERT_EQ(out.shape().size(), size_t(2));
ASSERT_EQ(out.shape()[0], expect_shape[0]);
ASSERT_EQ(out.shape()[1], expect_shape[1]);
ASSERT_EQ(out.numel(), 12);
ASSERT_EQ(out.is_cpu(), true);
ASSERT_EQ(out.type(), phi::DataType::FLOAT64);
ASSERT_EQ(out.layout(), phi::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
auto dense_out = std::dynamic_pointer_cast<phi::DenseTensor>(out.impl());
auto* dense_out_data = dense_out->data<double>();
for (int i = 0; i < dense_x->numel(); i++) {
ASSERT_NEAR(dense_out_data[i], static_cast<double>(dense_x_data[i]), 1e-6f);
}
}
TEST(Tensor, cast) {
auto x = paddle::experimental::full({3, 4}, 1.0, phi::DataType::FLOAT32);
auto y = x.cast(phi::DataType::INT32);
// check result
ASSERT_EQ(y.dims().size(), 2);
ASSERT_EQ(y.dims()[0], 3);
ASSERT_EQ(y.dims()[1], 4);
ASSERT_EQ(y.numel(), 12);
ASSERT_EQ(y.is_cpu(), true);
ASSERT_EQ(y.type(), phi::DataType::INT32);
ASSERT_EQ(y.layout(), phi::DataLayout::NCHW);
ASSERT_EQ(y.initialized(), true);
for (int64_t i = 0; i < y.numel(); ++i) {
ASSERT_EQ(y.mutable_data<int>()[i], 1);
}
}
} // namespace tests
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/phi/api/include/api.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
PD_DECLARE_KERNEL(concat, CPU, ALL_LAYOUT);
namespace paddle {
namespace tests {
using DDim = phi::DDim;
// TODO(chentianyu03): Remove this test after the API is used in the dygraph
TEST(API, concat) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
auto dense_x = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 10}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
auto dense_y = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 10}),
phi::DataLayout::NCHW));
auto* dense_y_data =
dense_y->mutable_data<float>(paddle::platform::CPUPlace());
for (size_t i = 0; i < 3; ++i) {
for (size_t j = 0; j < 10; ++j) {
dense_x_data[i * 10 + j] = (i * 10 + j) * 1.0;
dense_y_data[i * 10 + j] = (i * 10 + j) * 1.0;
}
}
paddle::experimental::Tensor x(dense_x);
paddle::experimental::Tensor y(dense_y);
std::vector<paddle::experimental::Tensor> inputs{x, y};
// 2. test API
auto out = paddle::experimental::concat(inputs, 0);
// 3. check result
ASSERT_EQ(out.dims().size(), 2);
ASSERT_EQ(out.dims()[0], 6);
ASSERT_EQ(out.dims()[1], 10);
ASSERT_EQ(out.numel(), 60);
ASSERT_EQ(out.is_cpu(), true);
ASSERT_EQ(out.type(), phi::DataType::FLOAT32);
ASSERT_EQ(out.layout(), phi::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
auto dense_out = std::dynamic_pointer_cast<phi::DenseTensor>(out.impl());
auto out_data = dense_out->data<float>();
for (size_t i = 0; i < 60; ++i) {
if (i < 30) {
ASSERT_NEAR(dense_x_data[i], out_data[i], 1e-6f);
} else {
ASSERT_NEAR(dense_y_data[i - 30], out_data[i], 1e-6f);
}
}
}
} // namespace tests
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/phi/api/include/api.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
PD_DECLARE_KERNEL(conj, CPU, ALL_LAYOUT);
namespace paddle {
namespace tests {
namespace framework = paddle::framework;
using DDim = phi::DDim;
// TODO(chenweihang): Remove this test after the API is used in the dygraph
TEST(API, conj) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
auto dense_x = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(phi::DataType::COMPLEX64,
phi::make_ddim({3, 10}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x->mutable_data<paddle::complex64>(paddle::platform::CPUPlace());
for (size_t i = 0; i < 3; ++i) {
for (size_t j = 0; j < 10; ++j) {
dense_x_data[i * 10 + j] = paddle::complex64(i * 10 + j, i * 10 + j);
}
}
paddle::experimental::Tensor x(dense_x);
// 2. test API
auto out = paddle::experimental::conj(x);
// 3. check result
ASSERT_EQ(out.dims().size(), 2);
ASSERT_EQ(out.dims()[0], 3);
ASSERT_EQ(out.dims()[1], 10);
ASSERT_EQ(out.numel(), 30);
ASSERT_EQ(out.is_cpu(), true);
ASSERT_EQ(out.type(), phi::DataType::COMPLEX64);
ASSERT_EQ(out.layout(), phi::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
auto dense_out = std::dynamic_pointer_cast<phi::DenseTensor>(out.impl());
auto actual_result = dense_out->data<paddle::complex64>();
for (size_t i = 0; i < 3; ++i) {
for (size_t j = 0; j < 10; ++j) {
dense_x_data[i * 10 + j] = paddle::complex64(i * 10 + j, i * 10 + j);
ASSERT_NEAR(actual_result[i * 10 + j].real, 1.0 * (i * 10 + j), 1e-6f);
ASSERT_NEAR(actual_result[i * 10 + j].imag, -1.0 * (i * 10 + j), 1e-6f);
}
}
}
} // namespace tests
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/phi/api/include/api.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
PD_DECLARE_KERNEL(dot, CPU, ALL_LAYOUT);
namespace paddle {
namespace tests {
namespace framework = paddle::framework;
using DDim = phi::DDim;
// TODO(chenweihang): Remove this test after the API is used in the dygraph
TEST(API, dot) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
auto dense_x = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 10}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
auto dense_y = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 10}),
phi::DataLayout::NCHW));
auto* dense_y_data =
dense_y->mutable_data<float>(paddle::platform::CPUPlace());
float sum[3] = {0.0, 0.0, 0.0};
for (size_t i = 0; i < 3; ++i) {
for (size_t j = 0; j < 10; ++j) {
dense_x_data[i * 10 + j] = (i * 10 + j) * 1.0;
dense_y_data[i * 10 + j] = (i * 10 + j) * 1.0;
sum[i] += (i * 10 + j) * (i * 10 + j) * 1.0;
}
}
paddle::experimental::Tensor x(dense_x);
paddle::experimental::Tensor y(dense_y);
// 2. test API
auto out = paddle::experimental::dot(x, y);
// 3. check result
ASSERT_EQ(out.dims().size(), 2);
ASSERT_EQ(out.dims()[0], 3);
ASSERT_EQ(out.numel(), 3);
ASSERT_EQ(out.is_cpu(), true);
ASSERT_EQ(out.type(), phi::DataType::FLOAT32);
ASSERT_EQ(out.layout(), phi::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
auto expect_result = sum;
auto dense_out = std::dynamic_pointer_cast<phi::DenseTensor>(out.impl());
auto actual_result0 = dense_out->data<float>()[0];
auto actual_result1 = dense_out->data<float>()[1];
auto actual_result2 = dense_out->data<float>()[2];
ASSERT_NEAR(expect_result[0], actual_result0, 1e-6f);
ASSERT_NEAR(expect_result[1], actual_result1, 1e-6f);
ASSERT_NEAR(expect_result[2], actual_result2, 1e-6f);
}
} // namespace tests
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/phi/api/include/api.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(subtract, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(multiply, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(divide, CPU, ALL_LAYOUT);
namespace paddle {
namespace tests {
namespace framework = paddle::framework;
using DDim = phi::DDim;
// TODO(chenweihang): Remove this test after the API is used in the dygraph
TEST(API, add) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
auto dense_x = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 10}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
auto dense_y = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(
phi::DataType::FLOAT32, phi::make_ddim({10}), phi::DataLayout::NCHW));
auto* dense_y_data =
dense_y->mutable_data<float>(paddle::platform::CPUPlace());
float sum[3][10] = {0.0};
for (size_t i = 0; i < 3; ++i) {
for (size_t j = 0; j < 10; ++j) {
dense_x_data[i * 10 + j] = (i * 10 + j) * 1.0;
sum[i][j] = (i * 10 + j) * 1.0 + j * 2.0;
}
}
for (size_t i = 0; i < 10; ++i) {
dense_y_data[i] = i * 2.0;
}
paddle::experimental::Tensor x(dense_x);
paddle::experimental::Tensor y(dense_y);
// 2. test API
auto out = paddle::experimental::add(x, y);
// 3. check result
ASSERT_EQ(out.shape().size(), 2UL);
ASSERT_EQ(out.shape()[0], 3);
ASSERT_EQ(out.numel(), 30);
ASSERT_EQ(out.is_cpu(), true);
ASSERT_EQ(out.type(), phi::DataType::FLOAT32);
ASSERT_EQ(out.layout(), phi::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
auto expect_result = sum;
auto dense_out = std::dynamic_pointer_cast<phi::DenseTensor>(out.impl());
auto actual_result0 = dense_out->data<float>()[0];
auto actual_result1 = dense_out->data<float>()[1];
auto actual_result2 = dense_out->data<float>()[10];
ASSERT_NEAR(expect_result[0][0], actual_result0, 1e-6f);
ASSERT_NEAR(expect_result[0][1], actual_result1, 1e-6f);
ASSERT_NEAR(expect_result[1][0], actual_result2, 1e-6f);
}
// TODO(chenweihang): Remove this test after the API is used in the dygraph
TEST(API, subtract) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
auto dense_x = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 10}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
auto dense_y = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(
phi::DataType::FLOAT32, phi::make_ddim({10}), phi::DataLayout::NCHW));
auto* dense_y_data =
dense_y->mutable_data<float>(paddle::platform::CPUPlace());
float sub[3][10] = {0.0};
for (size_t i = 0; i < 3; ++i) {
for (size_t j = 0; j < 10; ++j) {
dense_x_data[i * 10 + j] = (i * 10 + j) * 1.0;
sub[i][j] = (i * 10 + j) * 1.0 - j * 2.0;
}
}
for (size_t i = 0; i < 10; ++i) {
dense_y_data[i] = i * 2.0;
}
paddle::experimental::Tensor x(dense_x);
paddle::experimental::Tensor y(dense_y);
// 2. test API
auto out = paddle::experimental::subtract(x, y);
// 3. check result
ASSERT_EQ(out.shape().size(), 2UL);
ASSERT_EQ(out.shape()[0], 3);
ASSERT_EQ(out.numel(), 30);
ASSERT_EQ(out.is_cpu(), true);
ASSERT_EQ(out.type(), phi::DataType::FLOAT32);
ASSERT_EQ(out.layout(), phi::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
auto expect_result = sub;
auto dense_out = std::dynamic_pointer_cast<phi::DenseTensor>(out.impl());
auto actual_result0 = dense_out->data<float>()[0];
auto actual_result1 = dense_out->data<float>()[1];
auto actual_result2 = dense_out->data<float>()[10];
ASSERT_NEAR(expect_result[0][0], actual_result0, 1e-6f);
ASSERT_NEAR(expect_result[0][1], actual_result1, 1e-6f);
ASSERT_NEAR(expect_result[1][0], actual_result2, 1e-6f);
}
// TODO(chenweihang): Remove this test after the API is used in the dygraph
TEST(API, divide) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
auto dense_x = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 10}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
auto dense_y = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(
phi::DataType::FLOAT32, phi::make_ddim({10}), phi::DataLayout::NCHW));
auto* dense_y_data =
dense_y->mutable_data<float>(paddle::platform::CPUPlace());
float div[3][10] = {0.0};
for (size_t i = 0; i < 3; ++i) {
for (size_t j = 0; j < 10; ++j) {
dense_x_data[i * 10 + j] = (i * 10 + j) * 1.0;
div[i][j] = (i * 10 + j) * 1.0 / (j * 2.0 + 1);
}
}
for (size_t i = 0; i < 10; ++i) {
dense_y_data[i] = i * 2.0 + 1;
}
paddle::experimental::Tensor x(dense_x);
paddle::experimental::Tensor y(dense_y);
// 2. test API
auto out = paddle::experimental::divide(x, y);
// 3. check result
ASSERT_EQ(out.shape().size(), 2UL);
ASSERT_EQ(out.shape()[0], 3);
ASSERT_EQ(out.numel(), 30);
ASSERT_EQ(out.is_cpu(), true);
ASSERT_EQ(out.type(), phi::DataType::FLOAT32);
ASSERT_EQ(out.layout(), phi::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
auto expect_result = div;
auto dense_out = std::dynamic_pointer_cast<phi::DenseTensor>(out.impl());
auto actual_result0 = dense_out->data<float>()[0];
auto actual_result1 = dense_out->data<float>()[1];
auto actual_result2 = dense_out->data<float>()[10];
ASSERT_NEAR(expect_result[0][0], actual_result0, 1e-6f);
ASSERT_NEAR(expect_result[0][1], actual_result1, 1e-6f);
ASSERT_NEAR(expect_result[1][0], actual_result2, 1e-6f);
}
TEST(API, multiply) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
auto dense_x = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 10}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
auto dense_y = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(
phi::DataType::FLOAT32, phi::make_ddim({10}), phi::DataLayout::NCHW));
auto* dense_y_data =
dense_y->mutable_data<float>(paddle::platform::CPUPlace());
float mul[3][10] = {0.0};
for (size_t i = 0; i < 3; ++i) {
for (size_t j = 0; j < 10; ++j) {
dense_x_data[i * 10 + j] = (i * 10 + j) * 1.0;
mul[i][j] = (i * 10 + j) * 1.0 * j * 2.0;
}
}
for (size_t i = 0; i < 10; ++i) {
dense_y_data[i] = i * 2.0;
}
paddle::experimental::Tensor x(dense_x);
paddle::experimental::Tensor y(dense_y);
// 2. test API
auto out = paddle::experimental::multiply(x, y);
// 3. check result
ASSERT_EQ(out.shape().size(), 2UL);
ASSERT_EQ(out.shape()[0], 3);
ASSERT_EQ(out.numel(), 30);
ASSERT_EQ(out.is_cpu(), true);
ASSERT_EQ(out.type(), phi::DataType::FLOAT32);
ASSERT_EQ(out.layout(), phi::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
auto expect_result = mul;
auto dense_out = std::dynamic_pointer_cast<phi::DenseTensor>(out.impl());
auto actual_result0 = dense_out->data<float>()[0];
auto actual_result1 = dense_out->data<float>()[1];
auto actual_result2 = dense_out->data<float>()[10];
ASSERT_NEAR(expect_result[0][0], actual_result0, 1e-6f);
ASSERT_NEAR(expect_result[0][1], actual_result1, 1e-6f);
ASSERT_NEAR(expect_result[1][0], actual_result2, 1e-6f);
}
} // namespace tests
} // namespace paddle
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/phi/api/backward/backward_api.h"
#include "paddle/phi/api/include/api.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
PD_DECLARE_KERNEL(sparse_weight_embedding, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(sparse_weight_embedding_grad, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(sparse_weight_embedding_sparse_grad, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(empty, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT);
namespace paddle {
namespace tests {
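// These tests use a SelectedRows weight (7 rows, height 16) to exercise the
// sparse-weight embedding kernels: the forward lookup, the dense gradient
// (weight_grad keeps the full height of 16), and the sparse gradient
// (weight_grad only holds the 4 looked-up rows).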
TEST(API, sparse_weight_embedding) {
auto x = paddle::experimental::empty({4}, DataType::INT32);
auto* x_data = x.data<int32_t>();
x_data[0] = 0;
x_data[1] = 4;
x_data[2] = 3;
x_data[3] = 1;
auto weight_sr = std::make_shared<phi::SelectedRows>(
std::vector<int64_t>{0, 1, 2, 3, 4, 5, 6}, 16);
*weight_sr->mutable_value() = *static_cast<phi::DenseTensor*>(
paddle::experimental::full({7, 3}, 2, DataType::FLOAT32).impl().get());
paddle::experimental::Tensor weight;
weight.set_impl(weight_sr);
auto out = paddle::experimental::embedding(x, weight);
// 3. check result
ASSERT_EQ(out.dims().size(), 2);
ASSERT_EQ(out.dims()[0], 4);
ASSERT_EQ(out.numel(), 12);
ASSERT_EQ(out.type(), phi::DataType::FLOAT32);
ASSERT_EQ(out.layout(), phi::DataLayout::NCHW);
}
TEST(API, sparse_weight_embedding_grad) {
auto x = paddle::experimental::empty({4}, DataType::INT32);
auto* x_data = x.data<int32_t>();
x_data[0] = 0;
x_data[1] = 4;
x_data[2] = 3;
x_data[3] = 1;
auto weight_sr = std::make_shared<phi::SelectedRows>(
std::vector<int64_t>{0, 1, 2, 3, 4, 5, 6}, 16);
*weight_sr->mutable_value() = *static_cast<phi::DenseTensor*>(
paddle::experimental::full({7, 3}, 2, DataType::FLOAT32).impl().get());
paddle::experimental::Tensor weight;
weight.set_impl(weight_sr);
auto out_grad = paddle::experimental::full({4, 3}, 1, DataType::FLOAT32);
paddle::experimental::Tensor weight_grad;
paddle::experimental::embedding_grad(
x, weight, out_grad, -1, false, &weight_grad);
// 3. check result
ASSERT_EQ(weight_grad.dims().size(), 2);
ASSERT_EQ(weight_grad.dims()[0], 16);
ASSERT_EQ(weight_grad.numel(), 48);
ASSERT_EQ(weight_grad.type(), phi::DataType::FLOAT32);
ASSERT_EQ(weight_grad.layout(), phi::DataLayout::NCHW);
}
TEST(API, sparse_weight_embedding_sparse_grad) {
auto x = paddle::experimental::empty({4}, DataType::INT32);
auto* x_data = x.data<int32_t>();
x_data[0] = 0;
x_data[1] = 4;
x_data[2] = 3;
x_data[3] = 1;
auto weight_sr = std::make_shared<phi::SelectedRows>(
std::vector<int64_t>{0, 1, 2, 3, 4, 5, 6}, 16);
*weight_sr->mutable_value() = *static_cast<phi::DenseTensor*>(
paddle::experimental::full({7, 3}, 2, DataType::FLOAT32).impl().get());
paddle::experimental::Tensor weight;
weight.set_impl(weight_sr);
auto out_grad = paddle::experimental::full({4, 3}, 1, DataType::FLOAT32);
paddle::experimental::Tensor weight_grad;
paddle::experimental::embedding_grad(
x, weight, out_grad, -1, true, &weight_grad);
// 3. check result
ASSERT_EQ(weight_grad.dims().size(), 2);
ASSERT_EQ(weight_grad.dims()[0], 4);
ASSERT_EQ(weight_grad.numel(), 12);
ASSERT_EQ(weight_grad.type(), phi::DataType::FLOAT32);
ASSERT_EQ(weight_grad.layout(), phi::DataLayout::NCHW);
}
} // namespace tests
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/phi/api/include/api.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
PD_DECLARE_KERNEL(empty, CPU, ALL_LAYOUT);
namespace paddle {
namespace tests {
namespace framework = paddle::framework;
using DDim = phi::DDim;
// TODO(chenweihang): Remove this test after the API is used in the dygraph
TEST(API, empty_like) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
auto dense_x = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 2}),
phi::DataLayout::NCHW));
paddle::experimental::Tensor x(dense_x);
// 2. test API
auto out = paddle::experimental::empty_like(x, phi::DataType::FLOAT32);
// 3. check result
ASSERT_EQ(out.dims().size(), 2);
ASSERT_EQ(out.dims()[0], 3);
ASSERT_EQ(out.numel(), 6);
ASSERT_EQ(out.type(), phi::DataType::FLOAT32);
ASSERT_EQ(out.layout(), phi::DataLayout::NCHW);
}
TEST(API, empty1) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
auto dense_shape = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(
phi::DataType::INT64, phi::make_ddim({2}), phi::DataLayout::NCHW));
auto* shape_data =
dense_shape->mutable_data<int64_t>(paddle::platform::CPUPlace());
shape_data[0] = 2;
shape_data[1] = 3;
paddle::experimental::Tensor tensor_shape(dense_shape);
// 2. test API
auto out = paddle::experimental::empty(tensor_shape, phi::DataType::FLOAT32);
// 3. check result
ASSERT_EQ(out.shape().size(), 2UL);
ASSERT_EQ(out.shape()[0], 2);
ASSERT_EQ(out.numel(), 6);
ASSERT_EQ(out.type(), phi::DataType::FLOAT32);
ASSERT_EQ(out.layout(), phi::DataLayout::NCHW);
}
TEST(API, empty2) {
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
auto dense_scalar = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(
phi::DataType::INT32, phi::make_ddim({1}), phi::DataLayout::NCHW));
dense_scalar->mutable_data<int32_t>(paddle::platform::CPUPlace())[0] = 2;
paddle::experimental::Tensor shape_scalar1(dense_scalar);
paddle::experimental::Tensor shape_scalar2(dense_scalar);
std::vector<paddle::experimental::Tensor> list_shape{shape_scalar1,
shape_scalar2};
auto out = paddle::experimental::empty(list_shape, phi::DataType::FLOAT32);
ASSERT_EQ(out.shape().size(), 2UL);
ASSERT_EQ(out.shape()[0], 2);
ASSERT_EQ(out.numel(), 4);
ASSERT_EQ(out.type(), phi::DataType::FLOAT32);
ASSERT_EQ(out.layout(), phi::DataLayout::NCHW);
}
TEST(API, empty3) {
std::vector<int64_t> vector_shape{2, 3};
auto out = paddle::experimental::empty(vector_shape, phi::DataType::INT32);
ASSERT_EQ(out.shape().size(), 2UL);
ASSERT_EQ(out.shape()[0], 2);
ASSERT_EQ(out.numel(), 6);
ASSERT_EQ(out.type(), phi::DataType::INT32);
ASSERT_EQ(out.layout(), phi::DataLayout::NCHW);
}
} // namespace tests
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/phi/api/include/api.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT);
namespace paddle {
namespace tests {
namespace framework = paddle::framework;
using DDim = phi::DDim;
// TODO(chenweihang): Remove this test after the API is used in the dygraph
TEST(API, full_like) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
auto dense_x = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 2}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
dense_x_data[0] = 0;
float val = 1.0;
paddle::experimental::Tensor x(dense_x);
// 2. test API
auto out = paddle::experimental::full_like(x, val, phi::DataType::FLOAT32);
// 3. check result
ASSERT_EQ(out.dims().size(), 2);
ASSERT_EQ(out.dims()[0], 3);
ASSERT_EQ(out.numel(), 6);
ASSERT_EQ(out.is_cpu(), true);
ASSERT_EQ(out.type(), phi::DataType::FLOAT32);
ASSERT_EQ(out.layout(), phi::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
auto dense_out = std::dynamic_pointer_cast<phi::DenseTensor>(out.impl());
auto* actual_result = dense_out->data<float>();
for (auto i = 0; i < 6; i++) {
ASSERT_NEAR(actual_result[i], val, 1e-6f);
}
}
TEST(API, zeros_like) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
auto dense_x = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 2}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
dense_x_data[0] = 1;
paddle::experimental::Tensor x(dense_x);
// 2. test API
auto out = paddle::experimental::zeros_like(x, phi::DataType::INT32);
// 3. check result
ASSERT_EQ(out.dims().size(), 2);
ASSERT_EQ(out.dims()[0], 3);
ASSERT_EQ(out.numel(), 6);
ASSERT_EQ(out.is_cpu(), true);
ASSERT_EQ(out.type(), phi::DataType::INT32);
ASSERT_EQ(out.layout(), phi::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
auto dense_out = std::dynamic_pointer_cast<phi::DenseTensor>(out.impl());
auto* actual_result = dense_out->data<int32_t>();
for (auto i = 0; i < 6; i++) {
ASSERT_EQ(actual_result[i], 0);
}
}
TEST(API, ones_like) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
auto dense_x = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(
phi::DataType::INT32, phi::make_ddim({3, 2}), phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x->mutable_data<int32_t>(paddle::platform::CPUPlace());
dense_x_data[0] = 0;
paddle::experimental::Tensor x(dense_x);
// 2. test API
auto out = paddle::experimental::ones_like(x, phi::DataType::INT32);
// 3. check result
ASSERT_EQ(out.dims().size(), 2);
ASSERT_EQ(out.dims()[0], 3);
ASSERT_EQ(out.numel(), 6);
ASSERT_EQ(out.is_cpu(), true);
ASSERT_EQ(out.type(), phi::DataType::INT32);
ASSERT_EQ(out.layout(), phi::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
auto dense_out = std::dynamic_pointer_cast<phi::DenseTensor>(out.impl());
auto* actual_result = dense_out->data<int32_t>();
for (auto i = 0; i < 6; i++) {
ASSERT_EQ(actual_result[i], 1);
}
}
TEST(API, full1) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
auto dense_shape = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(
phi::DataType::INT64, phi::make_ddim({2}), phi::DataLayout::NCHW));
auto* shape_data =
dense_shape->mutable_data<int64_t>(paddle::platform::CPUPlace());
shape_data[0] = 2;
shape_data[1] = 3;
auto dense_scalar = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(
phi::DataType::FLOAT32, phi::make_ddim({1}), phi::DataLayout::NCHW));
dense_scalar->mutable_data<float>(paddle::platform::CPUPlace())[0] = 1.0;
paddle::experimental::Tensor value(dense_scalar);
paddle::experimental::Tensor tensor_shape(dense_shape);
float val = 1.0;
// 2. test API
auto out =
paddle::experimental::full(tensor_shape, value, phi::DataType::FLOAT32);
// 3. check result
ASSERT_EQ(out.shape().size(), 2UL);
ASSERT_EQ(out.shape()[0], 2);
ASSERT_EQ(out.numel(), 6);
ASSERT_EQ(out.is_cpu(), true);
ASSERT_EQ(out.type(), phi::DataType::FLOAT32);
ASSERT_EQ(out.layout(), phi::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
auto dense_out = std::dynamic_pointer_cast<phi::DenseTensor>(out.impl());
auto* actual_result = dense_out->data<float>();
for (auto i = 0; i < 6; i++) {
ASSERT_NEAR(actual_result[i], val, 1e-6f);
}
}
TEST(API, full2) {
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
auto dense_scalar = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(
phi::DataType::INT32, phi::make_ddim({1}), phi::DataLayout::NCHW));
dense_scalar->mutable_data<int>(paddle::platform::CPUPlace())[0] = 2;
paddle::experimental::Tensor shape_scalar1(dense_scalar);
paddle::experimental::Tensor shape_scalar2(dense_scalar);
std::vector<paddle::experimental::Tensor> list_shape{shape_scalar1,
shape_scalar2};
float val = 1.0;
auto out =
paddle::experimental::full(list_shape, val, phi::DataType::FLOAT32);
ASSERT_EQ(out.shape().size(), 2UL);
ASSERT_EQ(out.shape()[0], 2);
ASSERT_EQ(out.numel(), 4);
ASSERT_EQ(out.is_cpu(), true);
ASSERT_EQ(out.type(), phi::DataType::FLOAT32);
ASSERT_EQ(out.layout(), phi::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
auto dense_out = std::dynamic_pointer_cast<phi::DenseTensor>(out.impl());
auto* actual_result = dense_out->data<float>();
for (auto i = 0; i < 4; i++) {
ASSERT_NEAR(actual_result[i], val, 1e-6f);
}
}
TEST(API, full3) {
std::vector<int64_t> vector_shape{2, 3};
float val = 1.0;
auto out =
paddle::experimental::full(vector_shape, val, phi::DataType::INT32);
ASSERT_EQ(out.shape().size(), 2UL);
ASSERT_EQ(out.shape()[0], 2);
ASSERT_EQ(out.numel(), 6);
ASSERT_EQ(out.is_cpu(), true);
ASSERT_EQ(out.type(), phi::DataType::INT32);
ASSERT_EQ(out.layout(), phi::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
auto dense_out = std::dynamic_pointer_cast<phi::DenseTensor>(out.impl());
auto* actual_result = dense_out->data<int>();
for (auto i = 0; i < 6; i++) {
ASSERT_EQ(actual_result[i], 1);
}
}
} // namespace tests
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/phi/api/backward/backward_api.h"
#include "paddle/phi/api/include/api.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/tensor_utils.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/device_context.h"
PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(matmul, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(matmul_double_grad, CPU, ALL_LAYOUT);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_DECLARE_KERNEL(matmul, GPU, ALL_LAYOUT);
#endif
namespace paddle {
namespace tests {
namespace framework = paddle::framework;
using DDim = phi::DDim;
TEST(API, matmul_cpu) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
auto dense_x = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 3}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
auto dense_y = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 3}),
phi::DataLayout::NCHW));
auto* dense_y_data =
dense_y->mutable_data<float>(paddle::platform::CPUPlace());
for (size_t i = 0; i < 9; ++i) {
dense_x_data[i] = 1.0;
dense_y_data[i] = 2.0;
}
std::vector<float> sum(9, 6.0);
paddle::experimental::Tensor x(dense_x);
paddle::experimental::Tensor y(dense_y);
// 2. test API
auto out = paddle::experimental::matmul(x, y, false, false);
// 3. check result
ASSERT_EQ(out.dims().size(), 2);
ASSERT_EQ(out.dims()[0], 3);
ASSERT_EQ(out.dims()[1], 3);
ASSERT_EQ(out.numel(), 9);
ASSERT_EQ(out.type(), phi::DataType::FLOAT32);
ASSERT_EQ(out.layout(), phi::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
auto dense_out = std::dynamic_pointer_cast<phi::DenseTensor>(out.impl());
for (size_t i = 0; i < 9; i++) {
ASSERT_NEAR(sum[i], dense_out->data<float>()[i], 1e-6f);
}
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
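// Same 3x3 matmul as above, but the inputs are prepared on the CPU, copied to
// the GPU with phi::Copy, multiplied there, and the result is copied back to
// the CPU for comparison.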
TEST(API, matmul_cuda) {
// Prepare CPU Dense Tensor
const auto alloc_cpu =
std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
auto ref_x = std::make_shared<phi::DenseTensor>(
alloc_cpu.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 3}),
phi::DataLayout::NCHW));
auto* ref_x_data = ref_x->mutable_data<float>(paddle::platform::CPUPlace());
auto ref_y = std::make_shared<phi::DenseTensor>(
alloc_cpu.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 3}),
phi::DataLayout::NCHW));
auto* ref_y_data = ref_y->mutable_data<float>(paddle::platform::CPUPlace());
for (size_t i = 0; i < 9; ++i) {
ref_x_data[i] = 1.0;
ref_y_data[i] = 2.0;
}
std::vector<float> sum(9, 6.0);
// 1. create tensor
const auto alloc_cuda =
std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CUDAPlace());
auto dense_x = std::make_shared<phi::DenseTensor>(
alloc_cuda.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 3}),
phi::DataLayout::NCHW));
auto dense_y = std::make_shared<phi::DenseTensor>(
alloc_cuda.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 3}),
phi::DataLayout::NCHW));
auto& pool = paddle::platform::DeviceContextPool::Instance();
auto place = paddle::platform::CUDAPlace();
auto* dev_ctx = static_cast<const phi::GPUContext*>(pool.GetByPlace(place));
phi::Copy(*dev_ctx, *ref_x.get(), phi::GPUPlace(), false, dense_x.get());
phi::Copy(*dev_ctx, *ref_y.get(), phi::GPUPlace(), false, dense_y.get());
paddle::experimental::Tensor x(dense_x);
paddle::experimental::Tensor y(dense_y);
// 2. test API
auto out = paddle::experimental::matmul(x, y, false, false);
// 3. check result
ASSERT_EQ(out.dims().size(), 2);
ASSERT_EQ(out.dims()[0], 3);
ASSERT_EQ(out.dims()[1], 3);
ASSERT_EQ(out.numel(), 9);
ASSERT_EQ(out.type(), phi::DataType::FLOAT32);
ASSERT_EQ(out.layout(), phi::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
auto dense_out = std::dynamic_pointer_cast<phi::DenseTensor>(out.impl());
auto ref_out = std::make_shared<phi::DenseTensor>(
alloc_cpu.get(),
phi::DenseTensorMeta(
phi::DataType::FLOAT32, out.dims(), phi::DataLayout::NCHW));
phi::Copy(*dev_ctx, *dense_out.get(), phi::CPUPlace(), false, ref_out.get());
for (size_t i = 0; i < 9; i++) {
ASSERT_NEAR(sum[i], ref_out->data<float>()[i], 1e-6f);
}
}
#endif
TEST(API, matmul_double_grad) {
// 1. create tensor
auto x = paddle::experimental::full({3, 3}, 1.0);
auto y = paddle::experimental::full({3, 3}, 2.0);
auto out_grad = paddle::experimental::full({3, 3}, 2.0);
auto dx_grad = paddle::experimental::full({3, 3}, 2.0);
// 2. test API
std::vector<std::vector<paddle::experimental::Tensor>> out(
3, std::vector<paddle::experimental::Tensor>(1));
paddle::experimental::matmul_double_grad(x,
y,
out_grad,
dx_grad,
{},
false,
false,
&out[0][0],
&out[1][0],
&out[2][0]);
// 3. check result
ASSERT_EQ(out.size(), 3UL);
ASSERT_EQ(out[0].size(), 1UL);
ASSERT_EQ(out[1].size(), 1UL);
ASSERT_EQ(out[2].size(), 1UL);
ASSERT_EQ(out[0][0].dims()[1], 3);
ASSERT_EQ(out[0][0].numel(), 9);
ASSERT_EQ(out[1][0].numel(), 9);
ASSERT_EQ(out[2][0].numel(), 9);
ASSERT_EQ(out[0][0].type(), phi::DataType::FLOAT32);
ASSERT_EQ(out[0][0].layout(), phi::DataLayout::NCHW);
ASSERT_EQ(out[1][0].initialized(), true);
ASSERT_EQ(out[2][0].initialized(), true);
}
} // namespace tests
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/phi/api/include/api.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
PD_DECLARE_KERNEL(mean, CPU, ALL_LAYOUT);
namespace paddle {
namespace tests {
namespace framework = paddle::framework;
using DDim = phi::DDim;
// TODO(chenweihang): Remove this test after the API is used in the dygraph
TEST(API, mean) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
auto dense_x = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 4}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
float sum = 0.0;
for (size_t i = 0; i < 12; ++i) {
dense_x_data[i] = i * 1.0;
sum += i * 1.0;
}
paddle::experimental::Tensor x(dense_x);
std::vector<int64_t> axis = {0, 1};
// 2. test API
auto out = paddle::experimental::mean(x, axis, false);
// 3. check result
ASSERT_EQ(out.dims().size(), 1);
ASSERT_EQ(out.dims()[0], 1);
ASSERT_EQ(out.numel(), 1);
ASSERT_EQ(out.is_cpu(), true);
ASSERT_EQ(out.type(), phi::DataType::FLOAT32);
ASSERT_EQ(out.layout(), phi::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
auto expect_result = sum / 12;
auto dense_out = std::dynamic_pointer_cast<phi::DenseTensor>(out.impl());
auto actual_result = dense_out->data<float>()[0];
ASSERT_NEAR(expect_result, actual_result, 1e-6f);
}
} // namespace tests
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/phi/api/include/api.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(reshape, CPU, ALL_LAYOUT);
namespace paddle {
namespace tests {
namespace framework = paddle::framework;
using DDim = phi::DDim;
// TODO(chenweihang): Remove this test after the API is used in the dygraph
TEST(API, reshape) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
auto dense_x = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 2, 2, 3}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
for (int i = 0; i < dense_x->numel(); i++) {
dense_x_data[i] = i;
}
paddle::experimental::Tensor x(dense_x);
std::vector<int64_t> shape{12, 3};
// 2. test API
auto out = paddle::experimental::reshape(x, shape);
// 3. check result
std::vector<int64_t> expect_shape = {12, 3};
ASSERT_EQ(out.shape()[0], expect_shape[0]);
ASSERT_EQ(out.shape()[1], expect_shape[1]);
ASSERT_EQ(out.numel(), 36);
ASSERT_EQ(out.is_cpu(), true);
ASSERT_EQ(out.type(), phi::DataType::FLOAT32);
ASSERT_EQ(out.layout(), phi::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
bool value_equal = true;
auto dense_out = std::dynamic_pointer_cast<phi::DenseTensor>(out.impl());
auto* dense_out_data = dense_out->data<float>();
for (int i = 0; i < dense_x->numel(); i++) {
if (std::abs(dense_x_data[i] - dense_out_data[i]) > 1e-6f)
value_equal = false;
}
ASSERT_EQ(value_equal, true);
}
TEST(API, reshape_) {
// 1. create tensor
auto x = paddle::experimental::full(
{3, 2, 2, 3}, 1.0, experimental::DataType::FLOAT32);
// 2. test API
paddle::experimental::Tensor out = paddle::experimental::reshape_(x, {12, 3});
// 3. check result
std::vector<int64_t> expect_shape = {12, 3};
ASSERT_EQ(out.shape()[0], expect_shape[0]);
ASSERT_EQ(out.shape()[1], expect_shape[1]);
ASSERT_EQ(out.numel(), 36);
ASSERT_EQ(out.is_cpu(), true);
ASSERT_EQ(out.type(), phi::DataType::FLOAT32);
ASSERT_EQ(out.layout(), phi::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
ASSERT_EQ(out.data<float>(), x.data<float>());
}
TEST(Tensor, old_reshape) {
paddle::experimental::Tensor x(paddle::PlaceType::kCPU);
x.reshape({3, 4});
x.mutable_data<float>(paddle::PlaceType::kCPU);
ASSERT_EQ(x.shape()[0], 3);
ASSERT_EQ(x.shape()[1], 4);
ASSERT_EQ(x.numel(), 12);
ASSERT_EQ(x.is_cpu(), true);
ASSERT_EQ(x.type(), phi::DataType::FLOAT32);
ASSERT_EQ(x.layout(), phi::DataLayout::NCHW);
ASSERT_EQ(x.initialized(), true);
}
} // namespace tests
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/phi/api/include/api.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/selected_rows.h"
PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(scale, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(scale_sr, CPU, ALL_LAYOUT);
namespace paddle {
namespace tests {
namespace framework = paddle::framework;
using DDim = phi::DDim;
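// Shared checker for the scale tests below: the input is a 3x4 tensor of ones
// scaled by 2.0 with bias 1.0, so every output element should equal 3.0.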
void CheckScaleResult(const experimental::Tensor* out) {
ASSERT_EQ(out->dims().size(), 2);
ASSERT_EQ(out->dims()[0], 3);
ASSERT_EQ(out->dims()[1], 4);
ASSERT_EQ(out->numel(), 12);
ASSERT_EQ(out->is_cpu(), true);
ASSERT_EQ(out->type(), phi::DataType::FLOAT32);
ASSERT_EQ(out->layout(), phi::DataLayout::NCHW);
ASSERT_EQ(out->initialized(), true);
for (int64_t i = 0; i < out->numel(); ++i) {
ASSERT_NEAR(3.0, out->data<float>()[i], 1e-6f);
}
}
TEST(API, scale) {
// 1. check `scale` is float value
auto x = experimental::full({3, 4}, 1.0, phi::DataType::FLOAT32);
auto out1 = experimental::scale(x, 2.0, 1.0, true);
CheckScaleResult(&out1);
// 2. check `scale` is Tensor with shape [1]
auto scale = experimental::full({1}, 2.0, phi::DataType::FLOAT32);
auto out2 = experimental::scale(x, scale, 1.0, true);
CheckScaleResult(&out2);
}
TEST(API, scale_sr) {
// 1. check `scale` is float value
std::vector<int64_t> rows{0, 4, 7};
int64_t height = 10;
auto selected_rows = std::make_shared<phi::SelectedRows>(rows, height);
auto dense_tensor = std::dynamic_pointer_cast<phi::DenseTensor>(
experimental::full({3, 4}, 1.0, phi::DataType::FLOAT32).impl());
*(selected_rows->mutable_value()) = *dense_tensor;
experimental::Tensor x(selected_rows);
auto out = experimental::scale(x, 2.0, 1.0, true);
ASSERT_EQ(out.dims().size(), 2);
ASSERT_EQ(out.dims()[0], 3);
ASSERT_EQ(out.dims()[1], 4);
ASSERT_EQ(out.numel(), 12);
ASSERT_EQ(out.is_cpu(), true);
ASSERT_EQ(out.type(), phi::DataType::FLOAT32);
ASSERT_EQ(out.layout(), phi::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
for (int64_t i = 0; i < out.numel(); ++i) {
ASSERT_NEAR(3.0, out.data<float>()[i], 1e-6f);
}
}
} // namespace tests
} // namespace paddle
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See
the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/phi/api/include/api.h"
#include "paddle/phi/api/include/sparse_api.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/sparse_coo_tensor.h"
PD_DECLARE_KERNEL(conv3d_coo, CPU, ALL_LAYOUT);
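// Builds a SparseCooTensor input from flattened indices and features plus a
// dense kernel tensor, runs paddle::experimental::sparse::conv3d, and compares
// the output dims, indices, and values against the expected results (all
// checks are skipped for float16).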
template <typename T>
void TestConv3dBase(const std::vector<int>& indices,
const std::vector<T>& features,
const phi::DDim& x_dims,
const std::vector<T>& kernel,
const phi::DDim& kernel_dims,
const std::vector<int>& correct_out_indices,
const std::vector<T>& correct_out_features,
const phi::DDim& correct_out_dims,
const int non_zero_num,
const std::vector<int>& paddings,
const std::vector<int>& strides,
const std::vector<int>& dilations,
const float diff = 1e-3) {
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
const int in_channels = kernel_dims[3];
const int out_channels = kernel_dims[4];
phi::DenseTensor indices_tensor(
alloc.get(),
phi::DenseTensorMeta(
phi::DataType::INT32, {4, non_zero_num}, phi::DataLayout::NCHW));
memcpy(
indices_tensor.data<int>(), indices.data(), indices.size() * sizeof(int));
phi::DenseTensor features_tensor(
alloc.get(),
phi::DenseTensorMeta(paddle::experimental::CppTypeToDataType<T>::Type(),
{non_zero_num, in_channels},
phi::DataLayout::NHWC));
memcpy(
features_tensor.data<T>(), features.data(), features.size() * sizeof(T));
auto x_tensor = std::make_shared<phi::SparseCooTensor>(
indices_tensor, features_tensor, x_dims);
paddle::experimental::Tensor x(x_tensor);
auto kernel_tensor = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(paddle::experimental::CppTypeToDataType<T>::Type(),
kernel_dims,
phi::DataLayout::NHWC));
paddle::experimental::Tensor weight(kernel_tensor);
memcpy(kernel_tensor->mutable_data<T>(paddle::platform::CPUPlace()),
kernel.data(),
kernel.size() * sizeof(T));
if (!std::is_same<T, phi::dtype::float16>::value) {
auto tensor_out = paddle::experimental::sparse::conv3d(
x, weight, paddings, dilations, strides, 1, false, "Conv3d");
auto out =
std::dynamic_pointer_cast<phi::SparseCooTensor>(tensor_out.impl());
ASSERT_EQ(correct_out_dims.size(), out->dims().size());
for (int i = 0; i < correct_out_dims.size(); i++) {
ASSERT_EQ(correct_out_dims[i], out->dims()[i]);
}
ASSERT_EQ((int64_t)correct_out_features.size() / out_channels, out->nnz());
int cmp_indices = memcmp(correct_out_indices.data(),
out->non_zero_indices().data<int>(),
correct_out_indices.size() * sizeof(int));
ASSERT_EQ(cmp_indices, 0);
for (uint64_t i = 0; i < correct_out_features.size(); i++) {
float tmp = std::fabs(static_cast<float>(
correct_out_features[i] - out->non_zero_elements().data<T>()[i]));
ASSERT_LT(tmp, diff);
}
}
}
void TestConv3d(const std::vector<int>& indices,
const std::vector<float>& features,
const phi::DDim& x_dims,
const std::vector<float>& kernel,
const phi::DDim& kernel_dims,
const std::vector<int>& correct_out_indices,
const std::vector<float>& correct_out_features,
const phi::DDim& correct_out_dims,
const int non_zero_num,
const std::vector<int>& paddings,
const std::vector<int>& strides,
const std::vector<int>& dilations) {
// test float
TestConv3dBase<float>(indices,
features,
x_dims,
kernel,
kernel_dims,
correct_out_indices,
correct_out_features,
correct_out_dims,
non_zero_num,
paddings,
strides,
dilations);
}
TEST(API, sparse_conv2d) {
const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
const int in_channels = 1;
const int out_channels = 1;
phi::DDim x_dims = {1, 1, 5, 5, in_channels};
phi::DDim kernel_dims = {1, 3, 3, in_channels, out_channels};
phi::DDim out_dims = {1, 1, 3, 3, out_channels};
std::vector<int> paddings = {0, 0, 0};
std::vector<int> strides = {1, 1, 1};
std::vector<int> dilations = {1, 1, 1};
const int non_zero_num = 3;
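// Each column of the {4, non_zero_num} indices tensor describes one non-zero
// entry; the four rows hold the batch index and the three spatial coordinates.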
std::vector<int> indices_flatten = {0, 0, 0, 0, 0, 0, 0, 4, 0, 3, 2, 4};
std::vector<float> features = {-0.79394531, -0.3125, -0.55029297};
// kernel has 1*3*3*1*1 = 9 values
std::vector<float> kernel = {0.65820312,
0.75048828,
0.21411133,
0.17370605,
0.85546875,
0.53076172,
0.28833008,
0.71044922,
0.00659943};
std::vector<int> out_indices_flatten = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 2, 2, 2, 1, 2, 0, 1, 2};
std::vector<float> out_features = {
-0.17004, -0.71338, -0.00206, -0.22205, -0.09009};
TestConv3d(indices_flatten,
features,
x_dims,
kernel,
kernel_dims,
out_indices_flatten,
out_features,
out_dims,
non_zero_num,
paddings,
strides,
dilations);
}
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See
the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/phi/api/include/api.h"
#include "paddle/phi/api/include/sparse_api.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/sparse_coo_tensor.h"
PD_DECLARE_KERNEL(dense_to_coo, CPU, ALL_LAYOUT);
TEST(API, to_sparse_coo) {
const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
auto dense_x = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 3}),
phi::DataLayout::NCHW));
phi::CPUPlace cpu;
const int64_t sparse_dim = 2;
auto* dense_x_data = dense_x->mutable_data<float>(cpu);
float dense_data[3][3] = {{0.0, 1.0, 0.0}, {2.0, 0.0, 3.0}, {3.2, 0.0, 0.0}};
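// The 3x3 dense matrix above has 4 non-zeros. The vectors below are its
// expected COO representation (non-zero values plus row/column indices
// flattened into indices_data) and its CSR representation (crows_data is the
// row-offset array, cols_data the column indices).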
std::vector<float> non_zero_data = {1.0, 2.0, 3.0, 3.2};
std::vector<int64_t> indices_data = {0, 1, 1, 2, 1, 0, 2, 0};
std::vector<int64_t> cols_data = {1, 0, 2, 0};
std::vector<int64_t> crows_data = {0, 1, 3, 4};
const int64_t non_zero_num = 4;
std::copy(&dense_data[0][0], &dense_data[0][0] + 9, dense_x_data);
phi::CPUContext dev_ctx_cpu;
// 1. test dense_to_sparse_coo
paddle::experimental::Tensor x(dense_x);
auto out = paddle::experimental::sparse::to_sparse_coo(x, sparse_dim);
auto coo = std::dynamic_pointer_cast<phi::SparseCooTensor>(out.impl());
ASSERT_EQ(coo->nnz(), non_zero_num);
int cmp_indices = memcmp(coo->non_zero_indices().data<int64_t>(),
indices_data.data(),
indices_data.size() * sizeof(int64_t));
ASSERT_EQ(cmp_indices, 0);
int cmp_elements = memcmp(coo->non_zero_elements().data<float>(),
non_zero_data.data(),
non_zero_data.size() * sizeof(float));
ASSERT_EQ(cmp_elements, 0);
// 2. test sparse_csr_to_coo
auto dense_dims = phi::make_ddim({3, 3});
phi::DenseTensorMeta crows_meta(
phi::DataType::INT64, {dense_dims[0] + 1}, phi::DataLayout::NCHW);
phi::DenseTensorMeta cols_meta(
phi::DataType::INT64, {non_zero_num}, phi::DataLayout::NCHW);
phi::DenseTensorMeta values_meta(
phi::DataType::FLOAT32, {non_zero_num}, phi::DataLayout::NCHW);
phi::CPUPlace place;
phi::DenseTensor crows(alloc.get(), crows_meta);
phi::DenseTensor cols(alloc.get(), cols_meta);
phi::DenseTensor values(alloc.get(), values_meta);
memcpy(crows.mutable_data<int64_t>(place),
crows_data.data(),
crows_data.size() * sizeof(int64_t));
memcpy(cols.mutable_data<int64_t>(place),
cols_data.data(),
cols_data.size() * sizeof(int64_t));
memcpy(values.mutable_data<float>(place),
non_zero_data.data(),
non_zero_data.size() * sizeof(float));
auto csr =
std::make_shared<phi::SparseCsrTensor>(crows, cols, values, dense_dims);
paddle::experimental::Tensor csr_x(csr);
auto out2 = paddle::experimental::sparse::to_sparse_coo(csr_x, sparse_dim);
auto coo2 = std::dynamic_pointer_cast<phi::SparseCooTensor>(out2.impl());
ASSERT_EQ(coo2->nnz(), non_zero_num);
int cmp_indices2 = memcmp(coo2->non_zero_indices().data<int64_t>(),
indices_data.data(),
indices_data.size() * sizeof(int64_t));
ASSERT_EQ(cmp_indices2, 0);
int cmp_elements2 = memcmp(coo2->non_zero_elements().data<float>(),
non_zero_data.data(),
non_zero_data.size() * sizeof(float));
ASSERT_EQ(cmp_elements2, 0);
}
TEST(API, to_sparse_csr) {
const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
auto dense_x = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 3}),
phi::DataLayout::NCHW));
phi::CPUPlace cpu;
const int64_t sparse_dim = 2;
auto* dense_x_data = dense_x->mutable_data<float>(cpu);
float dense_data[3][3] = {{0.0, 1.0, 0.0}, {2.0, 0.0, 3.0}, {3.2, 0.0, 0.0}};
std::vector<float> non_zero_data = {1.0, 2.0, 3.0, 3.2};
std::vector<int64_t> indices_data = {0, 1, 1, 2, 1, 0, 2, 0};
std::vector<int64_t> cols_data = {1, 0, 2, 0};
std::vector<int64_t> crows_data = {0, 1, 3, 4};
const int64_t non_zero_num = 4;
std::copy(&dense_data[0][0], &dense_data[0][0] + 9, dense_x_data);
phi::CPUContext dev_ctx_cpu;
// 1. test dense_to_sparse_csr
paddle::experimental::Tensor x(dense_x);
auto out = paddle::experimental::sparse::to_sparse_csr(x);
auto csr = std::dynamic_pointer_cast<phi::SparseCsrTensor>(out.impl());
auto check = [&](const phi::SparseCsrTensor& csr) {
ASSERT_EQ(csr.non_zero_cols().numel(), non_zero_num);
int cmp_crows = memcmp(csr.non_zero_crows().data<int64_t>(),
crows_data.data(),
crows_data.size() * sizeof(int64_t));
ASSERT_EQ(cmp_crows, 0);
int cmp_cols = memcmp(csr.non_zero_cols().data<int64_t>(),
cols_data.data(),
cols_data.size() * sizeof(int64_t));
ASSERT_EQ(cmp_cols, 0);
int cmp_elements = memcmp(csr.non_zero_elements().data<float>(),
non_zero_data.data(),
non_zero_data.size() * sizeof(float));
ASSERT_EQ(cmp_elements, 0);
};
check(*csr);
// 2. test sparse_coo_to_csr
auto dense_dims = phi::make_ddim({3, 3});
phi::DenseTensorMeta indices_meta(
phi::DataType::INT64, {sparse_dim, non_zero_num}, phi::DataLayout::NCHW);
phi::DenseTensorMeta values_meta(
phi::DataType::FLOAT32, {non_zero_num}, phi::DataLayout::NCHW);
phi::CPUPlace place;
phi::DenseTensor indices(alloc.get(), indices_meta);
phi::DenseTensor values(alloc.get(), values_meta);
memcpy(indices.mutable_data<int64_t>(place),
indices_data.data(),
indices_data.size() * sizeof(int64_t));
memcpy(values.mutable_data<float>(place),
non_zero_data.data(),
non_zero_data.size() * sizeof(float));
auto coo =
std::make_shared<phi::SparseCooTensor>(indices, values, dense_dims);
paddle::experimental::Tensor coo_x(coo);
auto out2 = paddle::experimental::sparse::to_sparse_csr(coo_x);
auto csr2 = std::dynamic_pointer_cast<phi::SparseCsrTensor>(out2.impl());
check(*csr2);
}
TEST(API, to_dense) {
const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
phi::CPUPlace cpu;
const int64_t sparse_dim = 2;
float dense_data[3][3] = {{0.0, 1.0, 0.0}, {2.0, 0.0, 3.0}, {3.2, 0.0, 0.0}};
std::vector<float> non_zero_data = {1.0, 2.0, 3.0, 3.2};
std::vector<int64_t> indices_data = {0, 1, 1, 2, 1, 0, 2, 0};
std::vector<int64_t> cols_data = {1, 0, 2, 0};
std::vector<int64_t> crows_data = {0, 1, 3, 4};
const int64_t non_zero_num = 4;
auto dense_dims = phi::make_ddim({3, 3});
phi::CPUContext dev_ctx_cpu;
// 1. test sparse_coo_to_dense
phi::DenseTensorMeta indices_meta(
phi::DataType::INT64, {sparse_dim, non_zero_num}, phi::DataLayout::NCHW);
phi::DenseTensorMeta values_meta(
phi::DataType::FLOAT32, {non_zero_num}, phi::DataLayout::NCHW);
phi::CPUPlace place;
phi::DenseTensor indices(alloc.get(), indices_meta);
phi::DenseTensor values(alloc.get(), values_meta);
memcpy(indices.mutable_data<int64_t>(place),
indices_data.data(),
indices_data.size() * sizeof(int64_t));
memcpy(values.mutable_data<float>(place),
non_zero_data.data(),
non_zero_data.size() * sizeof(float));
auto coo =
std::make_shared<phi::SparseCooTensor>(indices, values, dense_dims);
paddle::experimental::Tensor coo_x(coo);
auto out = paddle::experimental::sparse::to_dense(coo_x);
auto dense_out = std::dynamic_pointer_cast<phi::DenseTensor>(out.impl());
int cmp1 =
memcmp(dense_out->data<float>(), &dense_data[0][0], 9 * sizeof(float));
ASSERT_EQ(cmp1, 0);
// 2. test sparse_csr_to_dense
phi::DenseTensorMeta crows_meta(
phi::DataType::INT64, {dense_dims[0] + 1}, phi::DataLayout::NCHW);
phi::DenseTensorMeta cols_meta(
phi::DataType::INT64, {non_zero_num}, phi::DataLayout::NCHW);
phi::DenseTensor crows(alloc.get(), crows_meta);
phi::DenseTensor cols(alloc.get(), cols_meta);
memcpy(crows.mutable_data<int64_t>(place),
crows_data.data(),
crows_data.size() * sizeof(int64_t));
memcpy(cols.mutable_data<int64_t>(place),
cols_data.data(),
cols_data.size() * sizeof(int64_t));
memcpy(values.mutable_data<float>(place),
non_zero_data.data(),
non_zero_data.size() * sizeof(float));
auto csr =
std::make_shared<phi::SparseCsrTensor>(crows, cols, values, dense_dims);
paddle::experimental::Tensor csr_x(csr);
auto out2 = paddle::experimental::sparse::to_dense(csr_x);
auto dense_out2 = std::dynamic_pointer_cast<phi::DenseTensor>(out2.impl());
int cmp2 =
memcmp(dense_out2->data<float>(), &dense_data[0][0], 9 * sizeof(float));
ASSERT_EQ(cmp2, 0);
}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include <memory>
#include "paddle/phi/api/include/api.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
PD_DECLARE_KERNEL(split, CPU, ALL_LAYOUT);
namespace paddle {
namespace tests {
namespace framework = paddle::framework;
using DDim = phi::DDim;
// TODO(chentianyu03): Remove this test after the API is used in the dygraph
TEST(API, split) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
auto dense_x = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({4, 10}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
for (size_t i = 0; i < 4; ++i) {
for (size_t j = 0; j < 10; ++j) {
dense_x_data[i * 10 + j] = (i * 10 + j) * 1.0;
}
}
paddle::experimental::Tensor x(dense_x);
// 2. test API
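// split the {4, 10} tensor along axis 0 into two sections of 2 rows each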
auto out = paddle::experimental::split(x, {2, 2}, 0);
// 3. check result
ASSERT_EQ(out.size(), static_cast<size_t>(2));
ASSERT_EQ(out[0].dims().size(), 2);
ASSERT_EQ(out[0].dims()[0], 2);
ASSERT_EQ(out[0].dims()[1], 10);
ASSERT_EQ(out[0].type(), phi::DataType::FLOAT32);
ASSERT_EQ(out[0].layout(), phi::DataLayout::NCHW);
ASSERT_EQ(out[1].dims().size(), 2);
ASSERT_EQ(out[1].dims()[0], 2);
ASSERT_EQ(out[1].dims()[1], 10);
ASSERT_EQ(out[1].type(), phi::DataType::FLOAT32);
ASSERT_EQ(out[1].layout(), phi::DataLayout::NCHW);
auto out_data_0 =
std::dynamic_pointer_cast<phi::DenseTensor>(out[0].impl())->data<float>();
auto out_data_1 =
std::dynamic_pointer_cast<phi::DenseTensor>(out[1].impl())->data<float>();
for (size_t i = 0; i < 40; ++i) {
if (i < 20) {
ASSERT_NEAR(dense_x_data[i], out_data_0[i], 1e-6);
} else {
ASSERT_NEAR(dense_x_data[i], out_data_1[i - 20], 1e-6);
}
}
}
} // namespace tests
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/phi/api/include/api.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
PD_DECLARE_KERNEL(sum, CPU, ALL_LAYOUT);
namespace paddle {
namespace tests {
namespace framework = paddle::framework;
using DDim = phi::DDim;
// TODO(chenweihang): Remove this test after the API is used in the dygraph
TEST(API, sum) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
auto dense_x = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 4}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x->mutable_data<float>(paddle::platform::CPUPlace());
float sum = 0.0;
for (size_t i = 0; i < 12; ++i) {
dense_x_data[i] = i * 1.0;
sum += i * 1.0;
}
paddle::experimental::Tensor x(dense_x);
std::vector<int64_t> axis = {0, 1};
// 2. test API
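// reduce over both axes with keep_dim = false; DataType::UNDEFINED keeps the
// input's float32 dtype, as the checks below confirm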
auto out = paddle::experimental::sum(x, axis, DataType::UNDEFINED, false);
// 3. check result
ASSERT_EQ(out.dims().size(), 1);
ASSERT_EQ(out.dims()[0], 1);
ASSERT_EQ(out.numel(), 1);
ASSERT_EQ(out.is_cpu(), true);
ASSERT_EQ(out.type(), phi::DataType::FLOAT32);
ASSERT_EQ(out.layout(), phi::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
auto expect_result = sum;
auto dense_out = std::dynamic_pointer_cast<phi::DenseTensor>(out.impl());
auto actual_result = dense_out->data<float>()[0];
ASSERT_NEAR(expect_result, actual_result, 1e-6f);
}
} // namespace tests
} // namespace paddle
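# Kernel-level (DEV_API) test targets, linked against phi and phi_api_utils.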
cc_test(
test_copy_dev_api
SRCS test_copy_dev_api.cc
DEPS phi phi_api_utils)
cc_test(
test_dot_dev_api
SRCS test_dot_dev_api.cc
DEPS phi phi_api_utils)
cc_test(
test_creation_dev_api
SRCS test_creation_dev_api.cc
DEPS phi phi_api_utils)
cc_test(
test_flatten_dev_api
SRCS test_flatten_dev_api.cc
DEPS phi phi_api_utils)
cc_test(
test_matmul_dev_api
SRCS test_matmul_dev_api.cc
DEPS phi phi_api_utils)
cc_test(
test_mean_dev_api
SRCS test_mean_dev_api.cc
DEPS phi phi_api_utils)
cc_test(
test_scale_dev_api
SRCS test_scale_dev_api.cc
DEPS phi phi_api_utils)
cc_test(
test_cast_dev_api
SRCS test_cast_dev_api.cc
DEPS phi phi_api_utils)
cc_test(
test_elementwise_dev_api
SRCS test_elementwise_dev_api.cc
DEPS phi phi_api_utils)
cc_test(
test_reshape_dev_api
SRCS test_reshape_dev_api.cc
DEPS phi phi_api_utils)
cc_test(
test_sum_dev_api
SRCS test_sum_dev_api.cc
DEPS phi phi_api_utils)
cc_test(
test_conj_dev_api
SRCS test_conj_dev_api.cc
DEPS phi phi_api_utils)
cc_test(
test_concat_dev_api
SRCS test_concat_dev_api.cc
DEPS phi phi_api_utils)
cc_test(
test_split_dev_api
SRCS test_split_dev_api.cc
DEPS phi phi_api_utils)
cc_test(
test_sparse_utils_dev_api
SRCS test_sparse_utils_dev_api.cc
DEPS phi phi_api_utils)
cc_test(
test_sparse_conv3d_dev_api
SRCS test_sparse_conv3d_dev_api.cc
DEPS phi phi_api_utils)
cc_test(
test_sparse_pool_dev_api
SRCS test_sparse_pool_dev_api.cc
DEPS phi phi_api_utils)
cc_test(
test_sparse_activation_dev_api
SRCS test_sparse_activation_dev_api.cc
DEPS phi phi_api_utils)
cc_test(
test_sparse_elementwise_dev_api
SRCS test_sparse_elementwise_dev_api.cc
DEPS phi phi_api_utils)
cc_test(
test_sparse_transpose_dev_api
SRCS test_sparse_transpose_dev_api.cc
DEPS phi phi_api_utils)
cc_test(
test_math_function
SRCS test_math_function.cc
......
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/cast_kernel.h"
namespace phi {
namespace tests {
namespace framework = paddle::framework;
using DDim = phi::DDim;
TEST(DEV_API, cast) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
phi::DenseTensor dense_x(alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 4}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
float sum = 0.0;
for (size_t i = 0; i < 12; ++i) {
dense_x_data[i] = i * 1.0;
sum += i * 1.0;
}
phi::CPUContext dev_ctx;
dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
phi::DataType out_dtype = phi::DataType::FLOAT64;
// 2. test API
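// cast the float32 input to float64 element-wise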
auto out = phi::Cast<float>(dev_ctx, dense_x, out_dtype);
// 3. check result
ASSERT_EQ(out.dims().size(), 2);
ASSERT_EQ(out.dims()[0], 3);
ASSERT_EQ(out.dims()[1], 4);
ASSERT_EQ(out.meta().dtype, phi::DataType::FLOAT64);
ASSERT_EQ(out.meta().layout, phi::DataLayout::NCHW);
auto actual_result = out.data<double>();
for (size_t i = 0; i < 12; ++i) {
ASSERT_NEAR(actual_result[i], static_cast<double>(dense_x_data[i]), 1e-6f);
}
}
} // namespace tests
} // namespace phi
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/concat_kernel.h"
namespace phi {
namespace tests {
namespace framework = paddle::framework;
using DDim = phi::DDim;
TEST(DEV_API, concat) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
phi::DenseTensor dense_x(alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 10}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
phi::DenseTensor dense_y(alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 10}),
phi::DataLayout::NCHW));
auto* dense_y_data =
dense_y.mutable_data<float>(paddle::platform::CPUPlace());
for (size_t i = 0; i < 3; ++i) {
for (size_t j = 0; j < 10; ++j) {
dense_x_data[i * 10 + j] = (i * 10 + j) * 1.0;
dense_y_data[i * 10 + j] = (i * 10 + j) * 1.0;
}
}
std::vector<const phi::DenseTensor*> inputs = {&dense_x, &dense_y};
// 2. test API
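// concatenating the two {3, 10} tensors along axis 0 yields a {6, 10} tensor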
phi::CPUContext dev_ctx;
dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
auto out = phi::Concat<float>(dev_ctx, inputs, 0);
// 3. check result
ASSERT_EQ(out.dims().size(), 2);
ASSERT_EQ(out.dims()[0], 6);
ASSERT_EQ(out.dims()[1], 10);
ASSERT_EQ(out.meta().dtype, phi::DataType::FLOAT32);
ASSERT_EQ(out.meta().layout, phi::DataLayout::NCHW);
auto out_data = out.data<float>();
for (size_t i = 0; i < 60; ++i) {
if (i < 30) {
ASSERT_NEAR(dense_x_data[i], out_data[i], 1e-6f);
} else {
ASSERT_NEAR(dense_y_data[i - 30], out_data[i], 1e-6f);
}
}
}
} // namespace tests
} // namespace phi
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/complex_kernel.h"
namespace phi {
namespace tests {
namespace framework = paddle::framework;
using DDim = phi::DDim;
TEST(DEV_API, conj) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
phi::DenseTensor dense_x(alloc.get(),
phi::DenseTensorMeta(phi::DataType::COMPLEX64,
phi::make_ddim({3, 4}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x.mutable_data<paddle::complex64>(paddle::platform::CPUPlace());
for (size_t i = 0; i < 12; ++i) {
dense_x_data[i] = paddle::complex64(i * 1.0, i * 1.0);
}
phi::CPUContext dev_ctx;
dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
// 2. test API
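// Conj keeps the real part and negates the imaginary part of each element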
auto out = phi::Conj<paddle::complex64>(dev_ctx, dense_x);
// 3. check result
ASSERT_EQ(out.dims().size(), 2);
ASSERT_EQ(out.numel(), 12);
ASSERT_EQ(out.meta().dtype, phi::DataType::COMPLEX64);
ASSERT_EQ(out.meta().layout, phi::DataLayout::NCHW);
auto actual_result = out.data<paddle::complex64>();
for (size_t i = 0; i < 12; ++i) {
ASSERT_NEAR(i * 1.0, actual_result[i].real, 1e-6f);
ASSERT_NEAR(i * -1.0, actual_result[i].imag, 1e-6f);
}
}
} // namespace tests
} // namespace phi
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/tensor_utils.h"
namespace phi {
namespace tests {
namespace framework = paddle::framework;
using DDim = phi::DDim;
// TODO(YuanRisheng): This test file needs to be refactored after 'copy' is
// realized in 'paddle/api'
TEST(DEV_API, copy) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
auto dense_src = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({2, 3}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_src->mutable_data<float>(paddle::platform::CPUPlace());
auto dense_dst = std::make_shared<phi::DenseTensor>(
alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({2, 3}),
phi::DataLayout::NCHW));
for (size_t i = 0; i < 2; ++i) {
for (size_t j = 0; j < 3; ++j) {
dense_x_data[i * 3 + j] = (i * 3 + j) * 1.0;
}
}
const auto& a = paddle::platform::CPUPlace();
std::cout << typeid(a).name() << std::endl;
// 2. test API
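// copy dense_src into dense_dst on the CPU place; the boolean argument is the
// blocking flag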
phi::CPUContext dev_ctx;
dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
dev_ctx.SetHostAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
phi::Copy(
dev_ctx, *(dense_src.get()), phi::CPUPlace(), false, dense_dst.get());
// 3. check result
for (int64_t i = 0; i < dense_src->numel(); i++) {
ASSERT_EQ(dense_src->data<float>()[i], dense_dst->data<float>()[i]);
}
}
} // namespace tests
} // namespace phi
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/empty_kernel.h"
#include "paddle/phi/kernels/full_kernel.h"
namespace phi {
namespace tests {
namespace framework = paddle::framework;
using DDim = phi::DDim;
TEST(DEV_API, empty) {
// 1. create input
phi::CPUContext dev_ctx;
dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
// 2. test API
auto out = phi::Empty<int>(dev_ctx, {3, 2});
// 3. check result
ASSERT_EQ(out.dims().size(), 2);
ASSERT_EQ(out.dims()[0], 3);
ASSERT_EQ(out.numel(), 6);
ASSERT_EQ(out.meta().dtype, phi::DataType::INT32);
ASSERT_EQ(out.meta().layout, phi::DataLayout::NCHW);
}
TEST(DEV_API, empty_like) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
phi::DenseTensor dense_x(alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 2}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
dense_x_data[0] = 0;
// 2. test API
phi::CPUContext dev_ctx;
dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
auto out = phi::EmptyLike<float>(dev_ctx, dense_x);
// 3. check result
ASSERT_EQ(out.dims().size(), 2);
ASSERT_EQ(out.dims()[0], 3);
ASSERT_EQ(out.numel(), 6);
ASSERT_EQ(out.meta().dtype, phi::DataType::FLOAT32);
ASSERT_EQ(out.meta().layout, phi::DataLayout::NCHW);
}
TEST(DEV_API, full) {
// 1. create input
float val = 1.0;
// 2. test API
phi::CPUContext dev_ctx;
dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
auto out = phi::Full<float>(dev_ctx, {3, 2}, val);
// 3. check result
ASSERT_EQ(out.dims().size(), 2);
ASSERT_EQ(out.dims()[0], 3);
ASSERT_EQ(out.numel(), 6);
ASSERT_EQ(out.meta().dtype, phi::DataType::FLOAT32);
ASSERT_EQ(out.meta().layout, phi::DataLayout::NCHW);
auto* actual_result = out.data<float>();
for (auto i = 0; i < 6; i++) {
ASSERT_NEAR(actual_result[i], val, 1e-6f);
}
}
TEST(DEV_API, full_like) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
phi::DenseTensor dense_x(alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 2}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
dense_x_data[0] = 0;
float val = 1.0;
phi::CPUContext dev_ctx;
dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
// 2. test API
auto out = phi::FullLike<float>(dev_ctx, dense_x, val);
// 3. check result
ASSERT_EQ(out.dims().size(), 2);
ASSERT_EQ(out.dims()[0], 3);
ASSERT_EQ(out.numel(), 6);
ASSERT_EQ(out.meta().dtype, phi::DataType::FLOAT32);
ASSERT_EQ(out.meta().layout, phi::DataLayout::NCHW);
auto* actual_result = out.data<float>();
for (auto i = 0; i < 6; i++) {
ASSERT_NEAR(actual_result[i], val, 1e-6f);
}
}
} // namespace tests
} // namespace phi
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/dot_kernel.h"
namespace phi {
namespace tests {
namespace framework = paddle::framework;
using DDim = phi::DDim;
TEST(DEV_API, dot) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
phi::DenseTensor dense_x(alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 10}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
phi::DenseTensor dense_y(alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 10}),
phi::DataLayout::NCHW));
auto* dense_y_data =
dense_y.mutable_data<float>(paddle::platform::CPUPlace());
float sum[3] = {0.0, 0.0, 0.0};
for (size_t i = 0; i < 3; ++i) {
for (size_t j = 0; j < 10; ++j) {
dense_x_data[i * 10 + j] = (i * 10 + j) * 1.0;
dense_y_data[i * 10 + j] = (i * 10 + j) * 1.0;
sum[i] += (i * 10 + j) * (i * 10 + j) * 1.0;
}
}
// 2. test API
phi::CPUContext dev_ctx;
dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
auto out = phi::Dot<float>(dev_ctx, dense_x, dense_y);
// 3. check result
ASSERT_EQ(out.dims().size(), 2);
ASSERT_EQ(out.dims()[0], 3);
ASSERT_EQ(out.meta().dtype, phi::DataType::FLOAT32);
ASSERT_EQ(out.meta().layout, phi::DataLayout::NCHW);
auto expect_result = sum;
auto actual_result0 = out.data<float>()[0];
auto actual_result1 = out.data<float>()[1];
auto actual_result2 = out.data<float>()[2];
ASSERT_NEAR(expect_result[0], actual_result0, 1e-6f);
ASSERT_NEAR(expect_result[1], actual_result1, 1e-6f);
ASSERT_NEAR(expect_result[2], actual_result2, 1e-6f);
}
} // namespace tests
} // namespace phi
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/elementwise_add_kernel.h"
#include "paddle/phi/kernels/elementwise_divide_kernel.h"
#include "paddle/phi/kernels/elementwise_multiply_kernel.h"
#include "paddle/phi/kernels/elementwise_subtract_kernel.h"
namespace phi {
namespace tests {
namespace framework = paddle::framework;
using DDim = phi::DDim;
TEST(DEV_API, add) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
phi::DenseTensor dense_x(alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 10}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
phi::DenseTensor dense_y(
alloc.get(),
phi::DenseTensorMeta(
phi::DataType::FLOAT32, phi::make_ddim({10}), phi::DataLayout::NCHW));
auto* dense_y_data =
dense_y.mutable_data<float>(paddle::platform::CPUPlace());
float sum[3][10] = {0.0};
for (size_t i = 0; i < 3; ++i) {
for (size_t j = 0; j < 10; ++j) {
dense_x_data[i * 10 + j] = (i * 10 + j) * 1.0;
sum[i][j] = (i * 10 + j) * 1.0 + j * 2.0;
}
}
for (size_t i = 0; i < 10; ++i) {
dense_y_data[i] = i * 2.0;
}
// 2. test API
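// dense_y has shape {10} and is broadcast over the rows of the {3, 10} dense_x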
phi::CPUContext dev_ctx;
dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
auto dense_out = phi::Add<float>(dev_ctx, dense_x, dense_y);
// 3. check result
ASSERT_EQ(dense_out.dims().size(), 2);
ASSERT_EQ(dense_out.dims()[0], 3);
ASSERT_EQ(dense_out.dtype(), phi::DataType::FLOAT32);
ASSERT_EQ(dense_out.layout(), phi::DataLayout::NCHW);
auto expect_result = sum;
auto actual_result0 = dense_out.data<float>()[0];
auto actual_result1 = dense_out.data<float>()[1];
auto actual_result2 = dense_out.data<float>()[10];
ASSERT_NEAR(expect_result[0][0], actual_result0, 1e-6f);
ASSERT_NEAR(expect_result[0][1], actual_result1, 1e-6f);
ASSERT_NEAR(expect_result[1][0], actual_result2, 1e-6f);
}
TEST(DEV_API, subtract) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
phi::DenseTensor dense_x(alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 10}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
phi::DenseTensor dense_y(
alloc.get(),
phi::DenseTensorMeta(
phi::DataType::FLOAT32, phi::make_ddim({10}), phi::DataLayout::NCHW));
auto* dense_y_data =
dense_y.mutable_data<float>(paddle::platform::CPUPlace());
float sub[3][10] = {0.0};
for (size_t i = 0; i < 3; ++i) {
for (size_t j = 0; j < 10; ++j) {
dense_x_data[i * 10 + j] = (i * 10 + j) * 1.0;
sub[i][j] = (i * 10 + j) * 1.0 - j * 2.0;
}
}
for (size_t i = 0; i < 10; ++i) {
dense_y_data[i] = i * 2.0;
}
// 2. test API
phi::CPUContext dev_ctx;
dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
auto dense_out = phi::Subtract<float>(dev_ctx, dense_x, dense_y);
// 3. check result
ASSERT_EQ(dense_out.dims().size(), 2);
ASSERT_EQ(dense_out.dims()[0], 3);
ASSERT_EQ(dense_out.dtype(), phi::DataType::FLOAT32);
ASSERT_EQ(dense_out.meta().layout, phi::DataLayout::NCHW);
auto expect_result = sub;
auto actual_result0 = dense_out.data<float>()[0];
auto actual_result1 = dense_out.data<float>()[1];
auto actual_result2 = dense_out.data<float>()[10];
ASSERT_NEAR(expect_result[0][0], actual_result0, 1e-6f);
ASSERT_NEAR(expect_result[0][1], actual_result1, 1e-6f);
ASSERT_NEAR(expect_result[1][0], actual_result2, 1e-6f);
}
TEST(DEV_API, divide) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
phi::DenseTensor dense_x(alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 10}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
phi::DenseTensor dense_y(
alloc.get(),
phi::DenseTensorMeta(
phi::DataType::FLOAT32, phi::make_ddim({10}), phi::DataLayout::NCHW));
auto* dense_y_data =
dense_y.mutable_data<float>(paddle::platform::CPUPlace());
float div[3][10] = {0.0};
for (size_t i = 0; i < 3; ++i) {
for (size_t j = 0; j < 10; ++j) {
dense_x_data[i * 10 + j] = (i * 10 + j) * 1.0;
div[i][j] = (i * 10 + j) * 1.0 / (j * 2.0 + 1);
}
}
for (size_t i = 0; i < 10; ++i) {
dense_y_data[i] = i * 2.0 + 1;
}
// 2. test API
phi::CPUContext dev_ctx;
dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
auto dense_out = phi::Divide<float>(dev_ctx, dense_x, dense_y);
// 3. check result
ASSERT_EQ(dense_out.dims().size(), 2);
ASSERT_EQ(dense_out.dims()[0], 3);
ASSERT_EQ(dense_out.dtype(), phi::DataType::FLOAT32);
ASSERT_EQ(dense_out.layout(), phi::DataLayout::NCHW);
auto expect_result = div;
auto actual_result0 = dense_out.data<float>()[0];
auto actual_result1 = dense_out.data<float>()[1];
auto actual_result2 = dense_out.data<float>()[10];
ASSERT_NEAR(expect_result[0][0], actual_result0, 1e-6f);
ASSERT_NEAR(expect_result[0][1], actual_result1, 1e-6f);
ASSERT_NEAR(expect_result[1][0], actual_result2, 1e-6f);
}
TEST(DEV_API, multiply) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
phi::DenseTensor dense_x(alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 10}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
phi::DenseTensor dense_y(
alloc.get(),
phi::DenseTensorMeta(
phi::DataType::FLOAT32, phi::make_ddim({10}), phi::DataLayout::NCHW));
auto* dense_y_data =
dense_y.mutable_data<float>(paddle::platform::CPUPlace());
float mul[3][10] = {0.0};
for (size_t i = 0; i < 3; ++i) {
for (size_t j = 0; j < 10; ++j) {
dense_x_data[i * 10 + j] = (i * 10 + j) * 1.0;
mul[i][j] = (i * 10 + j) * 1.0 * j * 2.0;
}
}
for (size_t i = 0; i < 10; ++i) {
dense_y_data[i] = i * 2.0;
}
// 2. test API
phi::CPUContext dev_ctx;
dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
auto dense_out = phi::Multiply<float>(dev_ctx, dense_x, dense_y);
// 3. check result
ASSERT_EQ(dense_out.dims().size(), 2);
ASSERT_EQ(dense_out.dims()[0], 3);
ASSERT_EQ(dense_out.dtype(), phi::DataType::FLOAT32);
ASSERT_EQ(dense_out.layout(), phi::DataLayout::NCHW);
auto expect_result = mul;
auto actual_result0 = dense_out.data<float>()[0];
auto actual_result1 = dense_out.data<float>()[1];
auto actual_result2 = dense_out.data<float>()[10];
ASSERT_NEAR(expect_result[0][0], actual_result0, 1e-6f);
ASSERT_NEAR(expect_result[0][1], actual_result1, 1e-6f);
ASSERT_NEAR(expect_result[1][0], actual_result2, 1e-6f);
}
} // namespace tests
} // namespace phi
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/flatten_kernel.h"
namespace phi {
namespace tests {
namespace framework = paddle::framework;
using DDim = phi::DDim;
TEST(DEV_API, flatten) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
phi::DenseTensor dense_x(alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 2, 2, 3}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
for (int i = 0; i < dense_x.numel(); i++) {
dense_x_data[i] = i;
}
int start_axis = 1, stop_axis = 2;
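// flattening axes 1..2 collapses the {3, 2, 2, 3} tensor into {3, 4, 3}
// without changing the element order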
phi::CPUContext dev_ctx;
dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
dev_ctx.SetHostAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
// 2. test API
auto out = phi::Flatten<float>(dev_ctx, dense_x, start_axis, stop_axis);
// 3. check result
std::vector<int> expect_shape = {3, 4, 3};
ASSERT_EQ(out.dims()[0], expect_shape[0]);
ASSERT_EQ(out.dims()[1], expect_shape[1]);
ASSERT_EQ(out.dims()[2], expect_shape[2]);
ASSERT_EQ(out.numel(), 36);
ASSERT_EQ(out.meta().dtype, phi::DataType::FLOAT32);
ASSERT_EQ(out.meta().layout, phi::DataLayout::NCHW);
bool value_equal = true;
auto* dense_out_data = out.data<float>();
for (int i = 0; i < dense_x.numel(); i++) {
if (std::abs(dense_x_data[i] - dense_out_data[i]) > 1e-6f)
value_equal = false;
}
ASSERT_EQ(value_equal, true);
}
} // namespace tests
} // namespace phi
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/matmul_kernel.h"
namespace phi {
namespace tests {
namespace framework = paddle::framework;
using DDim = phi::DDim;
TEST(DEV_API, matmul) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
DenseTensor dense_x(alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 3}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
DenseTensor dense_y(alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 3}),
phi::DataLayout::NCHW));
auto* dense_y_data =
dense_y.mutable_data<float>(paddle::platform::CPUPlace());
for (size_t i = 0; i < 9; ++i) {
dense_x_data[i] = 1.0;
dense_y_data[i] = 2.0;
}
std::vector<float> sum(9, 6.0);
// 2. test API
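// 3x3 matmul of an all-ones matrix with an all-twos matrix: every output
// element is 3 * 1.0 * 2.0 = 6.0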
phi::CPUContext dev_ctx;
dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
auto out = Matmul<float, CPUContext>(dev_ctx, dense_x, dense_y, false, false);
// 3. check result
ASSERT_EQ(out.dims().size(), 2);
ASSERT_EQ(out.dims()[0], 3);
ASSERT_EQ(out.dims()[1], 3);
ASSERT_EQ(out.numel(), 9);
ASSERT_EQ(out.dtype(), DataType::FLOAT32);
ASSERT_EQ(out.layout(), DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
for (size_t i = 0; i < 9; i++) {
ASSERT_NEAR(sum[i], out.data<float>()[i], 1e-6f);
}
}
} // namespace tests
} // namespace phi
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/reduce_mean_kernel.h"
namespace phi {
namespace tests {
namespace framework = paddle::framework;
using DDim = phi::DDim;
TEST(DEV_API, mean) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
phi::DenseTensor dense_x(alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 4}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
float sum = 0.0;
for (size_t i = 0; i < 12; ++i) {
dense_x_data[i] = i * 1.0;
sum += i * 1.0;
}
std::vector<int64_t> dims = {0, 1};
// 2. test API
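// reduce over both axes; the expected mean is the accumulated sum divided by
// the 12 elements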
phi::CPUContext dev_ctx;
dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
auto out = phi::Mean<float>(dev_ctx, dense_x, dims, false);
// 3. check result
ASSERT_EQ(out.dims().size(), 1);
ASSERT_EQ(out.numel(), 1);
ASSERT_EQ(out.meta().dtype, phi::DataType::FLOAT32);
ASSERT_EQ(out.meta().layout, phi::DataLayout::NCHW);
auto expect_result = sum / 12;
auto actual_result = out.data<float>()[0];
ASSERT_NEAR(expect_result, actual_result, 1e-6f);
}
} // namespace tests
} // namespace phi
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/reshape_kernel.h"
namespace phi {
namespace tests {
namespace framework = paddle::framework;
using DDim = phi::DDim;
// TODO(chenweihang): Remove this test after the API is used in the dygraph
TEST(DEV_API, reshape) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
phi::DenseTensor dense_x(alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 2, 2, 3}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
for (int i = 0; i < dense_x.numel(); i++) {
dense_x_data[i] = i;
}
std::vector<int64_t> shape{12, 3};
// 2. test API
phi::CPUContext dev_ctx;
dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
dev_ctx.SetHostAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
auto out = phi::Reshape<float>(dev_ctx, dense_x, shape);
// 3. check result
std::vector<int64_t> expect_shape = {12, 3};
ASSERT_EQ(out.dims()[0], expect_shape[0]);
ASSERT_EQ(out.dims()[1], expect_shape[1]);
ASSERT_EQ(out.numel(), 36);
ASSERT_EQ(out.meta().dtype, phi::DataType::FLOAT32);
ASSERT_EQ(out.meta().layout, phi::DataLayout::NCHW);
bool value_equal = true;
auto* dense_out_data = out.data<float>();
for (int i = 0; i < dense_x.numel(); i++) {
if (std::abs(dense_x_data[i] - dense_out_data[i]) > 1e-6f)
value_equal = false;
}
ASSERT_EQ(value_equal, true);
}
} // namespace tests
} // namespace phi
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/scale_kernel.h"
namespace phi {
namespace tests {
namespace framework = paddle::framework;
using DDim = phi::DDim;
TEST(DEV_API, scale) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
phi::DenseTensor dense_x(alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 4}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
for (size_t i = 0; i < 12; ++i) {
dense_x_data[i] = i * 1.0;
}
float scale = 2;
float bias = 1;
bool bias_after_scale = true;
// 2. test API
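// with bias_after_scale = true the kernel computes out = x * scale + bias, so
// the last element becomes 11 * 2 + 1 = 23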
phi::CPUContext dev_ctx;
dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
auto out = phi::Scale<float>(dev_ctx, dense_x, scale, bias, bias_after_scale);
// 3. check result
ASSERT_EQ(out.dims().size(), 2);
ASSERT_EQ(out.numel(), 12);
ASSERT_EQ(out.meta().dtype, phi::DataType::FLOAT32);
ASSERT_EQ(out.meta().layout, phi::DataLayout::NCHW);
auto expect_result = 23;
auto actual_result = out.data<float>()[11];
ASSERT_NEAR(expect_result, actual_result, 1e-6f);
}
TEST(DEV_API, scale_host) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
phi::DenseTensor dense_x(alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 4}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
for (size_t i = 0; i < 12; ++i) {
dense_x_data[i] = i * 1.0;
}
phi::DenseTensor scale(
alloc.get(),
phi::DenseTensorMeta(
phi::DataType::FLOAT32, phi::make_ddim({1}), phi::DataLayout::NCHW));
scale.data<float>()[0] = 2;
float bias = 1;
bool bias_after_scale = true;
// 2. test API
phi::CPUContext dev_ctx;
dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
auto out = phi::Scale<float>(dev_ctx, dense_x, scale, bias, bias_after_scale);
// 3. check result
ASSERT_EQ(out.dims().size(), 2);
ASSERT_EQ(out.numel(), 12);
ASSERT_EQ(out.meta().dtype, phi::DataType::FLOAT32);
ASSERT_EQ(out.meta().layout, phi::DataLayout::NCHW);
auto expect_result = 23;
auto actual_result = out.data<float>()[11];
ASSERT_NEAR(expect_result, actual_result, 1e-6f);
}
} // namespace tests
} // namespace phi
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/activation_grad_kernel.h"
#include "paddle/phi/kernels/activation_kernel.h"
#include "paddle/phi/kernels/empty_kernel.h"
#include "paddle/phi/kernels/sparse/sparse_utils_kernel.h"
#include "paddle/phi/kernels/sparse/unary_grad_kernel.h"
#include "paddle/phi/kernels/sparse/unary_kernel.h"
namespace phi {
namespace tests {
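// Apply relu to a sparse COO tensor and compare the result (and its gradient)
// against the dense relu kernels applied to the non-zero values only.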
TEST(DEV_API, sparse_relu) {
std::vector<float> data = {0, -1, 0, 2, 0, 0, -3, 0, 4, 5, 0, 0};
phi::CPUContext dev_ctx_cpu;
dev_ctx_cpu.SetAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
dev_ctx_cpu.SetHostAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
DenseTensor dense_x =
phi::Empty(dev_ctx_cpu,
DenseTensorMeta(DataType::FLOAT32, {3, 4}, DataLayout::NCHW));
memcpy(dense_x.data<float>(), data.data(), data.size() * sizeof(float));
auto sparse_coo = sparse::DenseToCoo<float>(dev_ctx_cpu, dense_x, 2);
auto sparse_out = sparse::ReluCoo<float>(dev_ctx_cpu, sparse_coo);
DenseTensor dense_out =
phi::EmptyLike<float>(dev_ctx_cpu, sparse_out.non_zero_elements());
ReluKernel<float>(dev_ctx_cpu, sparse_coo.non_zero_elements(), &dense_out);
int cmp = memcmp(dense_out.data<float>(),
sparse_out.non_zero_elements().data<float>(),
dense_out.numel() * sizeof(float));
ASSERT_EQ(cmp, 0);
// backward
DenseTensor dense_grad_x = phi::EmptyLike<float>(dev_ctx_cpu, dense_out);
ReluGradKernel<float>(
dev_ctx_cpu, sparse_coo.non_zero_elements(), dense_out, &dense_grad_x);
SparseCooTensor sparse_grad_x(
phi::EmptyLike<int>(dev_ctx_cpu, sparse_coo.non_zero_indices()),
phi::EmptyLike<float>(dev_ctx_cpu, sparse_coo.non_zero_elements()),
{3, 4});
SparseCooTensor sparse_out_grad(
sparse_coo.non_zero_indices(), dense_out, {3, 4});
sparse::ReluCooGradKernel<float>(
dev_ctx_cpu, sparse_coo, sparse_out_grad, &sparse_grad_x);
cmp = memcmp(dense_grad_x.data<float>(),
sparse_grad_x.non_zero_elements().data<float>(),
dense_grad_x.numel() * sizeof(float));
ASSERT_EQ(cmp, 0);
}
} // namespace tests
} // namespace phi
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/sparse/coalesce_kernel.h"
#include "paddle/phi/kernels/sparse/conv_grad_kernel.h"
#include "paddle/phi/kernels/sparse/conv_kernel.h"
namespace phi {
namespace tests {
std::vector<int> flatten(const std::vector<std::vector<int>>& in) {
std::vector<int> out;
if (in.size() == 0) return out;
const int cols = in[0].size();
out.resize(in.size() * cols);
for (uint64_t i = 0; i < in.size(); i++) {
memcpy(&out[i * cols], in[i].data(), cols * sizeof(int));
}
return out;
}
template <typename T1, typename T2>
std::vector<T2> cast(const std::vector<T1>& in) {
std::vector<T2> out(in.size());
for (uint64_t i = 0; i < in.size(); i++) {
out[i] = static_cast<T2>(in[i]);
}
return out;
}
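// Kernel-level counterpart of the sparse conv3d API test above: builds the
// SparseCooTensor directly, runs sparse::Conv3dCoo (capturing the rulebook and
// counter tensors), verifies indices/values, and optionally checks gradients
// via sparse::Conv3dCooGrad; a GPU path repeats the check when CUDA is enabled.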
template <typename T, typename IntT = int>
void TestConv3dBase(const std::vector<IntT>& indices,
const std::vector<T>& features,
const DDim& x_dims,
const std::vector<T>& kernel,
const DDim& kernel_dims,
const std::vector<IntT>& correct_out_indices,
const std::vector<T>& correct_out_features,
const DDim& correct_out_dims,
const int non_zero_num,
const std::vector<int>& paddings,
const std::vector<int>& strides,
const std::vector<int>& dilations,
const float diff = 1e-3,
const bool backward = false,
const std::vector<T> features_grad = {},
const std::vector<T> kernel_grad = {},
const bool subm = false) {
phi::CPUContext dev_ctx_cpu;
dev_ctx_cpu.SetAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
dev_ctx_cpu.SetHostAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
const int in_channels = kernel_dims[3];
const int out_channels = kernel_dims[4];
auto indices_dtype = paddle::experimental::CppTypeToDataType<IntT>::Type();
DenseTensor indices_tensor = phi::Empty(
dev_ctx_cpu,
DenseTensorMeta(indices_dtype, {4, non_zero_num}, DataLayout::NCHW));
memcpy(indices_tensor.data<IntT>(),
indices.data(),
indices.size() * sizeof(IntT));
DenseTensor features_tensor = phi::Empty(
dev_ctx_cpu,
DenseTensorMeta(paddle::experimental::CppTypeToDataType<T>::Type(),
{non_zero_num, in_channels},
DataLayout::NHWC));
memcpy(
features_tensor.data<T>(), features.data(), features.size() * sizeof(T));
SparseCooTensor x_tensor(indices_tensor, features_tensor, x_dims);
DenseTensor kernel_tensor = phi::Empty(
dev_ctx_cpu,
DenseTensorMeta(paddle::experimental::CppTypeToDataType<T>::Type(),
kernel_dims,
DataLayout::NHWC));
memcpy(kernel_tensor.data<T>(), kernel.data(), kernel.size() * sizeof(T));
auto f_verify = [&](const T* real_data, const std::vector<T>& correct_data) {
for (uint64_t i = 0; i < correct_data.size(); i++) {
float tmp = std::fabs(static_cast<float>(correct_data[i] - real_data[i]));
ASSERT_LT(tmp, diff);
}
};
if (!std::is_same<T, phi::dtype::float16>::value) {
DenseTensor rulebook, counter;
SparseCooTensor out = sparse::Conv3dCoo<T>(dev_ctx_cpu,
x_tensor,
kernel_tensor,
paddings,
dilations,
strides,
1,
subm,
"Conv3d",
&rulebook,
&counter);
ASSERT_EQ(correct_out_dims.size(), out.dims().size());
for (int i = 0; i < correct_out_dims.size(); i++) {
ASSERT_EQ(correct_out_dims[i], out.dims()[i]);
}
ASSERT_EQ((int64_t)correct_out_features.size() / out_channels, out.nnz());
int cmp_indices = memcmp(correct_out_indices.data(),
out.non_zero_indices().data<IntT>(),
correct_out_indices.size() * sizeof(IntT));
ASSERT_EQ(cmp_indices, 0);
f_verify(out.non_zero_elements().data<T>(), correct_out_features);
if (backward) {
std::tuple<SparseCooTensor, DenseTensor> grads =
sparse::Conv3dCooGrad<T>(dev_ctx_cpu,
x_tensor,
kernel_tensor,
out,
rulebook,
counter,
out,
paddings,
dilations,
strides,
1,
subm,
"Conv3d");
f_verify(std::get<0>(grads).non_zero_elements().data<T>(), features_grad);
f_verify(std::get<1>(grads).data<T>(), kernel_grad);
}
}
// test gpu
#if defined(PADDLE_WITH_CUDA)
phi::GPUContext dev_ctx_gpu{phi::GPUPlace()};
dev_ctx_gpu.PartialInitWithoutAllocator();
dev_ctx_gpu.SetAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(dev_ctx_gpu.GetPlace(), dev_ctx_gpu.stream())
.get());
dev_ctx_gpu.SetHostAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(phi::CPUPlace())
.get());
dev_ctx_gpu.SetPinnedAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CUDAPinnedPlace())
.get());
dev_ctx_gpu.PartialInitWithAllocator();
DenseTensor d_indices_tensor = phi::Empty(
dev_ctx_gpu,
DenseTensorMeta(indices_dtype, {4, non_zero_num}, DataLayout::NCHW));
phi::Copy(
dev_ctx_gpu, indices_tensor, phi::GPUPlace(), true, &d_indices_tensor);
DenseTensor d_features_tensor = phi::Empty(
dev_ctx_gpu,
DenseTensorMeta(paddle::experimental::CppTypeToDataType<T>::Type(),
{non_zero_num, in_channels},
DataLayout::NHWC));
phi::Copy(
dev_ctx_gpu, features_tensor, phi::GPUPlace(), true, &d_features_tensor);
SparseCooTensor d_x_tensor(d_indices_tensor, d_features_tensor, x_dims);
DenseTensor d_kernel_tensor = phi::Empty(
dev_ctx_gpu,
DenseTensorMeta(paddle::experimental::CppTypeToDataType<T>::Type(),
kernel_dims,
DataLayout::NHWC));
phi::Copy(
dev_ctx_gpu, kernel_tensor, phi::GPUPlace(), true, &d_kernel_tensor);
DenseTensor d_rulebook, d_counter;
SparseCooTensor d_out = sparse::Conv3dCoo<T>(dev_ctx_gpu,
d_x_tensor,
d_kernel_tensor,
paddings,
dilations,
strides,
1,
subm,
"Conv3d",
&d_rulebook,
&d_counter);
SparseCooTensor tmp_d_out = sparse::CoalesceCoo<T>(dev_ctx_gpu, d_out);
ASSERT_EQ(correct_out_dims.size(), d_out.dims().size());
ASSERT_EQ((int64_t)correct_out_features.size() / out_channels, d_out.nnz());
for (int i = 0; i < correct_out_dims.size(); i++) {
ASSERT_EQ(correct_out_dims[i], d_out.dims()[i]);
}
DenseTensor h_indices_tensor = phi::Empty(
dev_ctx_cpu,
DenseTensorMeta(indices_dtype, {4, d_out.nnz()}, DataLayout::NCHW));
phi::Copy(dev_ctx_gpu,
tmp_d_out.non_zero_indices(),
phi::CPUPlace(),
true,
&h_indices_tensor);
int cmp_indices2 = memcmp(correct_out_indices.data(),
h_indices_tensor.data<IntT>(),
correct_out_indices.size() * sizeof(IntT));
ASSERT_EQ(cmp_indices2, 0);
DenseTensor h_features_tensor =
phi::EmptyLike<T>(dev_ctx_cpu, d_out.non_zero_elements());
phi::Copy(dev_ctx_gpu,
tmp_d_out.non_zero_elements(),
phi::CPUPlace(),
true,
&h_features_tensor);
f_verify(h_features_tensor.data<T>(), correct_out_features);
if (backward) {
std::tuple<SparseCooTensor, DenseTensor> grads =
sparse::Conv3dCooGrad<T>(dev_ctx_gpu,
d_x_tensor,
d_kernel_tensor,
d_out,
d_rulebook,
d_counter,
d_out,
paddings,
dilations,
strides,
1,
subm,
"Conv3d");
DenseTensor d_features_grad = std::get<0>(grads).non_zero_elements();
DenseTensor d_kernel_grad = std::get<1>(grads);
DenseTensor h_features_grad =
phi::EmptyLike<T>(dev_ctx_cpu, d_features_grad);
phi::Copy(
dev_ctx_gpu, d_features_grad, phi::CPUPlace(), true, &h_features_grad);
f_verify(h_features_grad.data<T>(), features_grad);
DenseTensor h_kernel_grad = phi::EmptyLike<T>(dev_ctx_cpu, d_kernel_grad);
phi::Copy(
dev_ctx_gpu, std::get<1>(grads), phi::CPUPlace(), true, &h_kernel_grad);
f_verify(h_kernel_grad.data<T>(), kernel_grad);
}
#endif
}
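// Convenience wrapper that runs TestConv3dBase with both float and double
// inputs.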
template <typename IntT = int>
void TestConv3d(const std::vector<IntT>& indices,
const std::vector<float>& features,
const DDim& x_dims,
const std::vector<float>& kernel,
const DDim& kernel_dims,
const std::vector<IntT>& correct_out_indices,
const std::vector<float>& correct_out_features,
const DDim& correct_out_dims,
const int non_zero_num,
const std::vector<int>& paddings,
const std::vector<int>& strides,
const std::vector<int>& dilations,
const float diff = 1e-3,
const bool backward = false,
const std::vector<float> features_grad = {},
const std::vector<float> kernel_grad = {},
const bool subm = false) {
// test float
TestConv3dBase<float, IntT>(indices,
features,
x_dims,
kernel,
kernel_dims,
correct_out_indices,
correct_out_features,
correct_out_dims,
non_zero_num,
paddings,
strides,
dilations,
diff,
backward,
features_grad,
kernel_grad,
subm);
// test double
TestConv3dBase<double, IntT>(indices,
cast<float, double>(features),
x_dims,
cast<float, double>(kernel),
kernel_dims,
correct_out_indices,
cast<float, double>(correct_out_features),
correct_out_dims,
non_zero_num,
paddings,
strides,
dilations,
diff,
backward,
cast<float, double>(features_grad),
cast<float, double>(kernel_grad),
subm);
}
TEST(DEV_API, sparse_conv3d) {
const int in_channels = 1;
const int out_channels = 1;
DDim x_dims = {1, 4, 4, 4, in_channels};
DDim kernel_dims = {3, 3, 3, in_channels, out_channels};
DDim out_dims = {1, 2, 2, 2, out_channels};
std::vector<int> paddings = {0, 0, 0};
std::vector<int> strides = {1, 1, 1};
std::vector<int> dilations = {1, 1, 1};
const int non_zero_num = 4;
std::vector<std::vector<int>> indices = {
{0, 0, 0, 0}, {0, 2, 0, 2}, {3, 2, 2, 3}, {3, 2, 3, 2}};
std::vector<int> indices_flatten = flatten(indices);
std::vector<float> features = {-0.2883, 0.0287, 0.2864, -0.0992};
// 3*3*3=27
std::vector<float> kernel = {
0.4721, 0.2292, 0.9751, 0.8616, 0.5784, 0.9178, 0.8727, 0.1659, 0.4455,
0.0189, 0.4646, 0.4472, 0.1991, 0.8968, 0.3717, 0.0051, 0.6963, 0.2690,
0.7473, 0.5403, 0.5391, 0.0796, 0.4734, 0.9097, 0.1712, 0.6237, 0.8837};
std::vector<std::vector<int>> out_indices = {{0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 1, 1, 1, 1},
{0, 0, 1, 1, 0, 0, 1, 1},
{0, 1, 0, 1, 0, 1, 0, 1}};
std::vector<int> out_indices_flatten = flatten(out_indices);
std::vector<float> out_features = {
0.0254, 0.1455, -0.0615, 0.0862, 0.0077, 0.0200, -0.0160, -0.0433};
TestConv3d(indices_flatten,
features,
x_dims,
kernel,
kernel_dims,
out_indices_flatten,
out_features,
out_dims,
non_zero_num,
paddings,
strides,
dilations);
}
TEST(DEV_API, sparse_conv3d_batch) {
const int in_channels = 1;
const int out_channels = 1;
DDim x_dims = {2, 4, 4, 4, in_channels};
DDim kernel_dims = {3, 3, 3, in_channels, out_channels};
DDim out_dims = {2, 2, 2, 2, out_channels};
std::vector<int> paddings = {0, 0, 0};
std::vector<int> strides = {1, 1, 1};
std::vector<int> dilations = {1, 1, 1};
const int non_zero_num = 8;
std::vector<std::vector<int>> indices = {{0, 0, 0, 0, 1, 1, 1, 1},
{0, 2, 0, 2, 0, 2, 0, 2},
{3, 2, 2, 3, 3, 2, 2, 3},
{3, 2, 3, 2, 3, 2, 3, 2}};
std::vector<int> indices_flatten = flatten(indices);
std::vector<float> features = {
-0.2883, 0.0287, 0.2864, -0.0992, -0.2883, 0.0287, 0.2864, -0.0992};
// 3*3*3=27
std::vector<float> kernel = {
0.4721, 0.2292, 0.9751, 0.8616, 0.5784, 0.9178, 0.8727, 0.1659, 0.4455,
0.0189, 0.4646, 0.4472, 0.1991, 0.8968, 0.3717, 0.0051, 0.6963, 0.2690,
0.7473, 0.5403, 0.5391, 0.0796, 0.4734, 0.9097, 0.1712, 0.6237, 0.8837};
std::vector<std::vector<int>> out_indices = {
{0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1},
{0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1},
{0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1},
{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}};
std::vector<int> out_indices_flatten = flatten(out_indices);
std::vector<float> out_features = {0.0254,
0.1455,
-0.0615,
0.0862,
0.0077,
0.0200,
-0.0160,
-0.0433,
0.0254,
0.1455,
-0.0615,
0.0862,
0.0077,
0.0200,
-0.0160,
-0.0433};
TestConv3d(indices_flatten,
features,
x_dims,
kernel,
kernel_dims,
out_indices_flatten,
out_features,
out_dims,
non_zero_num,
paddings,
strides,
dilations);
}
TEST(DEV_API, sparse_conv3d_stride) {
const int in_channels = 1;
const int out_channels = 1;
DDim x_dims = {1, 4, 4, 4, in_channels};
DDim kernel_dims = {3, 3, 3, in_channels, out_channels};
DDim out_dims = {1, 1, 1, 1, out_channels};
std::vector<int> paddings = {0, 0, 0};
std::vector<int> strides = {2, 2, 2};
std::vector<int> dilations = {1, 1, 1};
const int non_zero_num = 3;
std::vector<std::vector<int>> indices = {
{0, 0, 0}, {0, 2, 0}, {3, 2, 2}, {3, 2, 3}};
std::vector<int> indices_flatten = flatten(indices);
std::vector<float> features = {-0.28833008, 0.02873230, 0.28637695};
// 3*3*3=27
std::vector<float> kernel = {
0.45043945, 0.47216797, 0.22924805, 0.97509766, 0.86181641, 0.57861328,
0.91796875, 0.87255859, 0.16589355, 0.44555664, 0.01889038, 0.46459961,
0.44726562, 0.19909668, 0.89697266, 0.37158203, 0.00513077, 0.69628906,
0.26904297, 0.74707031, 0.54003906, 0.5390625, 0.07958984, 0.47338867,
0.90966797, 0.17126465, 0.62353516};
std::vector<std::vector<int>> out_indices = {{0, 0, 0, 0}};
std::vector<int> out_indices_flatten = flatten(out_indices);
std::vector<float> out_features = {0.01791};
TestConv3d(indices_flatten,
features,
x_dims,
kernel,
kernel_dims,
out_indices_flatten,
out_features,
out_dims,
non_zero_num,
paddings,
strides,
dilations);
}
TEST(DEV_API, sparse_conv3d_dilation) {
const int in_channels = 1;
const int out_channels = 1;
DDim x_dims = {1, 6, 6, 6, in_channels};
DDim kernel_dims = {3, 3, 3, in_channels, out_channels};
DDim out_dims = {1, 2, 2, 2, out_channels};
std::vector<int> paddings = {0, 0, 0};
std::vector<int> strides = {1, 1, 1};
std::vector<int> dilations = {2, 2, 2};
const int non_zero_num = 3;
std::vector<std::vector<int>> indices = {
{0, 0, 0}, {2, 3, 3}, {2, 3, 3}, {5, 2, 0}};
std::vector<int> indices_flatten = flatten(indices);
std::vector<float> features = {-0.78710938, -0.64746094, 0.98828125};
// 3*3*3=27
std::vector<float> kernel = {
0.20617676, 0.99365234, 0.16760254, 0.30639648, 0.41479492, 0.75732422,
0.65625, 0.48535156, 0.72167969, 0.56005859, 0.5, 0.3581543,
0.20324707, 0.88769531, 0.81298828, 0.58398438, 0.30810547, 0.12634277,
0.70507812, 0.38720703, 0.34814453, 0.02690125, 0.80273438, 0.90625,
0.2277832, 0.4362793, 0.44482422};
std::vector<std::vector<int>> out_indices = {{0, 0, 0, 1, 0, 1, 1, 0}};
std::vector<int> out_indices_flatten = flatten(out_indices);
std::vector<float> out_features = {-0.64014, -0.37402};
TestConv3d(indices_flatten,
features,
x_dims,
kernel,
kernel_dims,
out_indices_flatten,
out_features,
out_dims,
non_zero_num,
paddings,
strides,
dilations);
}
TEST(DEV_API, sparse_conv3d_padding) {
const int in_channels = 1;
const int out_channels = 1;
DDim x_dims = {1, 3, 3, 3, in_channels};
DDim kernel_dims = {3, 3, 3, in_channels, out_channels};
DDim out_dims = {1, 3, 3, 3, out_channels};
std::vector<int> paddings = {1, 1, 1};
std::vector<int> strides = {1, 1, 1};
std::vector<int> dilations = {1, 1, 1};
const int non_zero_num = 1;
std::vector<std::vector<int>> indices = {{0, 1, 0, 0}};
std::vector<int> indices_flatten = flatten(indices);
std::vector<float> features = {-0.79394531};
// 3*3*3=27
std::vector<float> kernel = {
0.34375, 0.22485352, 0.65820312, 0.75048828, 0.21411133, 0.17370605,
0.85546875, 0.53076172, 0.28833008, 0.71044922, 0.00659943, 0.45922852,
0.19372559, 0.64599609, 0.78808594, 0.49316406, 0.62646484, 0.40649414,
0.62744141, 0.5703125, 0.23144531, 0.50048828, 0.31835938, 0.90869141,
0.38208008, 0.60449219, 0.09075928};
std::vector<int> out_indices_flatten = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1};
std::vector<float> out_features = {-0.25269,
-0.39746,
-0.45288,
-0.49805,
-0.5127,
-0.15381,
-0.00524,
-0.56396,
-0.17004,
-0.5957,
-0.17847,
-0.27295};
TestConv3d(indices_flatten,
features,
x_dims,
kernel,
kernel_dims,
out_indices_flatten,
out_features,
out_dims,
non_zero_num,
paddings,
strides,
dilations);
}
TEST(DEV_API, sparse_conv2d) {
const int in_channels = 1;
const int out_channels = 1;
DDim x_dims = {1, 1, 5, 5, in_channels};
DDim kernel_dims = {1, 3, 3, in_channels, out_channels};
DDim out_dims = {1, 1, 3, 3, out_channels};
std::vector<int> paddings = {0, 0, 0};
std::vector<int> strides = {1, 1, 1};
std::vector<int> dilations = {1, 1, 1};
const int non_zero_num = 3;
std::vector<int> indices_flatten = {0, 0, 0, 0, 0, 0, 0, 4, 0, 3, 2, 4};
std::vector<float> features = {-0.79394531, -0.3125, -0.55029297};
  // 1*3*3=9
std::vector<float> kernel = {0.65820312,
0.75048828,
0.21411133,
0.17370605,
0.85546875,
0.53076172,
0.28833008,
0.71044922,
0.00659943};
std::vector<int> out_indices_flatten = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 2, 2, 2, 1, 2, 0, 1, 2};
std::vector<float> out_features = {
-0.17004, -0.71338, -0.00206, -0.22205, -0.09009};
TestConv3d(indices_flatten,
features,
x_dims,
kernel,
kernel_dims,
out_indices_flatten,
out_features,
out_dims,
non_zero_num,
paddings,
strides,
dilations);
}
TEST(DEV_API, sparse_conv2d_int64) {
const int in_channels = 1;
const int out_channels = 1;
DDim x_dims = {1, 1, 5, 5, in_channels};
DDim kernel_dims = {1, 3, 3, in_channels, out_channels};
DDim out_dims = {1, 1, 3, 3, out_channels};
std::vector<int> paddings = {0, 0, 0};
std::vector<int> strides = {1, 1, 1};
std::vector<int> dilations = {1, 1, 1};
const int non_zero_num = 3;
std::vector<int64_t> indices_flatten = {0, 0, 0, 0, 0, 0, 0, 4, 0, 3, 2, 4};
std::vector<float> features = {-0.79394531, -0.3125, -0.55029297};
  // 1*3*3=9
std::vector<float> kernel = {0.65820312,
0.75048828,
0.21411133,
0.17370605,
0.85546875,
0.53076172,
0.28833008,
0.71044922,
0.00659943};
std::vector<int64_t> out_indices_flatten = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 2, 2, 2, 1, 2, 0, 1, 2};
std::vector<float> out_features = {
-0.17004, -0.71338, -0.00206, -0.22205, -0.09009};
TestConv3d<int64_t>(indices_flatten,
features,
x_dims,
kernel,
kernel_dims,
out_indices_flatten,
out_features,
out_dims,
non_zero_num,
paddings,
strides,
dilations);
}
TEST(DEV_API, sparse_conv3d_backward) {
const int in_channels = 1;
const int out_channels = 1;
DDim x_dims = {1, 4, 4, 4, in_channels};
DDim kernel_dims = {3, 3, 3, in_channels, out_channels};
DDim out_dims = {1, 2, 2, 2, out_channels};
std::vector<int> paddings = {0, 0, 0};
std::vector<int> strides = {1, 1, 1};
std::vector<int> dilations = {1, 1, 1};
const int non_zero_num = 2;
std::vector<int> indices_flatten = {0, 0, 0, 2, 3, 2, 3, 2};
std::vector<float> features = {-0.28833008, 0.0287323};
// 3*3*3=27
std::vector<float> kernel = {
0.64306641, 0.45043945, 0.47216797, 0.22924805, 0.97509766, 0.86181641,
0.57861328, 0.91796875, 0.87255859, 0.16589355, 0.44555664, 0.01889038,
0.46459961, 0.44726562, 0.19909668, 0.89697266, 0.37158203, 0.00513077,
0.69628906, 0.26904297, 0.74707031, 0.54003906, 0.5390625, 0.07958984,
0.47338867, 0.90966797, 0.17126465};
std::vector<int> out_indices_flatten = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0,
1, 1, 0, 1, 0, 1, 0, 1, 0, 1};
std::vector<float> out_features = {4.9200e-03,
2.6140e-02,
2.2900e-03,
-2.3596e-01,
1.5000e-04,
1.0670e-02,
5.7200e-03,
1.2850e-02};
std::vector<float> features_grad = {-0.20593, -0.09149};
std::vector<float> kernel_grad = {
0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
0.000e+00, 0.000e+00, 6.805e-02, 0.000e+00, 0.000e+00, 0.000e+00,
0.000e+00, 3.700e-04, 1.600e-04, 0.000e+00, 3.100e-04, 0.000e+00,
0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, -6.780e-03, 7.000e-05,
0.000e+00, 7.500e-04, 1.400e-04};
TestConv3d(indices_flatten,
features,
x_dims,
kernel,
kernel_dims,
out_indices_flatten,
out_features,
out_dims,
non_zero_num,
paddings,
strides,
dilations,
1e-3,
true,
features_grad,
kernel_grad);
}
TEST(DEV_API, sparse_conv2d_subm) {
const int in_channels = 1;
const int out_channels = 1;
DDim x_dims = {1, 1, 4, 5, in_channels};
DDim kernel_dims = {1, 3, 3, in_channels, out_channels};
DDim out_dims = {1, 1, 4, 5, out_channels};
std::vector<int> paddings = {0, 1, 1};
std::vector<int> strides = {1, 1, 1};
std::vector<int> dilations = {1, 1, 1};
const int non_zero_num = 4;
std::vector<int> indices_flatten = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 3, 3, 2, 2, 3};
std::vector<float> features = {0.8854, 0.6505, -0.1999, 0.3583};
  // 1*3*3=9
std::vector<float> kernel = {
0.9364, 0.9460, 0.6564, 0.7999, 0.2013, 0.3812, 0.5474, 0.1016, 0.3368};
std::vector<int> out_indices_flatten = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 3, 3, 2, 2, 3};
std::vector<float> out_features = {0.1782, 0.2313, 0.7117, 0.5214};
std::vector<float> features_grad = {0.0359, 1.2080, 0.5838, 0.4541};
std::vector<float> kernel_grad = {
0.3391, 0.4630, 0.0000, -0.1042, 0.3528, 0.2550, 0.0000, -0.0462, 0.0829};
TestConv3d(indices_flatten,
features,
x_dims,
kernel,
kernel_dims,
out_indices_flatten,
out_features,
out_dims,
non_zero_num,
paddings,
strides,
dilations,
1e-3,
true,
features_grad,
kernel_grad,
true);
}
TEST(DEV_API, sparse_conv3d_subm) {
const int in_channels = 1;
const int out_channels = 1;
DDim x_dims = {1, 4, 4, 5, in_channels};
DDim kernel_dims = {3, 3, 3, in_channels, out_channels};
DDim out_dims = {1, 4, 4, 5, out_channels};
std::vector<int> paddings = {1, 1, 1};
std::vector<int> strides = {1, 1, 1};
std::vector<int> dilations = {1, 1, 1};
const int non_zero_num = 3;
std::vector<int> indices_flatten = {0, 0, 0, 1, 3, 3, 2, 0, 2, 0, 3, 1};
std::vector<float> features = {-0.9578, 0.1572, 0.1036};
// 3*3*3=27
std::vector<float> kernel = {
0.1367, 0.4534, 0.2138, 0.8264, 0.7534, 0.3270, 0.2880, 0.1562, 0.7770,
0.6902, 0.1981, 0.1369, 0.6582, 0.7582, 0.5640, 0.8894, 0.7350, 0.1845,
0.6892, 0.3654, 0.6076, 0.0326, 0.8412, 0.5289, 0.9824, 0.8235, 0.9802};
std::vector<int> out_indices_flatten = {0, 0, 0, 1, 3, 3, 2, 0, 2, 0, 3, 1};
std::vector<float> out_features = {-0.7262, 0.1192, 0.0785};
std::vector<float> features_grad = {-0.5506, 0.0904, 0.0595};
std::vector<float> kernel_grad = {
0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
0.0000, 0.0000, 0.0000, 0.0000, 0.7224, 0.0000, 0.0000, 0.0000, 0.0000,
0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000};
TestConv3d(indices_flatten,
features,
x_dims,
kernel,
kernel_dims,
out_indices_flatten,
out_features,
out_dims,
non_zero_num,
paddings,
strides,
dilations,
1e-3,
true,
features_grad,
kernel_grad,
true);
}
} // namespace tests
} // namespace phi
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <cmath>
#include <memory>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/elementwise_add_grad_kernel.h"
#include "paddle/phi/kernels/elementwise_add_kernel.h"
#include "paddle/phi/kernels/elementwise_divide_grad_kernel.h"
#include "paddle/phi/kernels/elementwise_divide_kernel.h"
#include "paddle/phi/kernels/elementwise_multiply_grad_kernel.h"
#include "paddle/phi/kernels/elementwise_multiply_kernel.h"
#include "paddle/phi/kernels/elementwise_subtract_grad_kernel.h"
#include "paddle/phi/kernels/elementwise_subtract_kernel.h"
#include "paddle/phi/kernels/sparse/elementwise_grad_kernel.h"
#include "paddle/phi/kernels/sparse/elementwise_kernel.h"
#include "paddle/phi/kernels/sparse/sparse_utils_kernel.h"
namespace phi {
namespace tests {
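// Generates TestElementWise<Op>Csr / TestElementWise<Op>Coo helpers that run a
// sparse elementwise kernel, densify the operands and the result, and compare
// element by element against the corresponding dense kernel.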
#define TEST_ELEMENTWISE_OP(name) \
TEST_ELEMENTWISE_OP_WITH_TYPE(name, Csr) \
\
TEST_ELEMENTWISE_OP_WITH_TYPE(name, Coo)
#define TEST_ELEMENTWISE_OP_WITH_TYPE(name, type) \
template <typename T, typename Context> \
void TestElementWise##name##type(const Context& dev_ctx_cpu, \
const Sparse##type##Tensor& x, \
const Sparse##type##Tensor& y, \
const DDim& dense_dims) { \
auto out = sparse::ElementWise##name##type<T>(dev_ctx_cpu, x, y); \
const DenseTensor denseX = sparse::type##ToDense<T>(dev_ctx_cpu, x); \
const DenseTensor denseY = sparse::type##ToDense<T>(dev_ctx_cpu, y); \
const DenseTensor denseOut = sparse::type##ToDense<T>(dev_ctx_cpu, out); \
auto expectResult = name<T>(dev_ctx_cpu, denseX, denseY); \
for (int j = 0; j < denseOut.numel(); ++j) { \
auto actualResultRow = denseOut.template data<T>()[j]; \
auto expectResultRow = expectResult.template data<T>()[j]; \
if (std::is_same<T, float>::value || std::is_same<T, double>::value) { \
if (!std::isnan(expectResultRow)) { \
ASSERT_DOUBLE_EQ(expectResultRow, actualResultRow); \
} \
} else { \
ASSERT_EQ(expectResultRow, actualResultRow); \
} \
} \
}
TEST_ELEMENTWISE_OP(Add)
TEST_ELEMENTWISE_OP(Subtract)
TEST_ELEMENTWISE_OP(Multiply)
TEST_ELEMENTWISE_OP(Divide)
TEST(DEV_API, sparse_elementwise_coo_kernel_double) {
using T = double;
using IntT = int64_t;
for (int epoch = 0; epoch < 100; ++epoch) {
DDim dense_dims = phi::make_ddim({2, 4, 4});
IntT sparse_dim = 2;
    // 32 elements (2 * 4 * 4)
std::vector<T> x_dense_data = {0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 3.0, 0.0,
0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0,
0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 3.0, 0.0,
0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0};
std::vector<T> y_dense_data = {0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0,
0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 3.0, 0.0,
0.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0};
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
phi::DenseTensor dense_x(
alloc.get(),
phi::DenseTensorMeta(DataType::FLOAT32, dense_dims, DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<T>(paddle::platform::CPUPlace());
memcpy(dense_x_data, x_dense_data.data(), x_dense_data.size() * sizeof(T));
phi::DenseTensor dense_y(
alloc.get(),
phi::DenseTensorMeta(DataType::FLOAT32, dense_dims, DataLayout::NCHW));
auto* dense_y_data = dense_y.mutable_data<T>(paddle::platform::CPUPlace());
memcpy(dense_y_data, y_dense_data.data(), y_dense_data.size() * sizeof(T));
phi::CPUContext dev_ctx_cpu;
dev_ctx_cpu.SetAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
auto coo_x = sparse::DenseToCoo<T>(dev_ctx_cpu, dense_x, sparse_dim);
auto coo_y = sparse::DenseToCoo<T>(dev_ctx_cpu, dense_y, sparse_dim);
TestElementWiseAddCoo<T>(dev_ctx_cpu, coo_x, coo_y, dense_dims);
TestElementWiseSubtractCoo<T>(dev_ctx_cpu, coo_x, coo_y, dense_dims);
TestElementWiseMultiplyCoo<T>(dev_ctx_cpu, coo_x, coo_y, dense_dims);
TestElementWiseDivideCoo<T>(dev_ctx_cpu, coo_x, coo_y, dense_dims);
}
}
TEST(DEV_API, sparse_elementwise_csr_kernel_float) {
using T = float;
DDim dense_dims = phi::make_ddim({6, 4});
  // 24 elements (6 * 4)
std::vector<T> x_dense_data = {0.0, 0.0, 4.0, 2.0, 6.0, 3.0, 0.2, 0.1,
2.2, 1.1, 4.2, 2.1, 0.4, 0.2, 0.0, 0.0,
4.4, 2.2, 0.6, 0.3, 2.6, 1.3, 0.0, 0.0};
std::vector<T> y_dense_data = {0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 0.0, 3.5,
0.7, 0.0, 3.5, 0.7, 3.2, 0.1, 0.0, 3.2,
1.0, 0.0, 1.2, 0.5, 0.7, 3.3, 0.0, 9.0};
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
phi::DenseTensor dense_x(
alloc.get(),
phi::DenseTensorMeta(DataType::FLOAT32, dense_dims, DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<T>(paddle::platform::CPUPlace());
memcpy(dense_x_data, x_dense_data.data(), x_dense_data.size() * sizeof(T));
phi::DenseTensor dense_y(
alloc.get(),
phi::DenseTensorMeta(DataType::FLOAT32, dense_dims, DataLayout::NCHW));
auto* dense_y_data = dense_y.mutable_data<T>(paddle::platform::CPUPlace());
memcpy(dense_y_data, y_dense_data.data(), y_dense_data.size() * sizeof(T));
phi::CPUContext dev_ctx_cpu;
dev_ctx_cpu.SetAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
auto csr_x = sparse::DenseToCsr<T>(dev_ctx_cpu, dense_x);
auto csr_y = sparse::DenseToCsr<T>(dev_ctx_cpu, dense_y);
TestElementWiseAddCsr<T>(dev_ctx_cpu, csr_x, csr_y, dense_dims);
TestElementWiseSubtractCsr<T>(dev_ctx_cpu, csr_x, csr_y, dense_dims);
TestElementWiseMultiplyCsr<T>(dev_ctx_cpu, csr_x, csr_y, dense_dims);
TestElementWiseDivideCsr<T>(dev_ctx_cpu, csr_x, csr_y, dense_dims);
}
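// Generates TestElementWise<Op>CsrGrad / TestElementWise<Op>CooGrad helpers
// that compare the sparse elementwise grad kernels against the dense grad
// kernels on the densified inputs.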
#define TEST_ELEMENTWISE_OP_GRAD(name) \
TEST_ELEMENTWISE_OP_GRAD_WITH_TYPE(name, Csr) \
\
TEST_ELEMENTWISE_OP_GRAD_WITH_TYPE(name, Coo)
#define TEST_ELEMENTWISE_OP_GRAD_WITH_TYPE(name, type) \
template <typename T, typename Context> \
void TestElementWise##name##type##Grad(const Context& dev_ctx_cpu, \
const Sparse##type##Tensor& x, \
const Sparse##type##Tensor& y, \
const DDim& dense_dims) { \
auto out = sparse::ElementWise##name##type<T>(dev_ctx_cpu, x, y); \
auto dresult = \
sparse::ElementWise##name##type##Grad<T>(dev_ctx_cpu, x, y, out); \
\
DenseTensor expectdy = phi::Empty( \
dev_ctx_cpu, \
DenseTensorMeta(DataType::FLOAT32, dense_dims, DataLayout::NCHW)); \
DenseTensor expectdx = phi::Empty( \
dev_ctx_cpu, \
DenseTensorMeta(DataType::FLOAT32, dense_dims, DataLayout::NCHW)); \
\
phi::name##GradKernel<T>(dev_ctx_cpu, \
sparse::type##ToDense<T>(dev_ctx_cpu, x), \
sparse::type##ToDense<T>(dev_ctx_cpu, y), \
sparse::type##ToDense<T>(dev_ctx_cpu, out), \
-1, \
&expectdx, \
&expectdy); \
const DenseTensor densedX = \
sparse::type##ToDense<T>(dev_ctx_cpu, dresult[0]); \
const DenseTensor densedY = \
sparse::type##ToDense<T>(dev_ctx_cpu, dresult[1]); \
const DenseTensor denseOut = sparse::type##ToDense<T>(dev_ctx_cpu, out); \
\
for (int j = 0; j < densedX.numel(); ++j) { \
auto actualResultRow = densedX.template data<T>()[j]; \
auto expectResultRow = expectdx.template data<T>()[j]; \
if (std::is_same<T, float>::value || std::is_same<T, double>::value) { \
if (!std::isnan(expectResultRow)) { \
ASSERT_DOUBLE_EQ(expectResultRow, actualResultRow); \
} \
} else { \
ASSERT_EQ(expectResultRow, actualResultRow); \
} \
} \
for (int j = 0; j < densedY.numel(); ++j) { \
auto actualResultRow = densedY.template data<T>()[j]; \
auto expectResultRow = expectdy.template data<T>()[j]; \
if (std::is_same<T, float>::value || std::is_same<T, double>::value) { \
if (!std::isnan(expectResultRow)) { \
ASSERT_DOUBLE_EQ(expectResultRow, actualResultRow); \
} \
} else { \
ASSERT_EQ(expectResultRow, actualResultRow); \
} \
} \
}
TEST_ELEMENTWISE_OP_GRAD(Add)
TEST_ELEMENTWISE_OP_GRAD(Subtract)
TEST_ELEMENTWISE_OP_GRAD(Multiply)
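// The divide grad kernels take the forward output as an extra argument, so
// they cannot be generated by the macro above; the CSR and COO cases get
// dedicated helpers instead.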
template <typename T, typename Context>
void TestElementWiseDivideCsrGrad(const Context& dev_ctx_cpu,
const SparseCsrTensor& x,
const SparseCsrTensor& y,
const DDim& dense_dims) {
auto out = sparse::ElementWiseDivideCsr<T>(dev_ctx_cpu, x, y);
auto dresult =
sparse::ElementWiseDivideCsrGrad<T>(dev_ctx_cpu, x, y, out, out);
DenseTensor expectdy = phi::Empty(
dev_ctx_cpu,
DenseTensorMeta(DataType::FLOAT32, dense_dims, DataLayout::NCHW));
DenseTensor expectdx = phi::Empty(
dev_ctx_cpu,
DenseTensorMeta(DataType::FLOAT32, dense_dims, DataLayout::NCHW));
phi::DivideGradKernel<T>(dev_ctx_cpu,
sparse::CsrToDense<T>(dev_ctx_cpu, x),
sparse::CsrToDense<T>(dev_ctx_cpu, y),
sparse::CsrToDense<T>(dev_ctx_cpu, out),
sparse::CsrToDense<T>(dev_ctx_cpu, out),
-1,
&expectdx,
&expectdy);
const DenseTensor densedX = sparse::CsrToDense<T>(dev_ctx_cpu, dresult[0]);
const DenseTensor densedY = sparse::CsrToDense<T>(dev_ctx_cpu, dresult[1]);
const DenseTensor denseOut = sparse::CsrToDense<T>(dev_ctx_cpu, out);
for (int j = 0; j < densedX.numel(); ++j) {
auto actualResultRow = densedX.template data<T>()[j];
auto expectResultRow = expectdx.template data<T>()[j];
if (!std::isnan(expectResultRow)) {
ASSERT_DOUBLE_EQ(expectResultRow, actualResultRow);
}
}
for (int j = 0; j < densedY.numel(); ++j) {
auto actualResultRow = densedY.template data<T>()[j];
auto expectResultRow = expectdy.template data<T>()[j];
if (!std::isnan(expectResultRow)) {
ASSERT_DOUBLE_EQ(expectResultRow, actualResultRow);
}
}
}
template <typename T, typename Context>
void TestElementWiseDivideCooGrad(const Context& dev_ctx_cpu,
const SparseCooTensor& x,
const SparseCooTensor& y,
const DDim& dense_dims) {
auto out = sparse::ElementWiseDivideCoo<T>(dev_ctx_cpu, x, y);
auto dresult =
sparse::ElementWiseDivideCooGrad<T>(dev_ctx_cpu, x, y, out, out);
DenseTensor expectdy = phi::Empty(
dev_ctx_cpu,
DenseTensorMeta(DataType::FLOAT32, dense_dims, DataLayout::NCHW));
DenseTensor expectdx = phi::Empty(
dev_ctx_cpu,
DenseTensorMeta(DataType::FLOAT32, dense_dims, DataLayout::NCHW));
phi::DivideGradKernel<T>(dev_ctx_cpu,
sparse::CooToDense<T>(dev_ctx_cpu, x),
sparse::CooToDense<T>(dev_ctx_cpu, y),
sparse::CooToDense<T>(dev_ctx_cpu, out),
sparse::CooToDense<T>(dev_ctx_cpu, out),
-1,
&expectdx,
&expectdy);
const DenseTensor densedX = sparse::CooToDense<T>(dev_ctx_cpu, dresult[0]);
const DenseTensor densedY = sparse::CooToDense<T>(dev_ctx_cpu, dresult[1]);
const DenseTensor denseOut = sparse::CooToDense<T>(dev_ctx_cpu, out);
for (int j = 0; j < densedX.numel(); ++j) {
auto actualResultRow = densedX.template data<T>()[j];
auto expectResultRow = expectdx.template data<T>()[j];
if (!std::isnan(expectResultRow)) {
ASSERT_DOUBLE_EQ(expectResultRow, actualResultRow);
}
}
for (int j = 0; j < densedY.numel(); ++j) {
auto actualResultRow = densedY.template data<T>()[j];
auto expectResultRow = expectdy.template data<T>()[j];
if (!std::isnan(expectResultRow)) {
ASSERT_DOUBLE_EQ(expectResultRow, actualResultRow);
}
}
}
TEST(DEV_API, sparse_elementwise_csr_grad_kernel_float) {
using T = float;
DDim dense_dims = phi::make_ddim({2, 3, 4});
std::vector<T> x_dense_data = {0.0, 0.0, 4.0, 2.0, 6.0, 3.0, 0.2, 0.1,
2.2, 1.1, 4.2, 2.1, 0.4, 0.2, 0.0, 0.0,
4.4, 2.2, 0.6, 0.3, 2.6, 1.3, 0.0, 0.0};
std::vector<T> y_dense_data = {0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 0.0, 3.5,
0.7, 0.0, 3.5, 0.7, 3.2, 0.1, 0.0, 3.2,
1.0, 0.0, 1.2, 0.5, 0.7, 3.3, 0.0, 9.0};
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
phi::DenseTensor dense_x(
alloc.get(),
phi::DenseTensorMeta(DataType::FLOAT32, dense_dims, DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<T>(paddle::platform::CPUPlace());
memcpy(dense_x_data, x_dense_data.data(), x_dense_data.size() * sizeof(T));
phi::DenseTensor dense_y(
alloc.get(),
phi::DenseTensorMeta(DataType::FLOAT32, dense_dims, DataLayout::NCHW));
auto* dense_y_data = dense_y.mutable_data<T>(paddle::platform::CPUPlace());
memcpy(dense_y_data, y_dense_data.data(), y_dense_data.size() * sizeof(T));
phi::CPUContext dev_ctx_cpu;
dev_ctx_cpu.SetAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
dev_ctx_cpu.SetHostAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
auto csr_x = sparse::DenseToCsr<T>(dev_ctx_cpu, dense_x);
auto csr_y = sparse::DenseToCsr<T>(dev_ctx_cpu, dense_y);
auto dx = sparse::DenseToCsr<T>(dev_ctx_cpu, dense_y);
auto dy = sparse::DenseToCsr<T>(dev_ctx_cpu, dense_x);
TestElementWiseAddCsrGrad<T>(dev_ctx_cpu, csr_x, csr_y, dense_dims);
TestElementWiseSubtractCsrGrad<T>(dev_ctx_cpu, csr_x, csr_y, dense_dims);
TestElementWiseMultiplyCsrGrad<T>(dev_ctx_cpu, csr_x, csr_y, dense_dims);
TestElementWiseDivideCsrGrad<T>(dev_ctx_cpu, csr_x, csr_y, dense_dims);
}
TEST(DEV_API, sparse_elementwise_coo_grad_kernel_double) {
using T = double;
int64_t sparse_dim = 2;
DDim dense_dims = phi::make_ddim({3, 4});
std::vector<T> x_dense_data = {
0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 3.2, 0.0, 0.0, 3.2, 0.0, 0.0};
std::vector<T> y_dense_data = {
0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 0.0, 3.5, 0.7, 0.0, 3.5, 0.7};
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
phi::DenseTensor dense_x(
alloc.get(),
phi::DenseTensorMeta(DataType::FLOAT32, dense_dims, DataLayout::NCHW));
auto* dense_x_data = dense_x.mutable_data<T>(paddle::platform::CPUPlace());
memcpy(dense_x_data, x_dense_data.data(), x_dense_data.size() * sizeof(T));
phi::DenseTensor dense_y(
alloc.get(),
phi::DenseTensorMeta(DataType::FLOAT32, dense_dims, DataLayout::NCHW));
auto* dense_y_data = dense_y.mutable_data<T>(paddle::platform::CPUPlace());
memcpy(dense_y_data, y_dense_data.data(), y_dense_data.size() * sizeof(T));
phi::CPUContext dev_ctx_cpu;
dev_ctx_cpu.SetAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
dev_ctx_cpu.SetHostAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
  auto coo_x = sparse::DenseToCoo<T>(dev_ctx_cpu, dense_x, sparse_dim);
  auto coo_y = sparse::DenseToCoo<T>(dev_ctx_cpu, dense_y, sparse_dim);
  auto dx = sparse::DenseToCoo<T>(dev_ctx_cpu, dense_y, sparse_dim);
  auto dy = sparse::DenseToCoo<T>(dev_ctx_cpu, dense_x, sparse_dim);
  TestElementWiseAddCooGrad<T>(dev_ctx_cpu, coo_x, coo_y, dense_dims);
  TestElementWiseSubtractCooGrad<T>(dev_ctx_cpu, coo_x, coo_y, dense_dims);
  TestElementWiseMultiplyCooGrad<T>(dev_ctx_cpu, coo_x, coo_y, dense_dims);
  TestElementWiseDivideCooGrad<T>(dev_ctx_cpu, coo_x, coo_y, dense_dims);
}
} // namespace tests
} // namespace phi
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/sparse/coalesce_kernel.h"
#include "paddle/phi/kernels/sparse/pool_grad_kernel.h"
#include "paddle/phi/kernels/sparse/pool_kernel.h"
namespace phi {
namespace tests {
template <typename T1, typename T2>
std::vector<T2> cast(const std::vector<T1>& in) {
std::vector<T2> out(in.size());
for (uint64_t i = 0; i < in.size(); i++) {
out[i] = static_cast<T2>(in[i]);
}
return out;
}
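// Runs the sparse MaxPool forward (and optionally backward) kernels on CPU
// and, when built with CUDA, on GPU, then verifies the output dims, indices,
// features, and input gradients against the expected values within `diff`.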
template <typename T, typename IntT = int>
void TestMaxPoolBase(const std::vector<IntT>& indices,
const std::vector<T>& features,
const DDim& x_dims,
const std::vector<IntT>& correct_out_indices,
const std::vector<T>& correct_out_features,
const DDim& correct_out_dims,
const int non_zero_num,
const std::vector<int>& kernel_sizes,
const std::vector<int>& paddings,
const std::vector<int>& strides,
const std::vector<int>& dilations,
const float diff = 1e-3,
const bool backward = false,
const std::vector<T> features_grad = {}) {
phi::CPUContext dev_ctx_cpu;
dev_ctx_cpu.SetAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
dev_ctx_cpu.SetHostAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(phi::CPUPlace())
.get());
const int in_channels = x_dims[4];
const int out_channels = in_channels;
auto indices_dtype = paddle::experimental::CppTypeToDataType<IntT>::Type();
DenseTensor indices_tensor = phi::Empty(
dev_ctx_cpu,
DenseTensorMeta(indices_dtype, {4, non_zero_num}, DataLayout::NCHW));
memcpy(indices_tensor.data<IntT>(),
indices.data(),
indices.size() * sizeof(IntT));
DenseTensor features_tensor = phi::Empty(
dev_ctx_cpu,
DenseTensorMeta(paddle::experimental::CppTypeToDataType<T>::Type(),
{non_zero_num, in_channels},
DataLayout::NHWC));
memcpy(
features_tensor.data<T>(), features.data(), features.size() * sizeof(T));
SparseCooTensor x_tensor(indices_tensor, features_tensor, x_dims);
auto f_verify = [&](const T* real_data, const std::vector<T>& correct_data) {
for (uint64_t i = 0; i < correct_data.size(); i++) {
float tmp = std::fabs(static_cast<float>(correct_data[i] - real_data[i]));
ASSERT_LT(tmp, diff);
}
};
if (!std::is_same<T, phi::dtype::float16>::value) {
DenseTensor rulebook, counter;
SparseCooTensor out = sparse::MaxPoolCoo<T>(dev_ctx_cpu,
x_tensor,
kernel_sizes,
paddings,
dilations,
strides,
&rulebook,
&counter);
ASSERT_EQ(correct_out_dims.size(), out.dims().size());
for (int i = 0; i < correct_out_dims.size(); i++) {
ASSERT_EQ(correct_out_dims[i], out.dims()[i]);
}
ASSERT_EQ((int64_t)correct_out_features.size() / out_channels, out.nnz());
int cmp_indices = memcmp(correct_out_indices.data(),
out.non_zero_indices().data<IntT>(),
correct_out_indices.size() * sizeof(IntT));
ASSERT_EQ(cmp_indices, 0);
f_verify(out.non_zero_elements().data<T>(), correct_out_features);
if (backward) {
SparseCooTensor x_grad = sparse::MaxPoolCooGrad<T>(
dev_ctx_cpu, x_tensor, rulebook, counter, out, out, kernel_sizes);
f_verify(x_grad.non_zero_elements().data<T>(), features_grad);
}
}
// test gpu
#if defined(PADDLE_WITH_CUDA)
phi::GPUContext dev_ctx_gpu{phi::GPUPlace()};
dev_ctx_gpu.PartialInitWithoutAllocator();
dev_ctx_gpu.SetAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(dev_ctx_gpu.GetPlace(), dev_ctx_gpu.stream())
.get());
dev_ctx_gpu.SetHostAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(phi::CPUPlace())
.get());
dev_ctx_gpu.SetPinnedAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CUDAPinnedPlace())
.get());
dev_ctx_gpu.PartialInitWithAllocator();
DenseTensor d_indices_tensor = phi::Empty(
dev_ctx_gpu,
DenseTensorMeta(indices_dtype, {4, non_zero_num}, DataLayout::NCHW));
phi::Copy(
dev_ctx_gpu, indices_tensor, phi::GPUPlace(), true, &d_indices_tensor);
DenseTensor d_features_tensor =
phi::EmptyLike<T>(dev_ctx_gpu, features_tensor);
phi::Copy(
dev_ctx_gpu, features_tensor, phi::GPUPlace(), true, &d_features_tensor);
SparseCooTensor d_x_tensor(d_indices_tensor, d_features_tensor, x_dims);
DenseTensor d_rulebook, d_counter;
SparseCooTensor d_out = sparse::MaxPoolCoo<T>(dev_ctx_gpu,
d_x_tensor,
kernel_sizes,
paddings,
dilations,
strides,
&d_rulebook,
&d_counter);
SparseCooTensor tmp_d_out = sparse::CoalesceCoo<T>(dev_ctx_gpu, d_out);
ASSERT_EQ(correct_out_dims.size(), d_out.dims().size());
ASSERT_EQ((int64_t)correct_out_features.size() / out_channels, d_out.nnz());
for (int i = 0; i < correct_out_dims.size(); i++) {
ASSERT_EQ(correct_out_dims[i], d_out.dims()[i]);
}
DenseTensor h_indices_tensor = phi::Empty(
dev_ctx_cpu,
DenseTensorMeta(indices_dtype, {4, d_out.nnz()}, DataLayout::NCHW));
phi::Copy(dev_ctx_gpu,
tmp_d_out.non_zero_indices(),
phi::CPUPlace(),
true,
&h_indices_tensor);
int cmp_indices2 = memcmp(correct_out_indices.data(),
h_indices_tensor.data<IntT>(),
correct_out_indices.size() * sizeof(IntT));
ASSERT_EQ(cmp_indices2, 0);
DenseTensor h_features_tensor =
phi::EmptyLike<T>(dev_ctx_cpu, d_out.non_zero_elements());
phi::Copy(dev_ctx_gpu,
tmp_d_out.non_zero_elements(),
phi::CPUPlace(),
true,
&h_features_tensor);
f_verify(h_features_tensor.data<T>(), correct_out_features);
if (backward) {
SparseCooTensor x_grad = sparse::MaxPoolCooGrad<T>(dev_ctx_gpu,
d_x_tensor,
d_rulebook,
d_counter,
d_out,
d_out,
kernel_sizes);
DenseTensor h_features_grad =
phi::EmptyLike<T>(dev_ctx_cpu, x_grad.non_zero_elements());
phi::Copy(dev_ctx_gpu,
x_grad.non_zero_elements(),
phi::CPUPlace(),
true,
&h_features_grad);
f_verify(h_features_grad.data<T>(), features_grad);
}
#endif
}
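// Convenience wrapper that runs TestMaxPoolBase with both float and double
// inputs.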
template <typename IntT = int>
void TestMaxPool(const std::vector<IntT>& indices,
const std::vector<float>& features,
const DDim& x_dims,
const std::vector<IntT>& correct_out_indices,
const std::vector<float>& correct_out_features,
const DDim& correct_out_dims,
const int non_zero_num,
const std::vector<int>& kernel_sizes,
const std::vector<int>& paddings,
const std::vector<int>& strides,
const std::vector<int>& dilations,
const float diff = 1e-3,
const bool backward = false,
const std::vector<float> features_grad = {}) {
// test float
TestMaxPoolBase<float, IntT>(indices,
features,
x_dims,
correct_out_indices,
correct_out_features,
correct_out_dims,
non_zero_num,
kernel_sizes,
paddings,
strides,
dilations,
diff,
backward,
features_grad);
// test double
TestMaxPoolBase<double, IntT>(indices,
cast<float, double>(features),
x_dims,
correct_out_indices,
cast<float, double>(correct_out_features),
correct_out_dims,
non_zero_num,
kernel_sizes,
paddings,
strides,
dilations,
diff,
backward,
cast<float, double>(features_grad));
}
TEST(DEV_API, sparse_maxpool) {
const int channels = 1;
DDim x_dims = {1, 1, 4, 4, channels};
DDim out_dims = {1, 1, 2, 2, channels};
std::vector<int> kernel_sizes = {1, 3, 3};
std::vector<int> paddings = {0, 0, 0};
std::vector<int> strides = {1, 1, 1};
std::vector<int> dilations = {1, 1, 1};
const int non_zero_num = 3;
std::vector<int> indices = {0, 0, 0, 0, 0, 0, 0, 1, 3, 0, 1, 2};
std::vector<float> features = {1, 2, 3};
std::vector<int> out_indices = {
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
1,
0,
1,
0,
1,
};
std::vector<float> out_features = {2, 2, 3, 3};
std::vector<float> x_grad = {0, 4, 6};
TestMaxPool(indices,
features,
x_dims,
out_indices,
out_features,
out_dims,
non_zero_num,
kernel_sizes,
paddings,
strides,
dilations,
1e-6,
true,
x_grad);
}
TEST(DEV_API, sparse_maxpool_stride) {
const int channels = 1;
DDim x_dims = {1, 1, 4, 4, channels};
DDim out_dims = {1, 1, 1, 1, channels};
std::vector<int> kernel_sizes = {1, 3, 3};
std::vector<int> paddings = {0, 0, 0};
std::vector<int> strides = {2, 2, 2};
std::vector<int> dilations = {1, 1, 1};
const int non_zero_num = 3;
std::vector<int> indices = {0, 0, 0, 0, 0, 0, 0, 1, 3, 0, 1, 2};
std::vector<float> features = {1, 2, 3};
std::vector<int> out_indices = {0, 0, 0, 0};
std::vector<float> out_features = {2};
std::vector<float> x_grad = {0, 2, 0};
TestMaxPool(indices,
features,
x_dims,
out_indices,
out_features,
out_dims,
non_zero_num,
kernel_sizes,
paddings,
strides,
dilations,
1e-6,
true,
x_grad);
}
TEST(DEV_API, sparse_maxpool_channel) {
const int channels = 2;
DDim x_dims = {1, 1, 4, 4, channels};
DDim out_dims = {1, 1, 2, 2, channels};
std::vector<int> kernel_sizes = {1, 3, 3};
std::vector<int> paddings = {0, 0, 0};
std::vector<int> strides = {1, 1, 1};
std::vector<int> dilations = {1, 1, 1};
const int non_zero_num = 3;
std::vector<int> indices = {0, 0, 0, 0, 0, 0, 0, 1, 3, 0, 1, 2};
std::vector<float> features = {1, 1, 2, 2, 3, 3};
std::vector<int> out_indices = {
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
1,
0,
1,
0,
1,
};
std::vector<float> out_features = {2, 2, 2, 2, 3, 3, 3, 3};
std::vector<float> x_grad = {0, 0, 4, 4, 6, 6};
TestMaxPool(indices,
features,
x_dims,
out_indices,
out_features,
out_dims,
non_zero_num,
kernel_sizes,
paddings,
strides,
dilations,
1e-6,
true,
x_grad);
}
TEST(DEV_API, sparse_maxpool3d) {
const int channels = 2;
DDim x_dims = {1, 5, 4, 4, channels};
DDim out_dims = {1, 3, 2, 2, channels};
std::vector<int> kernel_sizes = {3, 3, 3};
std::vector<int> paddings = {0, 0, 0};
std::vector<int> strides = {1, 1, 1};
std::vector<int> dilations = {1, 1, 1};
const int non_zero_num = 3;
std::vector<int> indices = {0, 0, 0, 0, 0, 0, 0, 1, 3, 0, 1, 2};
std::vector<float> features = {1, 1, 2, 2, 3, 3};
std::vector<int> out_indices = {
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
1,
0,
1,
0,
1,
};
std::vector<float> out_features = {2, 2, 2, 2, 3, 3, 3, 3};
std::vector<float> x_grad = {0, 0, 4, 4, 6, 6};
TestMaxPool(indices,
features,
x_dims,
out_indices,
out_features,
out_dims,
non_zero_num,
kernel_sizes,
paddings,
strides,
dilations,
1e-6,
true,
x_grad);
}
} // namespace tests
} // namespace phi
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/empty_kernel.h"
#include "paddle/phi/kernels/sparse/empty_kernel.h"
#include "paddle/phi/kernels/sparse/sparse_utils_kernel.h"
#include "paddle/phi/kernels/sparse/unary_grad_kernel.h"
#include "paddle/phi/kernels/sparse/unary_kernel.h"
#include "paddle/phi/kernels/transpose_grad_kernel.h"
#include "paddle/phi/kernels/transpose_kernel.h"
namespace phi {
namespace tests {
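// The transpose cases below run the sparse and dense transpose kernels on the
// same data; they mainly check that the kernels execute without error.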
TEST(DEV_API, sparse_transpose_coo) {
std::vector<float> data = {0, -1, 0, 2, 0, 0, -3, 0, 4, 5, 0, 0};
phi::CPUContext dev_ctx_cpu;
dev_ctx_cpu.SetAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
dev_ctx_cpu.SetHostAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
DenseTensor dense_x = phi::Empty(
dev_ctx_cpu,
DenseTensorMeta(
DataType::FLOAT32, phi::make_ddim({3, 2, 2}), DataLayout::NCHW));
memcpy(dense_x.data<float>(), data.data(), data.size() * sizeof(float));
auto sparse_coo = sparse::DenseToCoo<float>(dev_ctx_cpu, dense_x, 3);
auto sparse_out =
sparse::TransposeCoo<float>(dev_ctx_cpu, sparse_coo, {2, 1, 0});
DenseTensor dense_out = phi::Empty(
dev_ctx_cpu,
DenseTensorMeta(
DataType::FLOAT32, phi::make_ddim({2, 2, 3}), DataLayout::NCHW));
TransposeKernel<float>(dev_ctx_cpu, dense_x, {2, 1, 0}, &dense_out);
// backward
DenseTensor dense_grad_x = phi::EmptyLike<float>(dev_ctx_cpu, dense_out);
TransposeGradKernel<float>(dev_ctx_cpu, dense_out, {2, 1, 0}, &dense_grad_x);
SparseCooTensor sparse_grad_x;
sparse::EmptyLikeCooKernel<float>(dev_ctx_cpu, sparse_coo, &sparse_grad_x);
SparseCooTensor sparse_out_grad(
sparse_coo.indices(), sparse_coo.values(), {2, 2, 3});
sparse::TransposeCooGradKernel<float>(
dev_ctx_cpu, sparse_out_grad, {2, 1, 0}, &sparse_grad_x);
}
TEST(DEV_API, sparse_transpose_csr_case1) {
std::vector<float> data = {0, -1, 0, 2, 0, 0, -3, 0, 4, 5, 0, 0};
phi::CPUContext dev_ctx_cpu;
dev_ctx_cpu.SetAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
dev_ctx_cpu.SetHostAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
DenseTensor dense_x = phi::Empty(
dev_ctx_cpu,
DenseTensorMeta(
DataType::FLOAT32, phi::make_ddim({3, 2, 2}), DataLayout::NCHW));
memcpy(dense_x.data<float>(), data.data(), data.size() * sizeof(float));
auto sparse_csr = sparse::DenseToCsr<float>(dev_ctx_cpu, dense_x);
auto sparse_out =
sparse::TransposeCsr<float>(dev_ctx_cpu, sparse_csr, {2, 1, 0});
DenseTensor dense_out = phi::Empty(
dev_ctx_cpu,
DenseTensorMeta(
DataType::FLOAT32, phi::make_ddim({2, 2, 3}), DataLayout::NCHW));
TransposeKernel<float>(dev_ctx_cpu, dense_x, {2, 1, 0}, &dense_out);
// backward
DenseTensor dense_grad_x = phi::EmptyLike<float>(dev_ctx_cpu, dense_out);
TransposeGradKernel<float>(dev_ctx_cpu, dense_out, {2, 1, 0}, &dense_grad_x);
SparseCsrTensor sparse_grad_x;
sparse::EmptyLikeCsrKernel<float>(dev_ctx_cpu, sparse_csr, &sparse_grad_x);
sparse::TransposeCsrGradKernel<float>(
dev_ctx_cpu, sparse_out, {2, 1, 0}, &sparse_grad_x);
}
TEST(DEV_API, sparse_transpose_csr_case2) {
std::vector<float> data = {0, -1, 0, 2, 0, 0, -3, 0, 4, 5, 0, 0};
phi::CPUContext dev_ctx_cpu;
dev_ctx_cpu.SetAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
dev_ctx_cpu.SetHostAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
DenseTensor dense_x = phi::Empty(
dev_ctx_cpu,
DenseTensorMeta(
DataType::FLOAT32, phi::make_ddim({3, 2, 2}), DataLayout::NCHW));
memcpy(dense_x.data<float>(), data.data(), data.size() * sizeof(float));
auto sparse_csr = sparse::DenseToCsr<float>(dev_ctx_cpu, dense_x);
auto sparse_out =
sparse::TransposeCsr<float>(dev_ctx_cpu, sparse_csr, {1, 2, 0});
DenseTensor dense_out = phi::Empty(
dev_ctx_cpu,
DenseTensorMeta(
DataType::FLOAT32, phi::make_ddim({2, 2, 3}), DataLayout::NCHW));
TransposeKernel<float>(dev_ctx_cpu, dense_x, {1, 2, 0}, &dense_out);
}
TEST(DEV_API, sparse_transpose_csr_case3) {
std::vector<float> data = {0, -1, 0, 2, 0, 0, -3, 0, 4, 5, 0, 0};
phi::CPUContext dev_ctx_cpu;
dev_ctx_cpu.SetAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
dev_ctx_cpu.SetHostAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
DenseTensor dense_x = phi::Empty(
dev_ctx_cpu,
DenseTensorMeta(
DataType::FLOAT32, phi::make_ddim({3, 4}), DataLayout::NCHW));
memcpy(dense_x.data<float>(), data.data(), data.size() * sizeof(float));
auto sparse_csr = sparse::DenseToCsr<float>(dev_ctx_cpu, dense_x);
auto sparse_out =
sparse::TransposeCsr<float>(dev_ctx_cpu, sparse_csr, {1, 0});
DenseTensor dense_out = phi::Empty(
dev_ctx_cpu,
DenseTensorMeta(
DataType::FLOAT32, phi::make_ddim({4, 3}), DataLayout::NCHW));
TransposeKernel<float>(dev_ctx_cpu, dense_x, {1, 0}, &dense_out);
}
} // namespace tests
} // namespace phi
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <algorithm>
#include <ctime>
#include <memory>
#include <random>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/sparse/sparse_utils_kernel.h"
namespace phi {
namespace tests {
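// Compares the nnz, indices, and values of a SparseCooTensor against the
// expected data, copying the tensors back to host first when they live on the
// GPU.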
template <typename ValueT, typename IndicesT>
inline void CheckResult(
const DeviceContext* dev_ctx,
const SparseCooTensor& coo,
const std::vector<ValueT> non_zero_elements,
const std::vector<IndicesT>& non_zero_indices,
const int64_t non_zero_num,
const std::shared_ptr<paddle::experimental::DefaultAllocator>& alloc) {
const DenseTensor real_indices = coo.non_zero_indices();
const DenseTensor real_elements = coo.non_zero_elements();
ASSERT_EQ(coo.nnz(), non_zero_num);
#if defined(PADDLE_WITH_CUDA)
if (coo.place() == phi::GPUPlace()) {
const auto* dev_ctx_gpu = static_cast<const phi::GPUContext*>(dev_ctx);
DenseTensor indices(
alloc.get(),
DenseTensorMeta(
DataType::INT64, real_indices.dims(), real_indices.layout()));
DenseTensor elements(alloc.get(),
DenseTensorMeta(real_elements.dtype(),
real_elements.dims(),
real_elements.layout()));
phi::Copy(*dev_ctx_gpu, real_indices, indices.place(), true, &indices);
phi::Copy(*dev_ctx_gpu, real_elements, elements.place(), true, &elements);
int cmp_indices = memcmp(indices.data<IndicesT>(),
non_zero_indices.data(),
non_zero_indices.size() * sizeof(IndicesT));
ASSERT_EQ(cmp_indices, 0);
int cmp_elements = memcmp(elements.data<ValueT>(),
non_zero_elements.data(),
non_zero_elements.size() * sizeof(ValueT));
ASSERT_EQ(cmp_elements, 0);
} else {
#endif
int cmp_indices = memcmp(real_indices.data<IndicesT>(),
non_zero_indices.data(),
non_zero_indices.size() * sizeof(IndicesT));
ASSERT_EQ(cmp_indices, 0);
int cmp_elements = memcmp(real_elements.data<ValueT>(),
non_zero_elements.data(),
non_zero_elements.size() * sizeof(ValueT));
ASSERT_EQ(cmp_elements, 0);
#if defined(PADDLE_WITH_CUDA)
}
#endif
}
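// Converts a dense tensor to COO on CPU and, when built with CUDA, on GPU,
// then checks the result with CheckResult.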
template <typename T>
void TestDenseToSparseCoo(const DenseTensor& dense_x,
const int64_t sparse_dim,
const std::vector<T>& non_zero_data,
const std::vector<int64_t>& indices_data,
const int64_t non_zero_num) {
const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
phi::CPUContext dev_ctx_cpu;
dev_ctx_cpu.SetAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(phi::CPUPlace())
.get());
// 1. test cpu
auto cpu_sparse_out = sparse::DenseToCoo<T>(dev_ctx_cpu, dense_x, sparse_dim);
CheckResult<T, int64_t>(&dev_ctx_cpu,
cpu_sparse_out,
non_zero_data,
indices_data,
non_zero_num,
alloc);
// 2. test cuda
#if defined(PADDLE_WITH_CUDA)
phi::GPUContext dev_ctx_gpu{phi::GPUPlace()};
dev_ctx_gpu.PartialInitWithoutAllocator();
dev_ctx_gpu.SetAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(dev_ctx_gpu.GetPlace(), dev_ctx_gpu.stream())
.get());
dev_ctx_gpu.SetHostAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(phi::CPUPlace())
.get());
dev_ctx_gpu.SetPinnedAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CUDAPinnedPlace())
.get());
dev_ctx_gpu.PartialInitWithAllocator();
const auto cuda_alloc =
std::make_shared<paddle::experimental::DefaultAllocator>(
paddle::platform::CUDAPlace());
DenseTensor d_dense_x(
cuda_alloc.get(),
DenseTensorMeta(dense_x.dtype(), dense_x.dims(), dense_x.layout()));
phi::Copy(dev_ctx_gpu, dense_x, phi::GPUPlace(), true, &d_dense_x);
auto sparse_out = sparse::DenseToCoo<T>(dev_ctx_gpu, d_dense_x, sparse_dim);
CheckResult<T, int64_t>(&dev_ctx_gpu,
sparse_out,
non_zero_data,
indices_data,
non_zero_num,
alloc);
#endif
}
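// Randomized 2-D case: matrix size and zero rate are drawn at runtime, and
// the expected values/indices are collected while the dense input is filled.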
TEST(DEV_API, to_sparse_coo) {
const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
std::default_random_engine random(time(NULL));
std::uniform_real_distribution<float> dis(0.0, 1.0);
std::uniform_int_distribution<int> dis_int(4, 64);
const int rows = dis_int(random), cols = dis_int(random);
DenseTensor dense_x(
alloc.get(),
DenseTensorMeta(DataType::FLOAT32, {rows, cols}, DataLayout::NCHW));
phi::CPUPlace cpu;
auto* dense_x_data = dense_x.mutable_data<float>(cpu);
std::vector<float> dense_data(rows * cols);
std::vector<float> non_zero_data;
std::vector<int64_t> rows_data, cols_data;
const int64_t sparse_dim = 2;
const float zero_rate = dis(random);
int64_t non_zero_num = 0;
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
bool iszero = dis(random) < zero_rate;
if (iszero) {
dense_data[i * cols + j] = 0.0;
} else {
float data = dis(random);
dense_data[i * cols + j] = data;
non_zero_data.push_back(data);
rows_data.push_back(i);
cols_data.push_back(j);
non_zero_num += 1;
}
}
}
std::copy(
dense_data.data(), dense_data.data() + dense_data.size(), dense_x_data);
std::vector<int64_t> indices_data(non_zero_num * 2);
memcpy(&indices_data[0], &rows_data[0], non_zero_num * sizeof(int64_t));
memcpy(&indices_data[non_zero_num],
&cols_data[0],
non_zero_num * sizeof(int64_t));
TestDenseToSparseCoo(
dense_x, sparse_dim, non_zero_data, indices_data, non_zero_num);
}
TEST(DEV_API, to_sparse_coo_hybird) {
const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
DenseTensor dense_x(
alloc.get(),
DenseTensorMeta(DataType::FLOAT32, {3, 3}, DataLayout::NCHW));
phi::CPUPlace cpu;
const int64_t sparse_dim = 1; // the non zero element is a vector
auto* dense_x_data = dense_x.mutable_data<float>(cpu);
float dense_data[3][3] = {{0.0, 1.0, 0.0}, {0.0, 0.0, 0.0}, {3.2, 0.0, 0.0}};
std::vector<float> non_zero_data = {
/*element0(*/ 0.0, 1.0, 0.0 /*)*/, /*element1(*/ 3.2, 0.0, 0.0 /*)*/};
std::vector<int64_t> indices_data = {0, 2};
const int64_t non_zero_num = 2;
std::copy(&dense_data[0][0], &dense_data[0][0] + 9, dense_x_data);
TestDenseToSparseCoo(
dense_x, sparse_dim, non_zero_data, indices_data, non_zero_num);
}
TEST(DEV_API, to_sparse_coo_fp16) {
const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
DenseTensor dense_x(
alloc.get(),
DenseTensorMeta(DataType::FLOAT16, {3, 3}, DataLayout::NCHW));
phi::CPUPlace cpu;
const int64_t sparse_dim = 2;
const int64_t non_zero_num = 2;
auto* dense_x_data = dense_x.mutable_data<phi::dtype::float16>(cpu);
float dense_data[3][3] = {{0.0, 1.0, 0.0}, {0.0, 0.0, 0.0}, {3.2, 0.0, 0.0}};
std::vector<float> data = {1.0, 3.2};
std::vector<phi::dtype::float16> non_zero_data(non_zero_num);
for (int i = 0; i < non_zero_num; i++) {
non_zero_data[i] = static_cast<phi::dtype::float16>(data[i]);
}
std::vector<int64_t> indices_data = {0, 2, 1, 0};
std::copy(&dense_data[0][0], &dense_data[0][0] + 9, dense_x_data);
  TestDenseToSparseCoo<phi::dtype::float16>(
dense_x, sparse_dim, non_zero_data, indices_data, non_zero_num);
}
TEST(DEV_API, to_sparse_coo_batch) {
const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
DenseTensor dense_x(
alloc.get(),
DenseTensorMeta(DataType::FLOAT32, {2, 3, 3}, DataLayout::NCHW));
phi::CPUPlace cpu;
const int64_t sparse_dim = 3;
const int64_t non_zero_num = 4;
auto* dense_x_data = dense_x.mutable_data<float>(cpu);
float dense_data[2][3][3] = {
{{0.0, 1.0, 0.0}, {0.0, 0.0, 0.0}, {2.0, 0.0, 0.0}},
{{0.0, 0.0, 0.0}, {0.0, 3.0, 0.0}, {4.0, 0.0, 0.0}}};
std::vector<float> non_zero_data = {1.0, 2.0, 3.0, 4.0};
std::vector<int64_t> indices_data = {0, 0, 1, 1, 0, 2, 1, 2, 1, 0, 1, 0};
/*
0, 0, 1, 1,
0, 2, 1, 2,
1, 0, 1, 0
*/
std::copy(&dense_data[0][0][0], &dense_data[0][0][0] + 18, dense_x_data);
TestDenseToSparseCoo<float>(
dense_x, sparse_dim, non_zero_data, indices_data, non_zero_num);
}
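// Builds a SparseCsrTensor from raw crows/cols/values buffers and checks the
// CSR-to-COO conversion on CPU (and on GPU when built with CUDA).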
template <typename T>
void TestSparseCsrToCoo(const DDim& dense_dims,
const std::vector<T>& non_zero_data,
const std::vector<int64_t>& crows_data,
const std::vector<int64_t>& cols_data,
const std::vector<int64_t>& indices_data,
const int64_t non_zero_num) {
int batchs = 1;
int rows = dense_dims[0];
if (dense_dims.size() == 3) {
batchs = dense_dims[0];
rows = dense_dims[1];
}
phi::DenseTensorMeta crows_meta(
DataType::INT64, {batchs * (rows + 1)}, DataLayout::NCHW);
phi::DenseTensorMeta cols_meta(
DataType::INT64, {non_zero_num}, DataLayout::NCHW);
phi::DenseTensorMeta values_meta(
paddle::experimental::CppTypeToDataType<T>::Type(),
{non_zero_num},
DataLayout::NCHW);
const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
phi::CPUPlace place;
phi::DenseTensor crows(alloc.get(), crows_meta);
phi::DenseTensor cols(alloc.get(), cols_meta);
phi::DenseTensor values(alloc.get(), values_meta);
memcpy(crows.mutable_data<int64_t>(place),
crows_data.data(),
crows_data.size() * sizeof(int64_t));
memcpy(cols.mutable_data<int64_t>(place),
cols_data.data(),
cols_data.size() * sizeof(int64_t));
memcpy(values.mutable_data<T>(place),
non_zero_data.data(),
non_zero_data.size() * sizeof(T));
phi::SparseCsrTensor csr(crows, cols, values, dense_dims);
// 1. test cpu
phi::CPUContext dev_ctx_cpu;
dev_ctx_cpu.SetAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(phi::CPUPlace())
.get());
auto cpu_sparse_out = sparse::CsrToCoo<T>(dev_ctx_cpu, csr);
CheckResult<T, int64_t>(&dev_ctx_cpu,
cpu_sparse_out,
non_zero_data,
indices_data,
non_zero_num,
alloc);
// 2. test cuda
#if defined(PADDLE_WITH_CUDA)
phi::GPUContext dev_ctx_gpu{phi::GPUPlace()};
dev_ctx_gpu.PartialInitWithoutAllocator();
dev_ctx_gpu.SetAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(dev_ctx_gpu.GetPlace(), dev_ctx_gpu.stream())
.get());
dev_ctx_gpu.SetHostAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(phi::CPUPlace())
.get());
dev_ctx_gpu.SetPinnedAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CUDAPinnedPlace())
.get());
dev_ctx_gpu.PartialInitWithAllocator();
const auto cuda_alloc =
std::make_shared<paddle::experimental::DefaultAllocator>(
paddle::platform::CUDAPlace());
phi::DenseTensor d_crows(cuda_alloc.get(), crows_meta);
phi::DenseTensor d_cols(cuda_alloc.get(), cols_meta);
phi::DenseTensor d_values(cuda_alloc.get(), values_meta);
phi::Copy(dev_ctx_gpu, crows, d_crows.place(), true, &d_crows);
phi::Copy(dev_ctx_gpu, cols, d_cols.place(), true, &d_cols);
phi::Copy(dev_ctx_gpu, values, d_values.place(), true, &d_values);
phi::SparseCsrTensor d_csr(d_crows, d_cols, d_values, dense_dims);
auto cuda_sparse_out = sparse::CsrToCoo<T>(dev_ctx_gpu, d_csr);
CheckResult<T, int64_t>(&dev_ctx_gpu,
cuda_sparse_out,
non_zero_data,
indices_data,
non_zero_num,
alloc);
#endif
}
TEST(DEV_API, sparse_csr_to_coo) {
DDim dense_dims = phi::make_ddim({3, 3});
std::vector<float> non_zero_data = {1.0, 2.0, 3.0, 3.2};
std::vector<int64_t> indices_data = {0, 1, 1, 2, 1, 0, 2, 0};
std::vector<int64_t> cols_data = {1, 0, 2, 0};
std::vector<int64_t> crows_data = {0, 1, 3, 4};
const int64_t non_zero_num = 4;
TestSparseCsrToCoo(dense_dims,
non_zero_data,
crows_data,
cols_data,
indices_data,
non_zero_num);
}
TEST(DEV_API, sparse_csr_to_coo_batch_and_fp16) {
DDim dense_dims = phi::make_ddim({2, 3, 3});
std::vector<float> non_zero_data = {1.0, 2.0, 3.0, 3.2, 1.0, 2.0, 3.0, 3.2};
std::vector<int64_t> cols_data = {1, 0, 2, 0, 1, 0, 2, 0};
std::vector<int64_t> crows_data = {0, 1, 3, 4, 0, 1, 3, 4};
std::vector<int64_t> indices_data = {0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 2,
0, 1, 1, 2, 1, 0, 2, 0, 1, 0, 2, 0};
const int64_t non_zero_num = 8;
using float16 = phi::dtype::float16;
std::vector<float16> non_zero_data_fp16(non_zero_num);
for (int64_t i = 0; i < non_zero_num; i++) {
non_zero_data_fp16[i] = static_cast<float16>(non_zero_data[i]);
}
TestSparseCsrToCoo(dense_dims,
non_zero_data_fp16,
crows_data,
cols_data,
indices_data,
non_zero_num);
}
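// CSR counterpart of CheckResult: compares crows, cols and values, copying
// GPU tensors back to the host first when necessary.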
template <typename ValueT, typename IndicesT>
inline void CheckCsrResult(
const DeviceContext* dev_ctx,
const SparseCsrTensor& csr,
const std::vector<ValueT> non_zero_elements,
const std::vector<IndicesT>& non_zero_crows,
const std::vector<IndicesT>& non_zero_cols,
const int64_t non_zero_num,
const std::shared_ptr<paddle::experimental::DefaultAllocator>& alloc) {
const DenseTensor real_crows = csr.non_zero_crows();
const DenseTensor real_cols = csr.non_zero_cols();
const DenseTensor real_elements = csr.non_zero_elements();
ASSERT_EQ(csr.non_zero_cols().numel(), non_zero_num);
#if defined(PADDLE_WITH_CUDA)
if (csr.place() == paddle::platform::CUDAPlace()) {
const auto* dev_ctx_gpu = static_cast<const phi::GPUContext*>(dev_ctx);
DenseTensor crows(
alloc.get(),
DenseTensorMeta(
DataType::INT64, real_crows.dims(), real_crows.layout()));
DenseTensor cols(
alloc.get(),
DenseTensorMeta(DataType::INT64, real_cols.dims(), real_cols.layout()));
DenseTensor elements(alloc.get(),
DenseTensorMeta(real_elements.dtype(),
real_elements.dims(),
real_elements.layout()));
phi::Copy(*dev_ctx_gpu, real_crows, crows.place(), true, &crows);
phi::Copy(*dev_ctx_gpu, real_cols, cols.place(), true, &cols);
phi::Copy(*dev_ctx_gpu, real_elements, elements.place(), true, &elements);
int cmp_crows = memcmp(crows.data<IndicesT>(),
non_zero_crows.data(),
non_zero_crows.size() * sizeof(IndicesT));
ASSERT_EQ(cmp_crows, 0);
int cmp_cols = memcmp(cols.data<IndicesT>(),
non_zero_cols.data(),
non_zero_cols.size() * sizeof(IndicesT));
ASSERT_EQ(cmp_cols, 0);
int cmp_elements = memcmp(elements.data<ValueT>(),
non_zero_elements.data(),
non_zero_elements.size() * sizeof(ValueT));
ASSERT_EQ(cmp_elements, 0);
} else {
#endif
int cmp_crows = memcmp(real_crows.data<IndicesT>(),
non_zero_crows.data(),
non_zero_crows.size() * sizeof(IndicesT));
ASSERT_EQ(cmp_crows, 0);
int cmp_cols = memcmp(real_cols.data<IndicesT>(),
non_zero_cols.data(),
non_zero_cols.size() * sizeof(IndicesT));
ASSERT_EQ(cmp_cols, 0);
int cmp_elements = memcmp(real_elements.data<ValueT>(),
non_zero_elements.data(),
non_zero_elements.size() * sizeof(ValueT));
ASSERT_EQ(cmp_elements, 0);
#if defined(PADDLE_WITH_CUDA)
}
#endif
}
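// Builds a SparseCooTensor from raw indices/values buffers and checks the
// COO-to-CSR conversion on CPU (and on GPU when built with CUDA).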
template <typename T>
void TestCooToCsr(const DDim& dense_dims,
const int64_t& non_zero_num,
const std::vector<T>& non_zero_data,
const std::vector<int64_t>& non_zero_indices,
const std::vector<int64_t>& cols_data,
const std::vector<int64_t>& crows_data) {
const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
phi::CPUPlace cpu;
DenseTensorMeta indices_meta(
DataType::INT64,
{static_cast<int64_t>(dense_dims.size()), non_zero_num},
DataLayout::NCHW);
DenseTensor indices(alloc.get(), indices_meta);
DenseTensorMeta values_meta(
paddle::experimental::CppTypeToDataType<T>::Type(),
{non_zero_num},
DataLayout::NCHW);
DenseTensor values(alloc.get(), values_meta);
memcpy(indices.mutable_data<int64_t>(cpu),
non_zero_indices.data(),
non_zero_indices.size() * sizeof(int64_t));
memcpy(values.mutable_data<T>(cpu),
non_zero_data.data(),
non_zero_data.size() * sizeof(T));
phi::SparseCooTensor coo(indices, values, dense_dims);
// 1. test cpu
phi::CPUContext dev_ctx_cpu;
dev_ctx_cpu.SetAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(phi::CPUPlace())
.get());
auto cpu_sparse_out = sparse::CooToCsr<T>(dev_ctx_cpu, coo);
CheckCsrResult<T, int64_t>(&dev_ctx_cpu,
cpu_sparse_out,
non_zero_data,
crows_data,
cols_data,
non_zero_num,
alloc);
// 2. test cuda
#if defined(PADDLE_WITH_CUDA)
const auto cuda_alloc =
std::make_shared<paddle::experimental::DefaultAllocator>(
paddle::platform::CUDAPlace());
phi::GPUContext dev_ctx_gpu{phi::GPUPlace()};
dev_ctx_gpu.PartialInitWithoutAllocator();
dev_ctx_gpu.SetAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(dev_ctx_gpu.GetPlace(), dev_ctx_gpu.stream())
.get());
dev_ctx_gpu.SetHostAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(phi::CPUPlace())
.get());
dev_ctx_gpu.SetPinnedAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CUDAPinnedPlace())
.get());
dev_ctx_gpu.PartialInitWithAllocator();
phi::DenseTensor d_indices(cuda_alloc.get(), indices_meta);
phi::DenseTensor d_values(cuda_alloc.get(), values_meta);
phi::Copy(dev_ctx_gpu, indices, phi::GPUPlace(), true, &d_indices);
phi::Copy(dev_ctx_gpu, values, phi::GPUPlace(), true, &d_values);
phi::SparseCooTensor d_coo(d_indices, d_values, dense_dims);
auto cuda_sparse_out = sparse::CooToCsr<T>(dev_ctx_gpu, d_coo);
CheckCsrResult<T, int64_t>(&dev_ctx_gpu,
cuda_sparse_out,
non_zero_data,
crows_data,
cols_data,
non_zero_num,
alloc);
#endif
}
TEST(DEV_API, coo_to_csr) {
// float dense_data[3][3] = {{0.0, 1.0, 0.0}, {2.0, 0.0, 3.0}, {3.2, 0.0,
// 0.0}};
std::vector<float> non_zero_data = {1.0, 2.0, 3.0, 3.2};
std::vector<int64_t> non_zero_indices = {0, 1, 1, 2, 1, 0, 2, 0};
std::vector<int64_t> cols_data = {1, 0, 2, 0};
std::vector<int64_t> crows_data = {0, 1, 3, 4};
const int64_t non_zero_num = 4;
auto dense_dims = phi::make_ddim({3, 3});
TestCooToCsr<float>(dense_dims,
non_zero_num,
non_zero_data,
non_zero_indices,
cols_data,
crows_data);
}
TEST(DEV_API, batch_coo_to_csr) {
// float dense_data[2][3][3] =
// {{{0.0, 1.0, 0.0}, {2.0, 0.0, 3.0}, {3.2, 0.0, 0.0}},
// {{0.0, 1.0, 0.0}, {2.0, 0.0, 3.0}, {0.0, 0.0, 0.0}}};
const int64_t non_zero_num = 7;
std::vector<float> data = {1.0, 2.0, 3.0, 3.2, 1.0, 2.0, 3.0};
std::vector<phi::dtype::float16> non_zero_data(non_zero_num);
for (int64_t i = 0; i < non_zero_num; i++) {
non_zero_data[i] = static_cast<phi::dtype::float16>(data[i]);
}
std::vector<int64_t> non_zero_indices = {0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 2,
0, 1, 1, 1, 0, 2, 0, 1, 0, 2};
std::vector<int64_t> cols_data = {1, 0, 2, 0, 1, 0, 2};
std::vector<int64_t> crows_data = {0, 1, 3, 4, 0, 1, 3, 3};
auto dense_dims = phi::make_ddim({2, 3, 3});
TestCooToCsr<phi::dtype::float16>(dense_dims,
non_zero_num,
non_zero_data,
non_zero_indices,
cols_data,
crows_data);
}
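// Converts a dense tensor directly to CSR format and verifies crows, cols
// and values on CPU (and on GPU when built with CUDA).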
template <typename T>
void TestDenseToSparseCsr(const DenseTensor& dense_x,
const int64_t non_zero_num,
const std::vector<T>& non_zero_data,
const std::vector<int64_t>& crows_data,
const std::vector<int64_t>& cols_data) {
const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
phi::CPUContext dev_ctx_cpu;
dev_ctx_cpu.SetAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(phi::CPUPlace())
.get());
// 1. test cpu
auto cpu_sparse_out = sparse::DenseToCsr<T>(dev_ctx_cpu, dense_x);
CheckCsrResult<T, int64_t>(&dev_ctx_cpu,
cpu_sparse_out,
non_zero_data,
crows_data,
cols_data,
non_zero_num,
alloc);
// 2. test cuda
#if defined(PADDLE_WITH_CUDA)
const auto cuda_alloc =
std::make_shared<paddle::experimental::DefaultAllocator>(
paddle::platform::CUDAPlace());
DenseTensor d_dense_x(
cuda_alloc.get(),
DenseTensorMeta(dense_x.dtype(), dense_x.dims(), dense_x.layout()));
phi::GPUContext dev_ctx_gpu{phi::GPUPlace()};
dev_ctx_gpu.PartialInitWithoutAllocator();
dev_ctx_gpu.SetAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(dev_ctx_gpu.GetPlace(), dev_ctx_gpu.stream())
.get());
dev_ctx_gpu.SetHostAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(phi::CPUPlace())
.get());
dev_ctx_gpu.SetPinnedAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CUDAPinnedPlace())
.get());
dev_ctx_gpu.PartialInitWithAllocator();
phi::Copy(dev_ctx_gpu, dense_x, phi::GPUPlace(), true, &d_dense_x);
auto sparse_out = sparse::DenseToCsr<T>(dev_ctx_gpu, d_dense_x);
CheckCsrResult<T, int64_t>(&dev_ctx_gpu,
sparse_out,
non_zero_data,
crows_data,
cols_data,
non_zero_num,
alloc);
#endif
}
TEST(DEV_API, dense_to_sparse_csr) {
const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
DenseTensor dense_x(
alloc.get(),
DenseTensorMeta(
DataType::FLOAT32, phi::make_ddim({3, 3}), DataLayout::NCHW));
phi::CPUPlace cpu;
auto* dense_x_data = dense_x.mutable_data<float>(cpu);
float dense_data[3][3] = {{0.0, 1.0, 0.0}, {2.0, 0.0, 3.0}, {3.2, 0.0, 0.0}};
std::vector<float> non_zero_data = {1.0, 2.0, 3.0, 3.2};
std::vector<int64_t> cols_data = {1, 0, 2, 0};
std::vector<int64_t> crows_data = {0, 1, 3, 4};
const int64_t non_zero_num = 4;
std::copy(&dense_data[0][0], &dense_data[0][0] + 9, dense_x_data);
TestDenseToSparseCsr<float>(
dense_x, non_zero_num, non_zero_data, crows_data, cols_data);
}
TEST(DEV_API, dense_to_sparse_csr_batch) {
const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
DenseTensor dense_x(
alloc.get(),
DenseTensorMeta(
DataType::FLOAT16, phi::make_ddim({2, 3, 3}), DataLayout::NCHW));
phi::CPUPlace cpu;
auto* dense_x_data = dense_x.mutable_data<phi::dtype::float16>(cpu);
const int64_t non_zero_num = 7;
float dense_data[2][3][3] = {
{{0.0, 1.0, 0.0}, {2.0, 0.0, 3.0}, {3.2, 0.0, 0.0}},
{{0.0, 1.0, 0.0}, {2.0, 0.0, 0.0}, {3.2, 0.0, 0.0}}};
std::vector<float> data = {1.0, 2.0, 3.0, 3.2, 1.0, 2.0, 3.2};
std::vector<phi::dtype::float16> non_zero_data(non_zero_num);
for (int64_t i = 0; i < non_zero_num; i++) {
non_zero_data[i] = static_cast<phi::dtype::float16>(data[i]);
}
std::vector<int64_t> cols_data = {1, 0, 2, 0, 1, 0, 0};
std::vector<int64_t> crows_data = {0, 1, 3, 4, 0, 1, 2, 3};
float* dense_ptr = &dense_data[0][0][0];
for (int i = 0; i < 18; i++) {
dense_x_data[i] = static_cast<phi::dtype::float16>(dense_ptr[i]);
}
TestDenseToSparseCsr<phi::dtype::float16>(
dense_x, non_zero_num, non_zero_data, crows_data, cols_data);
}
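// Reconstructs a dense tensor from COO indices/values and compares it
// byte-wise with the expected dense buffer on CPU (and on GPU when built
// with CUDA).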
template <typename T>
void TestSparseCooToDense(const DDim& dense_dims,
const std::vector<T>& dense_data,
const std::vector<T>& non_zero_data,
const std::vector<int64_t>& indices_data,
const int64_t non_zero_num,
const int64_t sparse_dim) {
phi::CPUContext dev_ctx_cpu;
dev_ctx_cpu.SetAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(phi::CPUPlace())
.get());
const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
DenseTensor dense_indices(
alloc.get(),
DenseTensorMeta(DataType::INT64,
phi::make_ddim({sparse_dim, non_zero_num}),
DataLayout::NCHW));
std::vector<int64_t> dense_elements_vec;
dense_elements_vec.push_back(non_zero_num);
for (int64_t i = sparse_dim; i < dense_dims.size(); i++) {
dense_elements_vec.push_back(dense_dims[i]);
}
DDim dense_elements_dims = phi::make_ddim(dense_elements_vec);
DenseTensor dense_elements(
alloc.get(),
DenseTensorMeta(paddle::experimental::CppTypeToDataType<T>::Type(),
dense_elements_dims,
DataLayout::NCHW));
phi::CPUPlace cpu_place;
memcpy(dense_indices.mutable_data<int64_t>(cpu_place),
indices_data.data(),
indices_data.size() * sizeof(int64_t));
memcpy(dense_elements.mutable_data<T>(cpu_place),
non_zero_data.data(),
non_zero_num * sizeof(T));
SparseCooTensor coo(dense_indices, dense_elements, dense_dims);
DenseTensor dense_out = sparse::CooToDense<T>(dev_ctx_cpu, coo);
int cmp = memcmp(
&dense_data[0], dense_out.data<T>(), sizeof(T) * dense_data.size());
ASSERT_EQ(cmp, 0);
#if defined(PADDLE_WITH_CUDA)
const auto cuda_alloc =
std::make_shared<paddle::experimental::DefaultAllocator>(
paddle::platform::CUDAPlace());
phi::GPUContext dev_ctx_gpu{phi::GPUPlace()};
dev_ctx_gpu.PartialInitWithoutAllocator();
dev_ctx_gpu.SetAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(dev_ctx_gpu.GetPlace(), dev_ctx_gpu.stream())
.get());
dev_ctx_gpu.SetHostAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(phi::CPUPlace())
.get());
dev_ctx_gpu.SetPinnedAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CUDAPinnedPlace())
.get());
dev_ctx_gpu.PartialInitWithAllocator();
DenseTensor d_dense_indices(cuda_alloc.get(), dense_indices.meta());
DenseTensor d_dense_elements(cuda_alloc.get(), dense_elements.meta());
phi::Copy(
dev_ctx_gpu, dense_indices, phi::GPUPlace(), true, &d_dense_indices);
phi::Copy(
dev_ctx_gpu, dense_elements, phi::GPUPlace(), true, &d_dense_elements);
SparseCooTensor coo_cuda(d_dense_indices, d_dense_elements, dense_dims);
auto dense_out_cuda = sparse::CooToDense<T>(dev_ctx_gpu, coo_cuda);
DenseTensor h_dense_out(alloc.get(),
DenseTensorMeta(dense_out_cuda.dtype(),
dense_out_cuda.dims(),
dense_out_cuda.layout()));
phi::Copy(
dev_ctx_gpu, dense_out_cuda, h_dense_out.place(), true, &h_dense_out);
int cmp_cuda = memcmp(
&dense_data[0], h_dense_out.data<T>(), sizeof(T) * dense_data.size());
ASSERT_EQ(cmp_cuda, 0);
#endif
}
TEST(DEV_API, sparse_coo_to_dense) {
const int non_zero_num = 4;
const int sparse_dim = 2;
std::vector<float> dense_data = {0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 3.2, 0.0, 0.0};
std::vector<float> non_zero_data = {1.0, 2.0, 3.0, 3.2};
std::vector<int64_t> indices_data = {0, 1, 1, 2, 1, 0, 2, 0};
DDim dense_dims = phi::make_ddim({3, 3});
TestSparseCooToDense(dense_dims,
dense_data,
non_zero_data,
indices_data,
non_zero_num,
sparse_dim);
}
TEST(DEV_API, sparse_coo_to_dense_batch_and_fp16) {
std::vector<float> dense_data = {0.0,
1.0,
0.0,
0.0,
0.0,
0.0,
2.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
3.0,
0.0,
4.0,
0.0,
0.0};
std::vector<float> non_zero_data = {1.0, 2.0, 3.0, 4.0};
std::vector<int64_t> indices_data = {0, 0, 1, 1, 0, 2, 1, 2, 1, 0, 1, 0};
const int non_zero_num = 4;
const int sparse_dim = 3;
DDim dense_dims = phi::make_ddim({2, 3, 3});
using float16 = phi::dtype::float16;
std::vector<float16> dense_data_fp16(dense_data.size()),
non_zero_data_fp16(non_zero_num);
for (uint64_t i = 0; i < dense_data.size(); i++) {
dense_data_fp16[i] = static_cast<float16>(dense_data[i]);
}
for (int64_t i = 0; i < non_zero_num; i++) {
non_zero_data_fp16[i] = static_cast<float16>(non_zero_data[i]);
}
TestSparseCooToDense(dense_dims,
dense_data_fp16,
non_zero_data_fp16,
indices_data,
non_zero_num,
sparse_dim);
}
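// Reconstructs a dense tensor from a SparseCsrTensor and compares it with
// the expected dense buffer on CPU (and on GPU when built with CUDA).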
template <typename T>
void TestSparseCsrToDense(const DDim& dense_dims,
const std::vector<T>& dense_data,
const std::vector<T>& non_zero_data,
const std::vector<int64_t>& crows_data,
const std::vector<int64_t>& cols_data,
const int64_t non_zero_num) {
int batchs = 1;
int rows = dense_dims[0];
if (dense_dims.size() == 3) {
batchs = dense_dims[0];
rows = dense_dims[1];
}
phi::DenseTensorMeta crows_meta(
DataType::INT64, phi::make_ddim({batchs * (rows + 1)}), DataLayout::NCHW);
phi::DenseTensorMeta cols_meta(
DataType::INT64, phi::make_ddim({non_zero_num}), DataLayout::NCHW);
phi::DenseTensorMeta values_meta(
paddle::experimental::CppTypeToDataType<T>::Type(),
phi::make_ddim({non_zero_num}),
DataLayout::NCHW);
const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
phi::CPUPlace place;
phi::DenseTensor crows(alloc.get(), crows_meta);
phi::DenseTensor cols(alloc.get(), cols_meta);
phi::DenseTensor values(alloc.get(), values_meta);
memcpy(crows.mutable_data<int64_t>(place),
crows_data.data(),
crows_data.size() * sizeof(int64_t));
memcpy(cols.mutable_data<int64_t>(place),
cols_data.data(),
cols_data.size() * sizeof(int64_t));
memcpy(values.mutable_data<T>(place),
non_zero_data.data(),
non_zero_data.size() * sizeof(T));
phi::SparseCsrTensor csr(crows, cols, values, dense_dims);
// 1. test cpu
phi::CPUContext dev_ctx_cpu;
dev_ctx_cpu.SetAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(phi::CPUPlace())
.get());
DenseTensor cpu_sparse_out = sparse::CsrToDense<T>(dev_ctx_cpu, csr);
int cmp_cpu = memcmp(cpu_sparse_out.data<T>(),
dense_data.data(),
sizeof(T) * dense_data.size());
ASSERT_EQ(cmp_cpu, 0);
// 2. test cuda
#if defined(PADDLE_WITH_CUDA)
const auto cuda_alloc =
std::make_shared<paddle::experimental::DefaultAllocator>(
paddle::platform::CUDAPlace());
phi::GPUContext dev_ctx_gpu{phi::GPUPlace()};
dev_ctx_gpu.PartialInitWithoutAllocator();
dev_ctx_gpu.SetAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(dev_ctx_gpu.GetPlace(), dev_ctx_gpu.stream())
.get());
dev_ctx_gpu.SetHostAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(phi::CPUPlace())
.get());
dev_ctx_gpu.SetPinnedAllocator(
paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CUDAPinnedPlace())
.get());
dev_ctx_gpu.PartialInitWithAllocator();
phi::DenseTensor d_crows(cuda_alloc.get(), crows_meta);
phi::DenseTensor d_cols(cuda_alloc.get(), cols_meta);
phi::DenseTensor d_values(cuda_alloc.get(), values_meta);
phi::Copy(dev_ctx_gpu, crows, phi::GPUPlace(), true, &d_crows);
phi::Copy(dev_ctx_gpu, cols, phi::GPUPlace(), true, &d_cols);
phi::Copy(dev_ctx_gpu, values, phi::GPUPlace(), true, &d_values);
phi::SparseCsrTensor d_csr(d_crows, d_cols, d_values, dense_dims);
auto cuda_sparse_out = sparse::CsrToDense<T>(dev_ctx_gpu, d_csr);
phi::DenseTensor h_out(alloc.get(), cpu_sparse_out.meta());
phi::Copy(dev_ctx_gpu, cuda_sparse_out, phi::CPUPlace(), true, &h_out);
int cmp_cuda =
memcmp(h_out.data<T>(), dense_data.data(), sizeof(T) * dense_data.size());
ASSERT_EQ(cmp_cuda, 0);
#endif
}
TEST(DEV_API, sparse_csr_to_dense) {
DDim dense_dims = phi::make_ddim({3, 3});
std::vector<float> dense_data = {0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 3.2, 0.0, 0.0};
std::vector<float> non_zero_data = {1.0, 2.0, 3.0, 3.2};
std::vector<int64_t> cols_data = {1, 0, 2, 0};
std::vector<int64_t> crows_data = {0, 1, 3, 4};
const int64_t non_zero_num = 4;
TestSparseCsrToDense(dense_dims,
dense_data,
non_zero_data,
crows_data,
cols_data,
non_zero_num);
}
TEST(DEV_API, sparse_csr_to_dense_batch_and_fp16) {
DDim dense_dims = phi::make_ddim({2, 3, 3});
std::vector<float> dense_data = {0.0,
1.0,
0.0,
2.0,
0.0,
3.0,
3.2,
0.0,
0.0,
0.0,
1.0,
0.0,
2.0,
0.0,
3.0,
3.2,
0.0,
0.0};
std::vector<float> non_zero_data = {1.0, 2.0, 3.0, 3.2, 1.0, 2.0, 3.0, 3.2};
std::vector<int64_t> cols_data = {1, 0, 2, 0, 1, 0, 2, 0};
std::vector<int64_t> crows_data = {0, 1, 3, 4, 0, 1, 3, 4};
const int64_t non_zero_num = 8;
using float16 = phi::dtype::float16;
std::vector<float16> dense_data_fp16(dense_data.size()),
non_zero_data_fp16(non_zero_num);
for (uint64_t i = 0; i < dense_data.size(); i++) {
dense_data_fp16[i] = static_cast<float16>(dense_data[i]);
}
for (int64_t i = 0; i < non_zero_num; i++) {
non_zero_data_fp16[i] = static_cast<float16>(non_zero_data[i]);
}
TestSparseCsrToDense<float16>(dense_dims,
dense_data_fp16,
non_zero_data_fp16,
crows_data,
cols_data,
non_zero_num);
}
} // namespace tests
} // namespace phi
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/split_kernel.h"
namespace phi {
namespace tests {
namespace framework = paddle::framework;
using DDim = phi::DDim;
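// Splits a {4, 10} tensor into two {2, 10} tensors along axis 0 using
// explicit section sizes.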
TEST(DEV_API, split) {
// 1. create tensor
const auto alloc =
std::make_unique<paddle::experimental::DefaultAllocator>(phi::CPUPlace());
phi::DenseTensor dense_x(alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({4, 10}),
phi::DataLayout::NCHW));
phi::CPUContext dev_ctx;
dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
auto* dense_x_data = dev_ctx.Alloc<float>(&dense_x);
for (size_t i = 0; i < 4; ++i) {
for (size_t j = 0; j < 10; ++j) {
dense_x_data[i * 10 + j] = (i * 10 + j) * 1.0;
}
}
// 2. test API
auto out = phi::Split<float>(dev_ctx, dense_x, {2, 2}, 0);
// 3. check result
ASSERT_EQ(out.size(), static_cast<size_t>(2));
ASSERT_EQ(out[0].dims().size(), 2);
ASSERT_EQ(out[0].dims()[0], 2);
ASSERT_EQ(out[0].dims()[1], 10);
ASSERT_EQ(out[0].meta().dtype, phi::DataType::FLOAT32);
ASSERT_EQ(out[0].meta().layout, phi::DataLayout::NCHW);
ASSERT_EQ(out[1].dims().size(), 2);
ASSERT_EQ(out[1].dims()[0], 2);
ASSERT_EQ(out[1].dims()[1], 10);
ASSERT_EQ(out[1].meta().dtype, phi::DataType::FLOAT32);
ASSERT_EQ(out[1].meta().layout, phi::DataLayout::NCHW);
auto out_data_0 = out[0].data<float>();
auto out_data_1 = out[1].data<float>();
  // verify all 40 elements across the two split outputs
  for (size_t i = 0; i < 40; ++i) {
if (i < 20) {
ASSERT_NEAR(dense_x_data[i], out_data_0[i], 1e-6);
} else {
ASSERT_NEAR(dense_x_data[i], out_data_1[i - 20], 1e-6);
}
}
}
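// Same input as above, but the two sections are derived from a split count.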
TEST(DEV_API, split_with_num) {
// 1. create tensor
const auto alloc =
std::make_unique<paddle::experimental::DefaultAllocator>(phi::CPUPlace());
phi::DenseTensor dense_x(alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({4, 10}),
phi::DataLayout::NCHW));
phi::CPUContext dev_ctx;
dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
auto* dense_x_data = dev_ctx.Alloc<float>(&dense_x);
for (size_t i = 0; i < 4; ++i) {
for (size_t j = 0; j < 10; ++j) {
dense_x_data[i * 10 + j] = (i * 10 + j) * 1.0;
}
}
// 2. test API
auto out = phi::SplitWithNum<float>(dev_ctx, dense_x, 2, 0);
// 3. check result
ASSERT_EQ(out.size(), static_cast<size_t>(2));
ASSERT_EQ(out[0].dims().size(), 2);
ASSERT_EQ(out[0].dims()[0], 2);
ASSERT_EQ(out[0].dims()[1], 10);
ASSERT_EQ(out[0].meta().dtype, phi::DataType::FLOAT32);
ASSERT_EQ(out[0].meta().layout, phi::DataLayout::NCHW);
ASSERT_EQ(out[1].dims().size(), 2);
ASSERT_EQ(out[1].dims()[0], 2);
ASSERT_EQ(out[1].dims()[1], 10);
ASSERT_EQ(out[1].meta().dtype, phi::DataType::FLOAT32);
ASSERT_EQ(out[1].meta().layout, phi::DataLayout::NCHW);
auto out_data_0 = out[0].data<float>();
auto out_data_1 = out[1].data<float>();
  // verify all 40 elements across the two split outputs
  for (size_t i = 0; i < 40; ++i) {
if (i < 20) {
ASSERT_NEAR(dense_x_data[i], out_data_0[i], 1e-6);
} else {
ASSERT_NEAR(dense_x_data[i], out_data_1[i - 20], 1e-6);
}
}
}
} // namespace tests
} // namespace phi
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/reduce_sum_kernel.h"
namespace phi {
namespace tests {
namespace framework = paddle::framework;
using DDim = phi::DDim;
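// Reduces a {3, 4} float tensor over both axes and compares the result with
// the sum accumulated on the host.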
TEST(DEV_API, sum) {
// 1. create tensor
const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
paddle::platform::CPUPlace());
phi::DenseTensor dense_x(alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({3, 4}),
phi::DataLayout::NCHW));
auto* dense_x_data =
dense_x.mutable_data<float>(paddle::platform::CPUPlace());
float sum = 0.0;
for (size_t i = 0; i < 12; ++i) {
dense_x_data[i] = i * 1.0;
sum += i * 1.0;
}
std::vector<int64_t> axis = {0, 1};
phi::CPUContext dev_ctx;
dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
// 2. test API
auto out = phi::Sum<float>(
dev_ctx, dense_x, phi::IntArray(axis), phi::DataType::FLOAT32, false);
// 3. check result
ASSERT_EQ(out.dims().size(), 1);
ASSERT_EQ(out.numel(), 1);
ASSERT_EQ(out.meta().dtype, phi::DataType::FLOAT32);
ASSERT_EQ(out.meta().layout, phi::DataLayout::NCHW);
auto expect_result = sum;
auto actual_result = out.data<float>()[0];
ASSERT_NEAR(expect_result, actual_result, 1e-6f);
}
} // namespace tests
} // namespace phi