remove unuse cuSparse function (#43626)

4a08c781 · zhouweiwei2014 · GitHub · 03517d8a · 4a08c781 · 03517d8a
6 changed file
--- a/paddle/fluid/operators/math/CMakeLists.txt
+++ b/paddle/fluid/operators/math/CMakeLists.txt
@@ -97,17 +97,6 @@ cc_test(
  SRCS concat_test.cc
  DEPS concat_and_split)
-if(WITH_GPU AND (NOT WITH_ROCM))
-  #currenty not yet support ROCM
-  #the generic conversion APIs of dense and sparse are only supported after cuda11.2
-  if((NOT ${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 11.2))
-    cc_test(
-      cusparse_conversion_api_test
-      SRCS cusparse_conversion_api_test.cc
-      DEPS tensor)
-  endif()
-endif()
 if(WITH_TESTING AND TEST im2col_test)
  set_tests_properties(im2col_test PROPERTIES TIMEOUT 120)
 endif()
--- a/paddle/fluid/operators/math/cusparse_conversion_api_test.cc
+++ b/paddle/fluid/operators/math/cusparse_conversion_api_test.cc
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#include <vector>
-#include "glog/logging.h"
-#include "gtest/gtest.h"
-#include "paddle/fluid/framework/tensor_util.h"
-#include "paddle/fluid/operators/math/sparse.h"
-template <typename T>
-void TestNNZ(const std::vector<T>& dense_data, const int correct_nnz,
-             const int rows, const int cols) {
-  paddle::platform::CUDADeviceContext* context =
-      new paddle::platform::CUDADeviceContext(paddle::platform::CUDAPlace());
-  context->SetAllocator(
-      paddle::memory::allocation::AllocatorFacade::Instance()
-          .GetAllocator(paddle::platform::CUDAPlace(), context->stream())
-          .get());
-  context->PartialInitWithAllocator();
-  auto sparse =
-      paddle::operators::math::GetSparse<paddle::platform::CUDADeviceContext,
-                                         T>(*context);
-  paddle::framework::Tensor dense, nnz_tensor;
-  auto dense_dims = phi::make_ddim({rows, cols});
-  auto nnz_dims = phi::make_ddim({dense_dims[0] + 1});
-  dense.mutable_data<T>(dense_dims, paddle::platform::CUDAPlace());
-  paddle::framework::TensorFromVector<T>(dense_data, *context, &dense);
-  int32_t* nnz_ptr =
-      nnz_tensor.mutable_data<int32_t>(nnz_dims, paddle::platform::CUDAPlace());
-  sparse.nnz(rows, cols, dense.data<T>(), nnz_ptr, nnz_ptr + 1);
-  std::vector<int32_t> nnz_vec(dense_dims[0] + 1);
-  paddle::framework::TensorToVector<int32_t>(nnz_tensor, *context, &nnz_vec);
-  delete context;
-  CHECK_EQ(correct_nnz, nnz_vec[0]);
-}
-TEST(sparse, nnz) {
-  std::vector<float> dense_data = {0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 3.2, 0.0, 0.0};
-  TestNNZ<float>(dense_data, 4, 3, 3);
-}
-TEST(sparse, nnz_double) {
-  std::vector<double> dense_data = {0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 3.2, 0.0};
-  TestNNZ<double>(dense_data, 4, 4, 2);
-}
-template <typename T>
-void TestDenseToSparse(const std::vector<T>& correct_dense_data,
-                       const std::vector<int64_t>& correct_rows,
-                       const std::vector<int64_t>& correct_cols,
-                       const std::vector<T>& correct_values,
-                       const int correct_nnz, const int rows, const int cols,
-                       const std::string& mode) {
-  paddle::platform::CUDADeviceContext* context =
-      new paddle::platform::CUDADeviceContext(paddle::platform::CUDAPlace());
-  context->SetAllocator(
-      paddle::memory::allocation::AllocatorFacade::Instance()
-          .GetAllocator(paddle::platform::CUDAPlace(), context->stream())
-          .get());
-  context->PartialInitWithAllocator();
-  // get sparse
-  auto sparse =
-      paddle::operators::math::GetSparse<paddle::platform::CUDADeviceContext,
-                                         T>(*context);
-  // create tensor and copy vector to tensor
-  paddle::framework::Tensor dense_tensor, rows_tensor, cols_tensor,
-      values_tensor, actual_dense_tensor;
-  auto dense_dims = phi::make_ddim({rows, cols});
-  T* dense_data =
-      dense_tensor.mutable_data<T>(dense_dims, paddle::platform::CUDAPlace());
-  T* actual_dense_data = actual_dense_tensor.mutable_data<T>(
-      dense_dims, paddle::platform::CUDAPlace());
-  paddle::framework::TensorFromVector<T>(correct_dense_data, *context,
-                                         &dense_tensor);
-  auto nnz_dims = phi::make_ddim({correct_nnz});
-  auto crows_dims = phi::make_ddim({rows + 1});
-  int64_t* rows_data = nullptr;
-  if (mode == "COO") {
-    rows_data = rows_tensor.mutable_data<int64_t>(
-        nnz_dims, paddle::platform::CUDAPlace());
-  } else {
-    rows_data = rows_tensor.mutable_data<int64_t>(
-        crows_dims, paddle::platform::CUDAPlace());
-  }
-  int64_t* cols_data = cols_tensor.mutable_data<int64_t>(
-      nnz_dims, paddle::platform::CUDAPlace());
-  T* values_data =
-      values_tensor.mutable_data<T>(nnz_dims, paddle::platform::CUDAPlace());
-  // test dense_to_sparse
-  if (mode == "COO") {
-    sparse.DenseToSparseCoo(rows, cols, dense_data, rows_data, cols_data,
-                            values_data);
-  } else {
-    sparse.DenseToSparseCsr(rows, cols, dense_data, rows_data, cols_data,
-                            values_data);
-  }
-  std::vector<int64_t> actual_rows(correct_nnz), actual_crows(rows + 1),
-      actual_cols(correct_nnz);
-  std::vector<T> actual_values(correct_nnz), actual_dense_vec(rows * cols);
-  if (mode == "COO") {
-    paddle::framework::TensorToVector<int64_t>(rows_tensor, *context,
-                                               &actual_rows);
-  } else {
-    paddle::framework::TensorToVector<int64_t>(rows_tensor, *context,
-                                               &actual_crows);
-  }
-  paddle::framework::TensorToVector<int64_t>(cols_tensor, *context,
-                                             &actual_cols);
-  paddle::framework::TensorToVector<T>(values_tensor, *context, &actual_values);
-  for (int i = 0; i < correct_nnz; i++) {
-    if (mode == "COO") {
-      CHECK_EQ(correct_rows[i], actual_rows[i]);
-    }
-    CHECK_EQ(correct_cols[i], actual_cols[i]);
-    CHECK_EQ(correct_values[i], actual_values[i]);
-  }
-  if (mode == "CSR") {
-    for (int i = 0; i < rows + 1; i++) {
-      CHECK_EQ(correct_rows[i], actual_crows[i]);
-    }
-  }
-  // test sparse_to_dense
-  if (mode == "COO") {
-    sparse.SparseCooToDense(rows, cols, correct_nnz, rows_data, cols_data,
-                            values_data, actual_dense_data);
-  } else {
-    sparse.SparseCsrToDense(rows, cols, correct_nnz, rows_data, cols_data,
-                            values_data, actual_dense_data);
-  }
-  paddle::framework::TensorToVector<T>(actual_dense_tensor, *context,
-                                       &actual_dense_vec);
-  for (uint64_t i = 0; i < correct_dense_data.size(); i++) {
-    CHECK_EQ(correct_dense_data[i], actual_dense_vec[i]);
-  }
-  delete context;
-}
-TEST(sparse, dense_to_sparse) {
-  std::vector<float> dense_data = {0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 3.2, 0.0, 0.0};
-  std::vector<float> values = {1.0, 2.0, 3.0, 3.2};
-  std::vector<int64_t> rows = {0, 1, 1, 2};
-  std::vector<int64_t> crows = {0, 1, 3, 4};
-  std::vector<int64_t> cols = {1, 0, 2, 0};
-  TestDenseToSparse<float>(dense_data, rows, cols, values, 4, 3, 3, "COO");
-  TestDenseToSparse<float>(dense_data, crows, cols, values, 4, 3, 3, "CSR");
-}
-TEST(sparse, dense_to_sparse_double) {
-  std::vector<double> dense_data = {0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 3.2, 0.0};
-  std::vector<double> values = {1.0, 2.0, 3.0, 3.2};
-  std::vector<int64_t> rows = {0, 1, 2, 3};
-  std::vector<int64_t> crows = {0, 1, 2, 3, 4};
-  std::vector<int64_t> cols = {1, 1, 1, 0};
-  TestDenseToSparse<double>(dense_data, rows, cols, values, 4, 4, 2, "COO");
-  TestDenseToSparse<double>(dense_data, crows, cols, values, 4, 4, 2, "CSR");
-}
-TEST(sparse, dense_to_sparse_fp16) {
-  using float16 = paddle::platform::float16;
-  std::vector<float16> dense_data = {float16(0.0), float16(1.0), float16(0.0),
-                                     float16(2.0), float16(0.0), float16(3.0),
-                                     float16(3.2), float16(0.0)};
-  std::vector<float16> values = {float16(1.0), float16(2.0), float16(3.0),
-                                 float16(3.2)};
-  std::vector<int64_t> rows = {0, 1, 2, 3};
-  std::vector<int64_t> crows = {0, 1, 2, 3, 4};
-  std::vector<int64_t> cols = {1, 1, 1, 0};
-  TestDenseToSparse<float16>(dense_data, rows, cols, values, 4, 4, 2, "COO");
-  TestDenseToSparse<float16>(dense_data, crows, cols, values, 4, 4, 2, "CSR");
-}
--- a/paddle/fluid/operators/math/sparse.h
+++ b/paddle/fluid/operators/math/sparse.h
-//   Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#pragma once
-#include "paddle/fluid/framework/operator.h"
-#include "paddle/fluid/framework/tensor.h"
-namespace paddle {
-namespace framework {
-class ExecutionContext;
-}  // namespace framework
-}  // namespace paddle
-namespace paddle {
-namespace operators {
-namespace math {
-template <typename DeviceContext>
-class Sparse {
- public:
-  explicit Sparse(const DeviceContext& context) : context_(context) {}
-  template <typename T>
-  void nnz(const int M, const int N, const T* dense, int* nnz,
-           int* nnzPerRowColumn) const;
-  template <typename T>
-  void DenseToSparseCoo(const int M, const int N, const T* dense, int64_t* rows,
-                        int64_t* cols, T* values) const;
-  template <typename T>
-  void DenseToSparseCsr(const int M, const int N, const T* dense,
-                        int64_t* crows, int64_t* cols, T* values) const;
-  template <typename T>
-  void SparseCooToDense(const int64_t M, const int64_t N, const int64_t nnz,
-                        const int64_t* rows, const int64_t* cols,
-                        const T* values, T* dense) const;
-  template <typename T>
-  void SparseCsrToDense(const int64_t M, const int64_t N, const int64_t nnz,
-                        const int64_t* crows, const int64_t* cols,
-                        const T* values, T* dense) const;
- private:
-  const DeviceContext& context_;
-};
-template <typename DeviceContext, typename T>
-class SparseT : private Sparse<DeviceContext> {
- public:
-  using Sparse<DeviceContext>::Sparse;
-  template <typename... ARGS>
-  void nnz(ARGS... args) const {
-    Base()->template nnz<T>(args...);
-  }
-  template <typename... ARGS>
-  void DenseToSparseCoo(ARGS... args) const {
-    Base()->template DenseToSparseCoo<T>(args...);
-  }
-  template <typename... ARGS>
-  void DenseToSparseCsr(ARGS... args) const {
-    Base()->template DenseToSparseCsr<T>(args...);
-  }
-  template <typename... ARGS>
-  void SparseCooToDense(ARGS... args) const {
-    Base()->template SparseCooToDense<T>(args...);
-  }
-  template <typename... ARGS>
-  void SparseCsrToDense(ARGS... args) const {
-    Base()->template SparseCsrToDense<T>(args...);
-  }
- private:
-  const Sparse<DeviceContext>* Base() const {
-    return static_cast<const Sparse<DeviceContext>*>(this);
-  }
-};
-template <typename DeviceContext, typename T>
-inline SparseT<DeviceContext, T> GetSparse(
-    const framework::ExecutionContext& exe_ctx) {
-  return SparseT<DeviceContext, T>(
-      exe_ctx.template device_context<DeviceContext>());
-}
-template <typename DeviceContext, typename T>
-inline SparseT<DeviceContext, T> GetSparse(const DeviceContext& dev_ctx) {
-  return SparseT<DeviceContext, T>(dev_ctx);
-}
-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
-#if defined(PADDLE_WITH_CUDA)
-#if CUDA_VERSION >= 11020
-#include "paddle/fluid/operators/math/sparse_impl.cu.h"
-#endif
-#endif
--- a/paddle/fluid/operators/math/sparse_impl.cu.h
+++ b/paddle/fluid/operators/math/sparse_impl.cu.h
-//   Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#pragma once
-#include "paddle/fluid/platform/device/gpu/gpu_info.h"
-#include "paddle/fluid/platform/dynload/cusparse.h"
-#include "paddle/phi/kernels/funcs/math_function.h"
-namespace paddle {
-namespace operators {
-namespace math {
-template <typename T>
-cudaDataType_t GetGpuDataType() {
-  if (std::is_same<T, float>::value) {
-    return CUDA_R_32F;
-  } else if (std::is_same<T, double>::value) {
-    return CUDA_R_64F;
-  } else if (std::is_same<T, platform::float16>::value) {
-    return CUDA_R_16F;
-  }
-}
-template <>
-template <typename T>
-void Sparse<platform::CUDADeviceContext>::nnz(const int M, const int N,
-                                              const T* dense, int* nnz,
-                                              int* nnzPerRowColumn) const {}
-template <>
-template <>
-void Sparse<platform::CUDADeviceContext>::nnz(const int M, const int N,
-                                              const float* dense, int* nnz,
-                                              int* nnzPerRowColumn) const {
-  cusparseMatDescr_t descr = 0;
-  PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cusparseCreateMatDescr(&descr));
-  PADDLE_ENFORCE_GPU_SUCCESS(paddle::platform::dynload::cusparseSetMatType(
-      descr, CUSPARSE_MATRIX_TYPE_GENERAL));
-  PADDLE_ENFORCE_GPU_SUCCESS(paddle::platform::dynload::cusparseSetMatIndexBase(
-      descr, CUSPARSE_INDEX_BASE_ZERO));
-  context_.CusparseCall([&](cusparseHandle_t handle) {
-    PADDLE_ENFORCE_GPU_SUCCESS(paddle::platform::dynload::cusparseSnnz(
-        handle, CUSPARSE_DIRECTION_ROW, M, N, descr, dense, M, nnzPerRowColumn,
-        nnz));
-  });
-}
-template <>
-template <>
-void Sparse<platform::CUDADeviceContext>::nnz(const int M, const int N,
-                                              const double* dense, int* nnz,
-                                              int* nnzPerRowColumn) const {
-  cusparseMatDescr_t descr = 0;
-  PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cusparseCreateMatDescr(&descr));
-  PADDLE_ENFORCE_GPU_SUCCESS(paddle::platform::dynload::cusparseSetMatType(
-      descr, CUSPARSE_MATRIX_TYPE_GENERAL));
-  PADDLE_ENFORCE_GPU_SUCCESS(paddle::platform::dynload::cusparseSetMatIndexBase(
-      descr, CUSPARSE_INDEX_BASE_ZERO));
-  context_.CusparseCall([&](cusparseHandle_t handle) {
-    PADDLE_ENFORCE_GPU_SUCCESS(paddle::platform::dynload::cusparseDnnz(
-        handle, CUSPARSE_DIRECTION_ROW, M, N, descr, dense, M, nnzPerRowColumn,
-        nnz));
-  });
-}
-template <typename T>
-inline void DenseToSparse(const platform::CUDADeviceContext& context,
-                          const int M, const int N, const T* dense,
-                          int64_t* rows, int64_t* cols, T* values,
-                          const cusparseFormat_t format) {
-  cusparseSpMatDescr_t matB;
-  cusparseDnMatDescr_t matA;
-  cudaDataType_t dtype = GetGpuDataType<T>();
-  PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cusparseCreateDnMat(
-      &matA, M, N, N, const_cast<void*>(reinterpret_cast<const void*>(dense)),
-      dtype, CUSPARSE_ORDER_ROW));
-  if (format == CUSPARSE_FORMAT_COO) {
-    PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cusparseCreateCoo(
-        &matB, M, N, 0, nullptr, nullptr, nullptr, CUSPARSE_INDEX_64I,
-        CUSPARSE_INDEX_BASE_ZERO, dtype));
-  } else if (format == CUSPARSE_FORMAT_CSR) {
-    PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cusparseCreateCsr(
-        &matB, M, N, 0, rows, nullptr, nullptr, CUSPARSE_INDEX_64I,
-        CUSPARSE_INDEX_64I, CUSPARSE_INDEX_BASE_ZERO, dtype));
-  } else {
-    PADDLE_THROW(platform::errors::InvalidArgument(
-        "the sparse format [%s] is not supported", format));
-  }
-  size_t buffer_size = 0;
-  context.CusparseCall([&](cusparseHandle_t handle) {
-    PADDLE_ENFORCE_GPU_SUCCESS(
-        platform::dynload::cusparseDenseToSparse_bufferSize(
-            handle, matA, matB, CUSPARSE_DENSETOSPARSE_ALG_DEFAULT,
-            &buffer_size));
-  });
-  framework::Tensor buffer;
-  float* buffer_data = buffer.mutable_data<float>(
-      {static_cast<int64_t>(buffer_size)}, context.GetPlace());
-  context.CusparseCall([&](cusparseHandle_t handle) {
-    PADDLE_ENFORCE_GPU_SUCCESS(
-        platform::dynload::cusparseDenseToSparse_analysis(
-            handle, matA, matB, CUSPARSE_DENSETOSPARSE_ALG_DEFAULT,
-            buffer_data));
-  });
-  if (format == CUSPARSE_FORMAT_COO) {
-    PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cusparseCooSetPointers(
-        matB, rows, cols, reinterpret_cast<void*>(values)));
-  } else if (format == CUSPARSE_FORMAT_CSR) {
-    PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cusparseCsrSetPointers(
-        matB, rows, cols, reinterpret_cast<void*>(values)));
-  } else {
-    PADDLE_THROW(platform::errors::InvalidArgument(
-        "the sparse format [%s] is not supported", format));
-  }
-  context.CusparseCall([&](cusparseHandle_t handle) {
-    PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cusparseDenseToSparse_convert(
-        handle, matA, matB, CUSPARSE_DENSETOSPARSE_ALG_DEFAULT, buffer_data));
-  });
-}
-template <>
-template <typename T>
-void Sparse<platform::CUDADeviceContext>::DenseToSparseCoo(
-    const int M, const int N, const T* dense, int64_t* rows, int64_t* cols,
-    T* values) const {
-  DenseToSparse<T>(context_, M, N, dense, rows, cols, values,
-                   CUSPARSE_FORMAT_COO);
-}
-template <>
-template <typename T>
-void Sparse<platform::CUDADeviceContext>::DenseToSparseCsr(
-    const int M, const int N, const T* dense, int64_t* crows, int64_t* cols,
-    T* values) const {
-  DenseToSparse<T>(context_, M, N, dense, crows, cols, values,
-                   CUSPARSE_FORMAT_CSR);
-}
-template <typename T>
-void SparseToDense(const platform::CUDADeviceContext& context, const int64_t M,
-                   const int64_t N, const int64_t nnz, const int64_t* rows,
-                   const int64_t* cols, const T* values, T* dense,
-                   const cusparseFormat_t format) {
-  cusparseSpMatDescr_t matA;
-  cusparseDnMatDescr_t matB;
-  cudaDataType_t dtype = GetGpuDataType<T>();
-  if (format == CUSPARSE_FORMAT_COO) {
-    PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cusparseCreateCoo(
-        &matA, M, N, nnz,
-        const_cast<void*>(reinterpret_cast<const void*>(rows)),
-        const_cast<void*>(reinterpret_cast<const void*>(cols)),
-        const_cast<void*>(reinterpret_cast<const void*>(values)),
-        CUSPARSE_INDEX_64I, CUSPARSE_INDEX_BASE_ZERO, dtype));
-  } else if (format == CUSPARSE_FORMAT_CSR) {
-    PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cusparseCreateCsr(
-        &matA, M, N, nnz,
-        const_cast<void*>(reinterpret_cast<const void*>(rows)),
-        const_cast<void*>(reinterpret_cast<const void*>(cols)),
-        const_cast<void*>(reinterpret_cast<const void*>(values)),
-        CUSPARSE_INDEX_64I, CUSPARSE_INDEX_64I, CUSPARSE_INDEX_BASE_ZERO,
-        dtype));
-  } else {
-    PADDLE_THROW(platform::errors::InvalidArgument(
-        "the sparse format [%s] is not supported", format));
-  }
-  PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cusparseCreateDnMat(
-      &matB, M, N, N, reinterpret_cast<void*>(dense), dtype,
-      CUSPARSE_ORDER_ROW));
-  size_t buffer_size = 0;
-  context.CusparseCall([&](cusparseHandle_t handle) {
-    PADDLE_ENFORCE_GPU_SUCCESS(
-        platform::dynload::cusparseSparseToDense_bufferSize(
-            handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT,
-            &buffer_size));
-  });
-  framework::Tensor buffer;
-  float* buffer_data = buffer.mutable_data<float>(
-      {static_cast<int64_t>(buffer_size)}, context.GetPlace());
-  context.CusparseCall([&](cusparseHandle_t handle) {
-    PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cusparseSparseToDense(
-        handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer_data));
-  });
-}
-template <>
-template <typename T>
-void Sparse<platform::CUDADeviceContext>::SparseCooToDense(
-    const int64_t M, const int64_t N, const int64_t nnz, const int64_t* rows,
-    const int64_t* cols, const T* values, T* dense) const {
-  SparseToDense<T>(context_, M, N, nnz, rows, cols, values, dense,
-                   CUSPARSE_FORMAT_COO);
-}
-template <>
-template <typename T>
-void Sparse<platform::CUDADeviceContext>::SparseCsrToDense(
-    const int64_t M, const int64_t N, const int64_t nnz, const int64_t* crows,
-    const int64_t* cols, const T* values, T* dense) const {
-  SparseToDense<T>(context_, M, N, nnz, crows, cols, values, dense,
-                   CUSPARSE_FORMAT_CSR);
-}
-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
--- a/paddle/fluid/platform/dynload/cusparse.h
+++ b/paddle/fluid/platform/dynload/cusparse.h
@@ -31,30 +31,23 @@ namespace dynload {
 #if defined(PADDLE_WITH_CUDA)
 // APIs available after CUDA 11.0
 #if CUDA_VERSION >= 11000
-#define CUSPARSE_ROUTINE_EACH(__macro)       \
+#define CUSPARSE_ROUTINE_EACH(__macro)   \
-  __macro(cusparseCreate);                   \
+  __macro(cusparseCreate);               \
-  __macro(cusparseSetStream);                \
+  __macro(cusparseSetStream);            \
-  __macro(cusparseCreateMatDescr);           \
+  __macro(cusparseCreateMatDescr);       \
-  __macro(cusparseDestroy);                  \
+  __macro(cusparseDestroy);              \
-  __macro(cusparseSnnz);                     \
+  __macro(cusparseSnnz);                 \
-  __macro(cusparseDnnz);                     \
+  __macro(cusparseDnnz);                 \
-  __macro(cusparseSetMatType);               \
+  __macro(cusparseSetMatType);           \
-  __macro(cusparseSetMatIndexBase);          \
+  __macro(cusparseSetMatIndexBase);      \
-  __macro(cusparseCreateCsr);                \
+  __macro(cusparseCreateCsr);            \
-  __macro(cusparseCreateCoo);                \
+  __macro(cusparseCreateCoo);            \
-  __macro(cusparseCreateDnMat);              \
+  __macro(cusparseCreateDnMat);          \
-  __macro(cusparseSpMM_bufferSize);          \
+  __macro(cusparseSpMM_bufferSize);      \
-  __macro(cusparseSpMM);                     \
+  __macro(cusparseSpMM);                 \
-  __macro(cusparseDestroySpMat);             \
+  __macro(cusparseDestroySpMat);         \
-  __macro(cusparseDestroyDnMat);             \
+  __macro(cusparseDestroyDnMat);         \
-  __macro(cusparseCooSetPointers);           \
+  __macro(cusparseDnMatSetStridedBatch); \
-  __macro(cusparseCsrSetPointers);           \
-  __macro(cusparseDenseToSparse_bufferSize); \
-  __macro(cusparseDenseToSparse_analysis);   \
-  __macro(cusparseDenseToSparse_convert);    \
-  __macro(cusparseSparseToDense_bufferSize); \
-  __macro(cusparseSparseToDense);            \
-  __macro(cusparseDnMatSetStridedBatch);     \
  __macro(cusparseCsrSetStridedBatch);
 CUSPARSE_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP)

--- a/paddle/phi/backends/dynload/cusparse.h
+++ b/paddle/phi/backends/dynload/cusparse.h
@@ -43,30 +43,23 @@ extern void *cusparse_dso_handle;
 #if defined(PADDLE_WITH_CUDA)
 // APIs available after CUDA 11.0
 #if CUDA_VERSION >= 11000
-#define CUSPARSE_ROUTINE_EACH(__macro)       \
+#define CUSPARSE_ROUTINE_EACH(__macro)   \
-  __macro(cusparseCreate);                   \
+  __macro(cusparseCreate);               \
-  __macro(cusparseSetStream);                \
+  __macro(cusparseSetStream);            \
-  __macro(cusparseCreateMatDescr);           \
+  __macro(cusparseCreateMatDescr);       \
-  __macro(cusparseDestroy);                  \
+  __macro(cusparseDestroy);              \
-  __macro(cusparseSnnz);                     \
+  __macro(cusparseSnnz);                 \
-  __macro(cusparseDnnz);                     \
+  __macro(cusparseDnnz);                 \
-  __macro(cusparseSetMatType);               \
+  __macro(cusparseSetMatType);           \
-  __macro(cusparseSetMatIndexBase);          \
+  __macro(cusparseSetMatIndexBase);      \
-  __macro(cusparseCreateCsr);                \
+  __macro(cusparseCreateCsr);            \
-  __macro(cusparseCreateCoo);                \
+  __macro(cusparseCreateCoo);            \
-  __macro(cusparseCreateDnMat);              \
+  __macro(cusparseCreateDnMat);          \
-  __macro(cusparseSpMM_bufferSize);          \
+  __macro(cusparseSpMM_bufferSize);      \
-  __macro(cusparseSpMM);                     \
+  __macro(cusparseSpMM);                 \
-  __macro(cusparseDestroySpMat);             \
+  __macro(cusparseDestroySpMat);         \
-  __macro(cusparseDestroyDnMat);             \
+  __macro(cusparseDestroyDnMat);         \
-  __macro(cusparseCooSetPointers);           \
+  __macro(cusparseDnMatSetStridedBatch); \
-  __macro(cusparseCsrSetPointers);           \
-  __macro(cusparseDenseToSparse_bufferSize); \
-  __macro(cusparseDenseToSparse_analysis);   \
-  __macro(cusparseDenseToSparse_convert);    \
-  __macro(cusparseSparseToDense_bufferSize); \
-  __macro(cusparseSparseToDense);            \
-  __macro(cusparseDnMatSetStridedBatch);     \
  __macro(cusparseCsrSetStridedBatch);
 CUSPARSE_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP)