diff --git a/paddle/fluid/operators/math/CMakeLists.txt b/paddle/fluid/operators/math/CMakeLists.txt
index ac538cfbd5c680826bc5a78890ea2c25e8b78836..3f7206ac08bf2d3d2a7b82178b11c24f0598d8bb 100644
--- a/paddle/fluid/operators/math/CMakeLists.txt
+++ b/paddle/fluid/operators/math/CMakeLists.txt
@@ -97,17 +97,6 @@ cc_test(
   SRCS concat_test.cc
   DEPS concat_and_split)
-if(WITH_GPU AND (NOT WITH_ROCM))
-  #currenty not yet support ROCM
-  #the generic conversion APIs of dense and sparse are only supported after cuda11.2
-  if((NOT ${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 11.2))
-    cc_test(
-      cusparse_conversion_api_test
-      SRCS cusparse_conversion_api_test.cc
-      DEPS tensor)
-  endif()
-endif()
-
 if(WITH_TESTING AND TEST im2col_test)
   set_tests_properties(im2col_test PROPERTIES TIMEOUT 120)
 endif()

diff --git a/paddle/fluid/operators/math/cusparse_conversion_api_test.cc b/paddle/fluid/operators/math/cusparse_conversion_api_test.cc
deleted file mode 100644
index f5fa3dc23d5a215cee46d7d7a3c529d0c15ddf01..0000000000000000000000000000000000000000
--- a/paddle/fluid/operators/math/cusparse_conversion_api_test.cc
+++ /dev/null
@@ -1,190 +0,0 @@
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include <vector>
-
-#include "glog/logging.h"
-#include "gtest/gtest.h"
-#include "paddle/fluid/framework/tensor_util.h"
-#include "paddle/fluid/operators/math/sparse.h"
-
-template <typename T>
-void TestNNZ(const std::vector<T>& dense_data, const int correct_nnz,
-             const int rows, const int cols) {
-  paddle::platform::CUDADeviceContext* context =
-      new paddle::platform::CUDADeviceContext(paddle::platform::CUDAPlace());
-  context->SetAllocator(
-      paddle::memory::allocation::AllocatorFacade::Instance()
-          .GetAllocator(paddle::platform::CUDAPlace(), context->stream())
-          .get());
-  context->PartialInitWithAllocator();
-  auto sparse =
-      paddle::operators::math::GetSparse<paddle::platform::CUDADeviceContext,
-                                         T>(*context);
-
-  paddle::framework::Tensor dense, nnz_tensor;
-  auto dense_dims = phi::make_ddim({rows, cols});
-  auto nnz_dims = phi::make_ddim({dense_dims[0] + 1});
-  dense.mutable_data<T>(dense_dims, paddle::platform::CUDAPlace());
-  paddle::framework::TensorFromVector(dense_data, *context, &dense);
-  int32_t* nnz_ptr =
-      nnz_tensor.mutable_data<int32_t>(nnz_dims, paddle::platform::CUDAPlace());
-  sparse.nnz(rows, cols, dense.data<T>(), nnz_ptr, nnz_ptr + 1);
-  std::vector<int32_t> nnz_vec(dense_dims[0] + 1);
-  paddle::framework::TensorToVector(nnz_tensor, *context, &nnz_vec);
-  delete context;
-  CHECK_EQ(correct_nnz, nnz_vec[0]);
-}
-
-TEST(sparse, nnz) {
-  std::vector<float> dense_data = {0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 3.2, 0.0, 0.0};
-  TestNNZ<float>(dense_data, 4, 3, 3);
-}
-
-TEST(sparse, nnz_double) {
-  std::vector<double> dense_data = {0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 3.2, 0.0};
-  TestNNZ<double>(dense_data, 4, 4, 2);
-}
-
-template <typename T>
-void TestDenseToSparse(const std::vector<T>& correct_dense_data,
-                       const std::vector<int64_t>& correct_rows,
-                       const std::vector<int64_t>& correct_cols,
-                       const std::vector<T>& correct_values,
-                       const int correct_nnz, const int rows, const int cols,
-                       const std::string& mode) {
-  paddle::platform::CUDADeviceContext* context =
-      new paddle::platform::CUDADeviceContext(paddle::platform::CUDAPlace());
-  context->SetAllocator(
-      paddle::memory::allocation::AllocatorFacade::Instance()
-          .GetAllocator(paddle::platform::CUDAPlace(), context->stream())
-          .get());
-  context->PartialInitWithAllocator();
-  // get sparse
-  auto sparse =
-      paddle::operators::math::GetSparse<paddle::platform::CUDADeviceContext,
-                                         T>(*context);
-
-  // create tensor and copy vector to tensor
-  paddle::framework::Tensor dense_tensor, rows_tensor, cols_tensor,
-      values_tensor, actual_dense_tensor;
-  auto dense_dims = phi::make_ddim({rows, cols});
-  T* dense_data =
-      dense_tensor.mutable_data<T>(dense_dims, paddle::platform::CUDAPlace());
-  T* actual_dense_data = actual_dense_tensor.mutable_data<T>(
-      dense_dims, paddle::platform::CUDAPlace());
-  paddle::framework::TensorFromVector(correct_dense_data, *context,
-                                      &dense_tensor);
-
-  auto nnz_dims = phi::make_ddim({correct_nnz});
-  auto crows_dims = phi::make_ddim({rows + 1});
-  int64_t* rows_data = nullptr;
-  if (mode == "COO") {
-    rows_data = rows_tensor.mutable_data<int64_t>(
-        nnz_dims, paddle::platform::CUDAPlace());
-  } else {
-    rows_data = rows_tensor.mutable_data<int64_t>(
-        crows_dims, paddle::platform::CUDAPlace());
-  }
-  int64_t* cols_data = cols_tensor.mutable_data<int64_t>(
-      nnz_dims, paddle::platform::CUDAPlace());
-  T* values_data =
-      values_tensor.mutable_data<T>(nnz_dims, paddle::platform::CUDAPlace());
-
-  // test dense_to_sparse
-  if (mode == "COO") {
-    sparse.DenseToSparseCoo(rows, cols, dense_data, rows_data, cols_data,
-                            values_data);
-  } else {
-    sparse.DenseToSparseCsr(rows, cols, dense_data, rows_data, cols_data,
-                            values_data);
-  }
-
-  std::vector<int64_t> actual_rows(correct_nnz), actual_crows(rows + 1),
-      actual_cols(correct_nnz);
-  std::vector<T> actual_values(correct_nnz), actual_dense_vec(rows * cols);
-  if (mode == "COO") {
-    paddle::framework::TensorToVector<int64_t>(rows_tensor, *context,
-                                               &actual_rows);
-  } else {
-    paddle::framework::TensorToVector<int64_t>(rows_tensor, *context,
-                                               &actual_crows);
-  }
-  paddle::framework::TensorToVector<int64_t>(cols_tensor, *context,
-                                             &actual_cols);
-  paddle::framework::TensorToVector(values_tensor, *context, &actual_values);
-
-  for (int i = 0; i < correct_nnz; i++) {
-    if (mode == "COO") {
-      CHECK_EQ(correct_rows[i], actual_rows[i]);
-    }
-    CHECK_EQ(correct_cols[i], actual_cols[i]);
-    CHECK_EQ(correct_values[i], actual_values[i]);
-  }
-  if (mode == "CSR") {
-    for (int i = 0; i < rows + 1; i++) {
-      CHECK_EQ(correct_rows[i], actual_crows[i]);
-    }
-  }
-
-  // test sparse_to_dense
-  if (mode == "COO") {
-    sparse.SparseCooToDense(rows, cols, correct_nnz, rows_data, cols_data,
-                            values_data, actual_dense_data);
-  } else {
-    sparse.SparseCsrToDense(rows, cols, correct_nnz, rows_data, cols_data,
-                            values_data, actual_dense_data);
-  }
-  paddle::framework::TensorToVector(actual_dense_tensor, *context,
-                                    &actual_dense_vec);
-  for (uint64_t i = 0; i < correct_dense_data.size(); i++) {
-    CHECK_EQ(correct_dense_data[i], actual_dense_vec[i]);
-  }
-
-  delete context;
-}
-
-TEST(sparse, dense_to_sparse) {
-  std::vector<float> dense_data = {0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 3.2, 0.0, 0.0};
-  std::vector<float> values = {1.0, 2.0, 3.0, 3.2};
-  std::vector<int64_t> rows = {0, 1, 1, 2};
-  std::vector<int64_t> crows = {0, 1, 3, 4};
-  std::vector<int64_t> cols = {1, 0, 2, 0};
-  TestDenseToSparse<float>(dense_data, rows, cols, values, 4, 3, 3, "COO");
-  TestDenseToSparse<float>(dense_data, crows, cols, values, 4, 3, 3, "CSR");
-}
-
-TEST(sparse, dense_to_sparse_double) {
-  std::vector<double> dense_data = {0.0, 1.0, 0.0, 2.0, 0.0, 3.0, 3.2, 0.0};
-  std::vector<double> values = {1.0, 2.0, 3.0, 3.2};
-  std::vector<int64_t> rows = {0, 1, 2, 3};
-  std::vector<int64_t> crows = {0, 1, 2, 3, 4};
-  std::vector<int64_t> cols = {1, 1, 1, 0};
-  TestDenseToSparse<double>(dense_data, rows, cols, values, 4, 4, 2, "COO");
-  TestDenseToSparse<double>(dense_data, crows, cols, values, 4, 4, 2, "CSR");
-}
-
-TEST(sparse, dense_to_sparse_fp16) {
-  using float16 = paddle::platform::float16;
-  std::vector<float16> dense_data = {float16(0.0), float16(1.0), float16(0.0),
-                                     float16(2.0), float16(0.0), float16(3.0),
-                                     float16(3.2), float16(0.0)};
-  std::vector<float16> values = {float16(1.0), float16(2.0), float16(3.0),
-                                 float16(3.2)};
-  std::vector<int64_t> rows = {0, 1, 2, 3};
-  std::vector<int64_t> crows = {0, 1, 2, 3, 4};
-  std::vector<int64_t> cols = {1, 1, 1, 0};
-  TestDenseToSparse<float16>(dense_data, rows, cols, values, 4, 4, 2, "COO");
-  TestDenseToSparse<float16>(dense_data, crows, cols, values, 4, 4, 2, "CSR");
-}

diff --git a/paddle/fluid/operators/math/sparse.h b/paddle/fluid/operators/math/sparse.h
deleted file mode 100644
index 7a5880bbfe7da1c7a0de1dde90146044a86649b1..0000000000000000000000000000000000000000
--- a/paddle/fluid/operators/math/sparse.h
+++ /dev/null
@@ -1,113 +0,0 @@
-// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "paddle/fluid/framework/operator.h"
-#include "paddle/fluid/framework/tensor.h"
-
-namespace paddle {
-namespace framework {
-class ExecutionContext;
-}  // namespace framework
-}  // namespace paddle
-
-namespace paddle {
-namespace operators {
-namespace math {
-
-template <typename DeviceContext>
-class Sparse {
- public:
-  explicit Sparse(const DeviceContext& context) : context_(context) {}
-
-  template <typename T>
-  void nnz(const int M, const int N, const T* dense, int* nnz,
-           int* nnzPerRowColumn) const;
-
-  template <typename T>
-  void DenseToSparseCoo(const int M, const int N, const T* dense, int64_t* rows,
-                        int64_t* cols, T* values) const;
-
-  template <typename T>
-  void DenseToSparseCsr(const int M, const int N, const T* dense,
-                        int64_t* crows, int64_t* cols, T* values) const;
-
-  template <typename T>
-  void SparseCooToDense(const int64_t M, const int64_t N, const int64_t nnz,
-                        const int64_t* rows, const int64_t* cols,
-                        const T* values, T* dense) const;
-  template <typename T>
-  void SparseCsrToDense(const int64_t M, const int64_t N, const int64_t nnz,
-                        const int64_t* crows, const int64_t* cols,
-                        const T* values, T* dense) const;
-
- private:
-  const DeviceContext& context_;
-};
-
-template <typename DeviceContext, typename T>
-class SparseT : private Sparse<DeviceContext> {
- public:
-  using Sparse<DeviceContext>::Sparse;
-
-  template <typename... ARGS>
-  void nnz(ARGS... args) const {
-    Base()->template nnz<T>(args...);
-  }
-
-  template <typename... ARGS>
-  void DenseToSparseCoo(ARGS... args) const {
-    Base()->template DenseToSparseCoo<T>(args...);
-  }
-  template <typename... ARGS>
-  void DenseToSparseCsr(ARGS... args) const {
-    Base()->template DenseToSparseCsr<T>(args...);
-  }
-  template <typename... ARGS>
-  void SparseCooToDense(ARGS... args) const {
-    Base()->template SparseCooToDense<T>(args...);
-  }
-  template <typename... ARGS>
-  void SparseCsrToDense(ARGS... args) const {
-    Base()->template SparseCsrToDense<T>(args...);
-  }
-
- private:
-  const Sparse<DeviceContext>* Base() const {
-    return static_cast<const Sparse<DeviceContext>*>(this);
-  }
-};
-
-template <typename DeviceContext, typename T>
-inline SparseT<DeviceContext, T> GetSparse(
-    const framework::ExecutionContext& exe_ctx) {
-  return SparseT<DeviceContext, T>(
-      exe_ctx.template device_context<DeviceContext>());
-}
-
-template <typename DeviceContext, typename T>
-inline SparseT<DeviceContext, T> GetSparse(const DeviceContext& dev_ctx) {
-  return SparseT<DeviceContext, T>(dev_ctx);
-}
-
-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
-
-#if defined(PADDLE_WITH_CUDA)
-#if CUDA_VERSION >= 11020
-#include "paddle/fluid/operators/math/sparse_impl.cu.h"
-#endif
-#endif

diff --git a/paddle/fluid/operators/math/sparse_impl.cu.h b/paddle/fluid/operators/math/sparse_impl.cu.h
deleted file mode 100644
index 03f94ed573604905869f6587a836ad5402cf3883..0000000000000000000000000000000000000000
--- a/paddle/fluid/operators/math/sparse_impl.cu.h
+++ /dev/null
@@ -1,230 +0,0 @@
-// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "paddle/fluid/platform/device/gpu/gpu_info.h"
-#include "paddle/fluid/platform/dynload/cusparse.h"
-#include "paddle/phi/kernels/funcs/math_function.h"
-
-namespace paddle {
-namespace operators {
-namespace math {
-
-template <typename T>
-cudaDataType_t GetGpuDataType() {
-  if (std::is_same<T, float>::value) {
-    return CUDA_R_32F;
-  } else if (std::is_same<T, double>::value) {
-    return CUDA_R_64F;
-  } else if (std::is_same<T, platform::float16>::value) {
-    return CUDA_R_16F;
-  }
-}
-
-template <>
-template <typename T>
-void Sparse<platform::CUDADeviceContext>::nnz(const int M, const int N,
-                                              const T* dense, int* nnz,
-                                              int* nnzPerRowColumn) const {}
-
-template <>
-template <>
-void Sparse<platform::CUDADeviceContext>::nnz(const int M, const int N,
-                                              const float* dense, int* nnz,
-                                              int* nnzPerRowColumn) const {
-  cusparseMatDescr_t descr = 0;
-  PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cusparseCreateMatDescr(&descr));
-  PADDLE_ENFORCE_GPU_SUCCESS(paddle::platform::dynload::cusparseSetMatType(
-      descr, CUSPARSE_MATRIX_TYPE_GENERAL));
-  PADDLE_ENFORCE_GPU_SUCCESS(paddle::platform::dynload::cusparseSetMatIndexBase(
-      descr, CUSPARSE_INDEX_BASE_ZERO));
-
-  context_.CusparseCall([&](cusparseHandle_t handle) {
-    PADDLE_ENFORCE_GPU_SUCCESS(paddle::platform::dynload::cusparseSnnz(
-        handle, CUSPARSE_DIRECTION_ROW, M, N, descr, dense, M, nnzPerRowColumn,
-        nnz));
-  });
-}
-
-template <>
-template <>
-void Sparse<platform::CUDADeviceContext>::nnz(const int M, const int N,
-                                              const double* dense, int* nnz,
-                                              int* nnzPerRowColumn) const {
-  cusparseMatDescr_t descr = 0;
-  PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cusparseCreateMatDescr(&descr));
-  PADDLE_ENFORCE_GPU_SUCCESS(paddle::platform::dynload::cusparseSetMatType(
-      descr, CUSPARSE_MATRIX_TYPE_GENERAL));
-  PADDLE_ENFORCE_GPU_SUCCESS(paddle::platform::dynload::cusparseSetMatIndexBase(
-      descr, CUSPARSE_INDEX_BASE_ZERO));
-
-  context_.CusparseCall([&](cusparseHandle_t handle) {
-    PADDLE_ENFORCE_GPU_SUCCESS(paddle::platform::dynload::cusparseDnnz(
-        handle, CUSPARSE_DIRECTION_ROW, M, N, descr, dense, M, nnzPerRowColumn,
-        nnz));
-  });
-}
-
-template <typename T>
-inline void DenseToSparse(const platform::CUDADeviceContext& context,
-                          const int M, const int N, const T* dense,
-                          int64_t* rows, int64_t* cols, T* values,
-                          const cusparseFormat_t format) {
-  cusparseSpMatDescr_t matB;
-  cusparseDnMatDescr_t matA;
-
-  cudaDataType_t dtype = GetGpuDataType<T>();
-
-  PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cusparseCreateDnMat(
-      &matA, M, N, N, const_cast<void*>(reinterpret_cast<const void*>(dense)),
-      dtype, CUSPARSE_ORDER_ROW));
-
-  if (format == CUSPARSE_FORMAT_COO) {
-    PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cusparseCreateCoo(
-        &matB, M, N, 0, nullptr, nullptr, nullptr, CUSPARSE_INDEX_64I,
-        CUSPARSE_INDEX_BASE_ZERO, dtype));
-  } else if (format == CUSPARSE_FORMAT_CSR) {
-    PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cusparseCreateCsr(
-        &matB, M, N, 0, rows, nullptr, nullptr, CUSPARSE_INDEX_64I,
-        CUSPARSE_INDEX_64I, CUSPARSE_INDEX_BASE_ZERO, dtype));
-  } else {
-    PADDLE_THROW(platform::errors::InvalidArgument(
-        "the sparse format [%s] is not supported", format));
-  }
-
-  size_t buffer_size = 0;
-  context.CusparseCall([&](cusparseHandle_t handle) {
-    PADDLE_ENFORCE_GPU_SUCCESS(
-        platform::dynload::cusparseDenseToSparse_bufferSize(
-            handle, matA, matB, CUSPARSE_DENSETOSPARSE_ALG_DEFAULT,
-            &buffer_size));
-  });
-  framework::Tensor buffer;
-  float* buffer_data = buffer.mutable_data<float>(
-      {static_cast<int64_t>(buffer_size)}, context.GetPlace());
-
-  context.CusparseCall([&](cusparseHandle_t handle) {
-    PADDLE_ENFORCE_GPU_SUCCESS(
-        platform::dynload::cusparseDenseToSparse_analysis(
-            handle, matA, matB, CUSPARSE_DENSETOSPARSE_ALG_DEFAULT,
-            buffer_data));
-  });
-
-  if (format == CUSPARSE_FORMAT_COO) {
-    PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cusparseCooSetPointers(
-        matB, rows, cols, reinterpret_cast<void*>(values)));
-  } else if (format == CUSPARSE_FORMAT_CSR) {
-    PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cusparseCsrSetPointers(
-        matB, rows, cols, reinterpret_cast<void*>(values)));
-  } else {
-    PADDLE_THROW(platform::errors::InvalidArgument(
-        "the sparse format [%s] is not supported", format));
-  }
-  context.CusparseCall([&](cusparseHandle_t handle) {
-    PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cusparseDenseToSparse_convert(
-        handle, matA, matB, CUSPARSE_DENSETOSPARSE_ALG_DEFAULT, buffer_data));
-  });
-}
-template <>
-template <typename T>
-void Sparse<platform::CUDADeviceContext>::DenseToSparseCoo(
-    const int M, const int N, const T* dense, int64_t* rows, int64_t* cols,
-    T* values) const {
-  DenseToSparse<T>(context_, M, N, dense, rows, cols, values,
-                   CUSPARSE_FORMAT_COO);
-}
-
-template <>
-template <typename T>
-void Sparse<platform::CUDADeviceContext>::DenseToSparseCsr(
-    const int M, const int N, const T* dense, int64_t* crows, int64_t* cols,
-    T* values) const {
-  DenseToSparse<T>(context_, M, N, dense, crows, cols, values,
-                   CUSPARSE_FORMAT_CSR);
-}
-
-template <typename T>
-void SparseToDense(const platform::CUDADeviceContext& context, const int64_t M,
-                   const int64_t N, const int64_t nnz, const int64_t* rows,
-                   const int64_t* cols, const T* values, T* dense,
-                   const cusparseFormat_t format) {
-  cusparseSpMatDescr_t matA;
-  cusparseDnMatDescr_t matB;
-
-  cudaDataType_t dtype = GetGpuDataType<T>();
-  if (format == CUSPARSE_FORMAT_COO) {
-    PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cusparseCreateCoo(
-        &matA, M, N, nnz,
-        const_cast<int64_t*>(reinterpret_cast<const int64_t*>(rows)),
-        const_cast<int64_t*>(reinterpret_cast<const int64_t*>(cols)),
-        const_cast<void*>(reinterpret_cast<const void*>(values)),
-        CUSPARSE_INDEX_64I, CUSPARSE_INDEX_BASE_ZERO, dtype));
-  } else if (format == CUSPARSE_FORMAT_CSR) {
-    PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cusparseCreateCsr(
-        &matA, M, N, nnz,
-        const_cast<int64_t*>(reinterpret_cast<const int64_t*>(rows)),
-        const_cast<int64_t*>(reinterpret_cast<const int64_t*>(cols)),
-        const_cast<void*>(reinterpret_cast<const void*>(values)),
-        CUSPARSE_INDEX_64I, CUSPARSE_INDEX_64I, CUSPARSE_INDEX_BASE_ZERO,
-        dtype));
-  } else {
-    PADDLE_THROW(platform::errors::InvalidArgument(
-        "the sparse format [%s] is not supported", format));
-  }
-
-  PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cusparseCreateDnMat(
-      &matB, M, N, N, reinterpret_cast<void*>(dense), dtype,
-      CUSPARSE_ORDER_ROW));
-
-  size_t buffer_size = 0;
-  context.CusparseCall([&](cusparseHandle_t handle) {
-    PADDLE_ENFORCE_GPU_SUCCESS(
-        platform::dynload::cusparseSparseToDense_bufferSize(
-            handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT,
-            &buffer_size));
-  });
-  framework::Tensor buffer;
-  float* buffer_data = buffer.mutable_data<float>(
-      {static_cast<int64_t>(buffer_size)}, context.GetPlace());
-
-  context.CusparseCall([&](cusparseHandle_t handle) {
-    PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cusparseSparseToDense(
-        handle, matA, matB, CUSPARSE_SPARSETODENSE_ALG_DEFAULT, buffer_data));
-  });
-}
-
-template <>
-template <typename T>
-void Sparse<platform::CUDADeviceContext>::SparseCooToDense(
-    const int64_t M, const int64_t N, const int64_t nnz, const int64_t* rows,
-    const int64_t* cols, const T* values, T* dense) const {
-  SparseToDense<T>(context_, M, N, nnz, rows, cols, values, dense,
-                   CUSPARSE_FORMAT_COO);
-}
-
-template <>
-template <typename T>
-void Sparse<platform::CUDADeviceContext>::SparseCsrToDense(
-    const int64_t M, const int64_t N, const int64_t nnz, const int64_t* crows,
-    const int64_t* cols, const T* values, T* dense) const {
-  SparseToDense<T>(context_, M, N, nnz, crows, cols, values, dense,
-                   CUSPARSE_FORMAT_CSR);
-}
-
-}  // namespace math
-}  // namespace operators
-}  // namespace paddle

diff --git a/paddle/fluid/platform/dynload/cusparse.h b/paddle/fluid/platform/dynload/cusparse.h
index 7f29ec0e823a414110b89a971933e593fb8f4b71..c0620a110c0d85f2a54fcc071857edabe0d2cb40 100644
--- a/paddle/fluid/platform/dynload/cusparse.h
+++ b/paddle/fluid/platform/dynload/cusparse.h
@@ -31,30 +31,23 @@ namespace dynload {
 #if defined(PADDLE_WITH_CUDA)
 // APIs available after CUDA 11.0
 #if CUDA_VERSION >= 11000
-#define CUSPARSE_ROUTINE_EACH(__macro)       \
-  __macro(cusparseCreate);                   \
-  __macro(cusparseSetStream);                \
-  __macro(cusparseCreateMatDescr);           \
-  __macro(cusparseDestroy);                  \
-  __macro(cusparseSnnz);                     \
-  __macro(cusparseDnnz);                     \
-  __macro(cusparseSetMatType);               \
-  __macro(cusparseSetMatIndexBase);          \
-  __macro(cusparseCreateCsr);                \
-  __macro(cusparseCreateCoo);                \
-  __macro(cusparseCreateDnMat);              \
-  __macro(cusparseSpMM_bufferSize);          \
-  __macro(cusparseSpMM);                     \
-  __macro(cusparseDestroySpMat);             \
-  __macro(cusparseDestroyDnMat);             \
-  __macro(cusparseCooSetPointers);           \
-  __macro(cusparseCsrSetPointers);           \
-  __macro(cusparseDenseToSparse_bufferSize); \
-  __macro(cusparseDenseToSparse_analysis);   \
-  __macro(cusparseDenseToSparse_convert);    \
-  __macro(cusparseSparseToDense_bufferSize); \
-  __macro(cusparseSparseToDense);            \
-  __macro(cusparseDnMatSetStridedBatch);     \
+#define CUSPARSE_ROUTINE_EACH(__macro)   \
+  __macro(cusparseCreate);               \
+  __macro(cusparseSetStream);            \
+  __macro(cusparseCreateMatDescr);       \
+  __macro(cusparseDestroy);              \
+  __macro(cusparseSnnz);                 \
+  __macro(cusparseDnnz);                 \
+  __macro(cusparseSetMatType);           \
+  __macro(cusparseSetMatIndexBase);      \
+  __macro(cusparseCreateCsr);            \
+  __macro(cusparseCreateCoo);            \
+  __macro(cusparseCreateDnMat);          \
+  __macro(cusparseSpMM_bufferSize);      \
+  __macro(cusparseSpMM);                 \
+  __macro(cusparseDestroySpMat);         \
+  __macro(cusparseDestroyDnMat);         \
+  __macro(cusparseDnMatSetStridedBatch); \
   __macro(cusparseCsrSetStridedBatch);

 CUSPARSE_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP)

diff --git a/paddle/phi/backends/dynload/cusparse.h b/paddle/phi/backends/dynload/cusparse.h
index 6160faf1f422d002dca83edc59a1b2eefb5ef6d0..2e96e5788f6c95dc5a8f3cd2c262e5ba769f545b 100644
--- a/paddle/phi/backends/dynload/cusparse.h
+++ b/paddle/phi/backends/dynload/cusparse.h
@@ -43,30 +43,23 @@ extern void *cusparse_dso_handle;
 #if defined(PADDLE_WITH_CUDA)
 // APIs available after CUDA 11.0
 #if CUDA_VERSION >= 11000
-#define CUSPARSE_ROUTINE_EACH(__macro)       \
-  __macro(cusparseCreate);                   \
-  __macro(cusparseSetStream);                \
-  __macro(cusparseCreateMatDescr);           \
-  __macro(cusparseDestroy);                  \
-  __macro(cusparseSnnz);                     \
-  __macro(cusparseDnnz);                     \
-  __macro(cusparseSetMatType);               \
-  __macro(cusparseSetMatIndexBase);          \
-  __macro(cusparseCreateCsr);                \
-  __macro(cusparseCreateCoo);                \
-  __macro(cusparseCreateDnMat);              \
-  __macro(cusparseSpMM_bufferSize);          \
-  __macro(cusparseSpMM);                     \
-  __macro(cusparseDestroySpMat);             \
-  __macro(cusparseDestroyDnMat);             \
-  __macro(cusparseCooSetPointers);           \
-  __macro(cusparseCsrSetPointers);           \
-  __macro(cusparseDenseToSparse_bufferSize); \
-  __macro(cusparseDenseToSparse_analysis);   \
-  __macro(cusparseDenseToSparse_convert);    \
-  __macro(cusparseSparseToDense_bufferSize); \
-  __macro(cusparseSparseToDense);            \
-  __macro(cusparseDnMatSetStridedBatch);     \
+#define CUSPARSE_ROUTINE_EACH(__macro)   \
+  __macro(cusparseCreate);               \
+  __macro(cusparseSetStream);            \
+  __macro(cusparseCreateMatDescr);       \
+  __macro(cusparseDestroy);              \
+  __macro(cusparseSnnz);                 \
+  __macro(cusparseDnnz);                 \
+  __macro(cusparseSetMatType);           \
+  __macro(cusparseSetMatIndexBase);      \
+  __macro(cusparseCreateCsr);            \
+  __macro(cusparseCreateCoo);            \
+  __macro(cusparseCreateDnMat);          \
+  __macro(cusparseSpMM_bufferSize);      \
+  __macro(cusparseSpMM);                 \
+  __macro(cusparseDestroySpMat);         \
+  __macro(cusparseDestroyDnMat);         \
+  __macro(cusparseDnMatSetStridedBatch); \
   __macro(cusparseCsrSetStridedBatch);

 CUSPARSE_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP)
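
For reference, the deleted sparse_impl.cu.h drove cuSPARSE's generic conversion API in three steps: query a workspace with cusparseDenseToSparse_bufferSize, run cusparseDenseToSparse_analysis to compute nnz and fill the row offsets, then attach the output arrays and call cusparseDenseToSparse_convert. The sketch below is a minimal standalone reconstruction of that flow against raw cuSPARSE (CUDA 11.2+), not Paddle code: it reuses the 3x3 example matrix from the deleted test, reads nnz back with cusparseSpMatGetSize instead of the legacy cusparseSnnz path used above, and the CHECK_CUSPARSE macro is purely illustrative.

// build: nvcc dense_to_csr.cu -lcusparse
#include <cstdio>
#include <cstdlib>
#include <vector>

#include <cuda_runtime.h>
#include <cusparse.h>

#define CHECK_CUSPARSE(call)                                 \
  do {                                                       \
    cusparseStatus_t status__ = (call);                      \
    if (status__ != CUSPARSE_STATUS_SUCCESS) {               \
      std::fprintf(stderr, "cuSPARSE error %d at line %d\n", \
                   static_cast<int>(status__), __LINE__);    \
      std::exit(EXIT_FAILURE);                               \
    }                                                        \
  } while (0)

int main() {
  // Same 3x3 row-major example as the deleted test: 4 nonzeros.
  const int64_t M = 3, N = 3;
  std::vector<float> h_dense = {0.0f, 1.0f, 0.0f, 2.0f, 0.0f,
                                3.0f, 3.2f, 0.0f, 0.0f};

  float* d_dense = nullptr;
  cudaMalloc(&d_dense, M * N * sizeof(float));
  cudaMemcpy(d_dense, h_dense.data(), M * N * sizeof(float),
             cudaMemcpyHostToDevice);

  cusparseHandle_t handle;
  CHECK_CUSPARSE(cusparseCreate(&handle));

  // Dense source descriptor; the CSR destination starts with null
  // column/value pointers because nnz is not known yet.
  cusparseDnMatDescr_t matA;
  cusparseSpMatDescr_t matB;
  CHECK_CUSPARSE(cusparseCreateDnMat(&matA, M, N, /*ld=*/N, d_dense,
                                     CUDA_R_32F, CUSPARSE_ORDER_ROW));
  int64_t* d_crows = nullptr;
  cudaMalloc(&d_crows, (M + 1) * sizeof(int64_t));
  CHECK_CUSPARSE(cusparseCreateCsr(&matB, M, N, 0, d_crows, nullptr, nullptr,
                                   CUSPARSE_INDEX_64I, CUSPARSE_INDEX_64I,
                                   CUSPARSE_INDEX_BASE_ZERO, CUDA_R_32F));

  // Step 1: workspace query.
  size_t buffer_size = 0;
  CHECK_CUSPARSE(cusparseDenseToSparse_bufferSize(
      handle, matA, matB, CUSPARSE_DENSETOSPARSE_ALG_DEFAULT, &buffer_size));
  void* d_buffer = nullptr;
  cudaMalloc(&d_buffer, buffer_size);

  // Step 2: analysis computes nnz and the CSR row offsets.
  CHECK_CUSPARSE(cusparseDenseToSparse_analysis(
      handle, matA, matB, CUSPARSE_DENSETOSPARSE_ALG_DEFAULT, d_buffer));
  int64_t rows_out, cols_out, nnz;
  CHECK_CUSPARSE(cusparseSpMatGetSize(matB, &rows_out, &cols_out, &nnz));

  // Allocate the now-known-size outputs and attach them to the descriptor.
  int64_t* d_cols = nullptr;
  float* d_vals = nullptr;
  cudaMalloc(&d_cols, nnz * sizeof(int64_t));
  cudaMalloc(&d_vals, nnz * sizeof(float));
  CHECK_CUSPARSE(cusparseCsrSetPointers(matB, d_crows, d_cols, d_vals));

  // Step 3: the conversion itself.
  CHECK_CUSPARSE(cusparseDenseToSparse_convert(
      handle, matA, matB, CUSPARSE_DENSETOSPARSE_ALG_DEFAULT, d_buffer));

  std::printf("nnz = %lld\n", static_cast<long long>(nnz));  // expects 4

  CHECK_CUSPARSE(cusparseDestroyDnMat(matA));
  CHECK_CUSPARSE(cusparseDestroySpMat(matB));
  CHECK_CUSPARSE(cusparseDestroy(handle));
  cudaFree(d_dense);
  cudaFree(d_crows);
  cudaFree(d_cols);
  cudaFree(d_vals);
  cudaFree(d_buffer);
  return 0;
}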