From 7648f429d5992d054d78de9c4b5fa47403d308f4 Mon Sep 17 00:00:00 2001 From: Zhou Wei <1183042833@qq.com> Date: Thu, 3 Nov 2022 15:10:12 +0800 Subject: [PATCH] sparse attention kernel is used from 11.8 (#47594) --- paddle/fluid/platform/dynload/cusparse.h | 2 +- paddle/phi/backends/dynload/cusparse.h | 2 +- .../phi/kernels/funcs/sparse/sparse_blas_impl.cu.h | 12 ++++++------ .../sparse/gpu/fused_attention_grad_kernel.cu | 4 ++-- .../phi/kernels/sparse/gpu/fused_attention_kernel.cu | 4 ++-- .../fluid/tests/unittests/test_sparse_addmm_op.py | 4 ++-- .../unittests/test_sparse_fused_attention_op.py | 4 ++-- .../fluid/tests/unittests/test_sparse_matmul_op.py | 8 ++++---- python/paddle/sparse/nn/functional/transformer.py | 2 +- 9 files changed, 21 insertions(+), 21 deletions(-) diff --git a/paddle/fluid/platform/dynload/cusparse.h b/paddle/fluid/platform/dynload/cusparse.h index f026197490..74f9b973a3 100644 --- a/paddle/fluid/platform/dynload/cusparse.h +++ b/paddle/fluid/platform/dynload/cusparse.h @@ -64,7 +64,7 @@ CUSPARSE_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP) CUSPARSE_ROUTINE_EACH_R2(PLATFORM_DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP) #endif -#if CUDA_VERSION >= 11070 +#if CUDA_VERSION >= 11080 #define CUSPARSE_ROUTINE_EACH_R3(__macro) \ __macro(cusparseDnMatSetStridedBatch); \ __macro(cusparseCooSetStridedBatch); \ diff --git a/paddle/phi/backends/dynload/cusparse.h b/paddle/phi/backends/dynload/cusparse.h index 2f4ec151b1..fcbabd55b7 100644 --- a/paddle/phi/backends/dynload/cusparse.h +++ b/paddle/phi/backends/dynload/cusparse.h @@ -76,7 +76,7 @@ CUSPARSE_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP) CUSPARSE_ROUTINE_EACH_R2(DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP) #endif -#if CUDA_VERSION >= 11070 +#if CUDA_VERSION >= 11080 #define CUSPARSE_ROUTINE_EACH_R3(__macro) \ __macro(cusparseDnMatSetStridedBatch); \ __macro(cusparseCooSetStridedBatch); \ diff --git a/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h 
b/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h index 738f928026..81c4faeb81 100644 --- a/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h +++ b/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h @@ -101,7 +101,7 @@ inline void CreateCsrDescriptor(const phi::SparseCsrTensor& x, gpu_type); }); if (batch_size > 1) { -#if CUDA_VERSION >= 11070 +#if CUDA_VERSION >= 11080 dev_ctx.CusparseCall([&](cusparseHandle_t handle) { phi::dynload::cusparseCsrSetStridedBatch( *descriptor, batch_size, M + 1, batch_nnz); @@ -109,7 +109,7 @@ inline void CreateCsrDescriptor(const phi::SparseCsrTensor& x, #else PADDLE_THROW(phi::errors::Unimplemented( "Batch Sparse matmul use 'cusparseCsrSetStridedBatch', which is " - "supported from CUDA 11.7")); + "supported from CUDA 11.8")); #endif } } @@ -155,7 +155,7 @@ inline void CreateCooDescriptor(const phi::SparseCooTensor& x, }); if (batch_size > 1) { -#if CUDA_VERSION >= 11070 +#if CUDA_VERSION >= 11080 dev_ctx.CusparseCall([&](cusparseHandle_t handle) { phi::dynload::cusparseCooSetStridedBatch( *descriptor, batch_size, batch_nnz); @@ -163,7 +163,7 @@ inline void CreateCooDescriptor(const phi::SparseCooTensor& x, #else PADDLE_THROW(phi::errors::Unimplemented( "Batch Sparse matmul use 'cusparseCooSetStridedBatch', which is " - "supported from CUDA 11.7")); + "supported from CUDA 11.8")); #endif } } @@ -241,7 +241,7 @@ class CuSparseDnMatDescriptor { PADDLE_ENFORCE_EQ(x.numel(), batch_size * M * N); if (batch_size > 1) { -#if CUDA_VERSION >= 11070 +#if CUDA_VERSION >= 11080 dev_ctx_.CusparseCall([&](cusparseHandle_t handle) { phi::dynload::cusparseDnMatSetStridedBatch( descriptor_, batch_size, M * N); @@ -249,7 +249,7 @@ class CuSparseDnMatDescriptor { #else PADDLE_THROW(phi::errors::Unimplemented( "Batch Sparse matmul use 'cusparseDnMatSetStridedBatch', which is " - "supported from CUDA 11.7")); + "supported from CUDA 11.8")); #endif } VLOG(6) << "Create cusparseDnMatDescr_t " << &descriptor_; diff --git 
a/paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu b/paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu index ab3a75f897..4c83203ed0 100644 --- a/paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu @@ -65,7 +65,7 @@ void FusedAttentionCsrGradKernel(const Context& dev_ctx, DenseTensor* dquery, DenseTensor* dkey, DenseTensor* dvalue) { -#if CUDA_VERSION >= 11070 +#if CUDA_VERSION >= 11080 /* Step1: Forward: softmax{CSR} * value{Dense} -> out{Dense}, reuse */ SparseCsrTensor dsoftmax; MatmulCsrDenseGradKernel( @@ -129,7 +129,7 @@ void FusedAttentionCsrGradKernel(const Context& dev_ctx, PADDLE_THROW( phi::errors::Unimplemented("backward of 'sparse.nn.functional.attention' " "use 'cusparseCsrSetStridedBatch', which is " - "completed supported from CUDA 11.7")); + "completely supported from CUDA 11.8")); #endif } diff --git a/paddle/phi/kernels/sparse/gpu/fused_attention_kernel.cu b/paddle/phi/kernels/sparse/gpu/fused_attention_kernel.cu index ec100eae3a..04d143fdb3 100644 --- a/paddle/phi/kernels/sparse/gpu/fused_attention_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/fused_attention_kernel.cu @@ -99,7 +99,7 @@ void FusedAttentionCsrKernel( const paddle::optional& attn_mask, DenseTensor* out, SparseCsrTensor* softmax) { -#if CUDA_VERSION >= 11070 +#if CUDA_VERSION >= 11080 /* Check Shape */ auto q_dim = query.dims(); auto q_rank = q_dim.size(); @@ -217,7 +217,7 @@ void FusedAttentionCsrKernel( PADDLE_THROW( phi::errors::Unimplemented("forward of 'sparse.nn.functional.attention' " "use 'cusparseCsrSetStridedBatch', which is " - "completed supported from CUDA 11.7")); + "completely supported from CUDA 11.8")); #endif } diff --git a/python/paddle/fluid/tests/unittests/test_sparse_addmm_op.py b/python/paddle/fluid/tests/unittests/test_sparse_addmm_op.py index ca3dbe4a19..2917f96c44 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_addmm_op.py +++ 
b/python/paddle/fluid/tests/unittests/test_sparse_addmm_op.py @@ -91,8 +91,8 @@ class TestAddmm(unittest.TestCase): self.check_result([16, 10], [16, 12], [12, 10], 'csr') @unittest.skipIf( - not paddle.is_compiled_with_cuda() or get_cuda_version() < 11070, - "only support cuda>=11.7", + not paddle.is_compiled_with_cuda() or get_cuda_version() < 11080, + "only support cuda>=11.8", ) def test_addmm_3d(self): self.check_result([8, 16, 10], [8, 16, 12], [8, 12, 10], 'coo') diff --git a/python/paddle/fluid/tests/unittests/test_sparse_fused_attention_op.py b/python/paddle/fluid/tests/unittests/test_sparse_fused_attention_op.py index dfc8806fd0..8506ac02a6 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_fused_attention_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_fused_attention_op.py @@ -36,8 +36,8 @@ def get_cuda_version(): @unittest.skipIf( - not core.is_compiled_with_cuda() or get_cuda_version() < 11070, - "core is not compiled with CUDA and cuda version need larger than or equal to 11.7", + not core.is_compiled_with_cuda() or get_cuda_version() < 11080, + "core is not compiled with CUDA and cuda version needs to be greater than or equal to 11.8", ) class TestSparseAttentionAPI1(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_sparse_matmul_op.py b/python/paddle/fluid/tests/unittests/test_sparse_matmul_op.py index 368e9cbbd2..bc45b5ca80 100644 --- a/python/paddle/fluid/tests/unittests/test_sparse_matmul_op.py +++ b/python/paddle/fluid/tests/unittests/test_sparse_matmul_op.py @@ -83,8 +83,8 @@ class TestMatmul(unittest.TestCase): self.check_result([16, 12], [12, 10], 'csr') @unittest.skipIf( - not paddle.is_compiled_with_cuda() or get_cuda_version() < 11070, - "only support cuda>=11.7", + not paddle.is_compiled_with_cuda() or get_cuda_version() < 11080, + "only support cuda>=11.8", ) def test_matmul_3d(self): self.check_result([8, 16, 12], [8, 12, 10], 'coo') @@ -131,8 +131,8 @@ class 
TestMaskedMatmul(unittest.TestCase): np.testing.assert_allclose(np_y_grad, y.grad.numpy(), rtol=1e-05) @unittest.skipIf( - not paddle.is_compiled_with_cuda() or get_cuda_version() < 11070, - "only support on cuda>=11.7", + not paddle.is_compiled_with_cuda() or get_cuda_version() < 11080, + "only support on cuda>=11.8", ) def test_masked_matmul_3d(self): paddle.set_default_dtype('float32') diff --git a/python/paddle/sparse/nn/functional/transformer.py b/python/paddle/sparse/nn/functional/transformer.py index 38118ba359..bed15cd42f 100644 --- a/python/paddle/sparse/nn/functional/transformer.py +++ b/python/paddle/sparse/nn/functional/transformer.py @@ -30,7 +30,7 @@ def attention( ): r""" Note: - This API is only used from ``CUDA 11.7`` . + This API is only used from ``CUDA 11.8`` . SparseCsrTensor is used to store the intermediate result of Attention matrix in Transformer module, which can reduce memory usage and improve performance. -- GitLab