diff --git a/paddle/fluid/platform/dynload/cusparse.h b/paddle/fluid/platform/dynload/cusparse.h
index f026197490d6a36bdb0d3c133d00ee6ce44f009f..74f9b973a388cbd2a75bbfc641f65dba983c2376 100644
--- a/paddle/fluid/platform/dynload/cusparse.h
+++ b/paddle/fluid/platform/dynload/cusparse.h
@@ -64,7 +64,7 @@ CUSPARSE_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP)
 CUSPARSE_ROUTINE_EACH_R2(PLATFORM_DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP)
 #endif
 
-#if CUDA_VERSION >= 11070
+#if CUDA_VERSION >= 11080
 #define CUSPARSE_ROUTINE_EACH_R3(__macro) \
   __macro(cusparseDnMatSetStridedBatch);  \
   __macro(cusparseCooSetStridedBatch);    \
diff --git a/paddle/phi/backends/dynload/cusparse.h b/paddle/phi/backends/dynload/cusparse.h
index 2f4ec151b1ece68a5a111cbdec0a40de8cf3beaa..fcbabd55b7ebbdd5f1e8fdf9340fbefdfd06f088 100644
--- a/paddle/phi/backends/dynload/cusparse.h
+++ b/paddle/phi/backends/dynload/cusparse.h
@@ -76,7 +76,7 @@ CUSPARSE_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP)
 CUSPARSE_ROUTINE_EACH_R2(DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP)
 #endif
 
-#if CUDA_VERSION >= 11070
+#if CUDA_VERSION >= 11080
 #define CUSPARSE_ROUTINE_EACH_R3(__macro) \
   __macro(cusparseDnMatSetStridedBatch);  \
   __macro(cusparseCooSetStridedBatch);    \
diff --git a/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h b/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h
index 738f92802670ed6b116e6fdab4a194d885fda5e6..81c4faeb8182fe61c3eaac890f3f2ff2bbc76da9 100644
--- a/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h
+++ b/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h
@@ -101,7 +101,7 @@ inline void CreateCsrDescriptor(const phi::SparseCsrTensor& x,
         gpu_type);
   });
   if (batch_size > 1) {
-#if CUDA_VERSION >= 11070
+#if CUDA_VERSION >= 11080
     dev_ctx.CusparseCall([&](cusparseHandle_t handle) {
       phi::dynload::cusparseCsrSetStridedBatch(
           *descriptor, batch_size, M + 1, batch_nnz);
@@ -109,7 +109,7 @@ inline void CreateCsrDescriptor(const phi::SparseCsrTensor& x,
 #else
     PADDLE_THROW(phi::errors::Unimplemented(
         "Batch Sparse matmul use 'cusparseCsrSetStridedBatch', which is "
-        "supported from CUDA 11.7"));
+        "supported from CUDA 11.8"));
 #endif
   }
 }
@@ -155,7 +155,7 @@ inline void CreateCooDescriptor(const phi::SparseCooTensor& x,
   });
 
   if (batch_size > 1) {
-#if CUDA_VERSION >= 11070
+#if CUDA_VERSION >= 11080
     dev_ctx.CusparseCall([&](cusparseHandle_t handle) {
       phi::dynload::cusparseCooSetStridedBatch(
           *descriptor, batch_size, batch_nnz);
@@ -163,7 +163,7 @@ inline void CreateCooDescriptor(const phi::SparseCooTensor& x,
 #else
     PADDLE_THROW(phi::errors::Unimplemented(
         "Batch Sparse matmul use 'cusparseCooSetStridedBatch', which is "
-        "supported from CUDA 11.7"));
+        "supported from CUDA 11.8"));
 #endif
   }
 }
@@ -241,7 +241,7 @@ class CuSparseDnMatDescriptor {
     PADDLE_ENFORCE_EQ(x.numel(), batch_size * M * N);
 
     if (batch_size > 1) {
-#if CUDA_VERSION >= 11070
+#if CUDA_VERSION >= 11080
       dev_ctx_.CusparseCall([&](cusparseHandle_t handle) {
         phi::dynload::cusparseDnMatSetStridedBatch(
             descriptor_, batch_size, M * N);
@@ -249,7 +249,7 @@ class CuSparseDnMatDescriptor {
 #else
       PADDLE_THROW(phi::errors::Unimplemented(
           "Batch Sparse matmul use 'cusparseDnMatSetStridedBatch', which is "
-          "supported from CUDA 11.7"));
+          "supported from CUDA 11.8"));
 #endif
     }
     VLOG(6) << "Create cusparseDnMatDescr_t " << &descriptor_;
diff --git a/paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu b/paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu
index ab3a75f8974d87846628464e8d44c69f146f1b03..4c83203ed01acc8b24ac1e5a93497aacc0163a0c 100644
--- a/paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu
+++ b/paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu
@@ -65,7 +65,7 @@ void FusedAttentionCsrGradKernel(const Context& dev_ctx,
                                  DenseTensor* dquery,
                                  DenseTensor* dkey,
                                  DenseTensor* dvalue) {
-#if CUDA_VERSION >= 11070
+#if CUDA_VERSION >= 11080
   /* Step1: Forward: softmax{CSR} * value{Dense} -> out{Dense}, reuse */
   SparseCsrTensor dsoftmax;
   MatmulCsrDenseGradKernel<T, Context>(
@@ -129,7 +129,7 @@ void FusedAttentionCsrGradKernel(const Context& dev_ctx,
   PADDLE_THROW(
       phi::errors::Unimplemented("backward of 'sparse.nn.functional.attention' "
                                  "use 'cusparseCsrSetStridedBatch', which is "
-                                 "completed supported from CUDA 11.7"));
+                                 "fully supported since CUDA 11.8"));
 #endif
 }
diff --git a/paddle/phi/kernels/sparse/gpu/fused_attention_kernel.cu b/paddle/phi/kernels/sparse/gpu/fused_attention_kernel.cu
index ec100eae3a1e19272d50abb59d3d6f9a933d4415..04d143fdb33c5d2f9b78466d93f4a1c647c98d52 100644
--- a/paddle/phi/kernels/sparse/gpu/fused_attention_kernel.cu
+++ b/paddle/phi/kernels/sparse/gpu/fused_attention_kernel.cu
@@ -99,7 +99,7 @@ void FusedAttentionCsrKernel(
     const paddle::optional<DenseTensor>& attn_mask,
     DenseTensor* out,
     SparseCsrTensor* softmax) {
-#if CUDA_VERSION >= 11070
+#if CUDA_VERSION >= 11080
   /* Check Shape */
   auto q_dim = query.dims();
   auto q_rank = q_dim.size();
@@ -217,7 +217,7 @@ void FusedAttentionCsrKernel(
   PADDLE_THROW(
       phi::errors::Unimplemented("forward of 'sparse.nn.functional.attention' "
                                  "use 'cusparseCsrSetStridedBatch', which is "
-                                 "completed supported from CUDA 11.7"));
+                                 "fully supported since CUDA 11.8"));
 #endif
 }
diff --git a/python/paddle/fluid/tests/unittests/test_sparse_addmm_op.py b/python/paddle/fluid/tests/unittests/test_sparse_addmm_op.py
index ca3dbe4a19a8a16882eab2eb04ba457f1f3dac6b..2917f96c442d72fd678f80afe7e55a1ba1d8cb6f 100644
--- a/python/paddle/fluid/tests/unittests/test_sparse_addmm_op.py
+++ b/python/paddle/fluid/tests/unittests/test_sparse_addmm_op.py
@@ -91,8 +91,8 @@ class TestAddmm(unittest.TestCase):
         self.check_result([16, 10], [16, 12], [12, 10], 'csr')
 
     @unittest.skipIf(
-        not paddle.is_compiled_with_cuda() or get_cuda_version() < 11070,
-        "only support cuda>=11.7",
+        not paddle.is_compiled_with_cuda() or get_cuda_version() < 11080,
+        "only supported on CUDA >= 11.8",
     )
     def test_addmm_3d(self):
         self.check_result([8, 16, 10], [8, 16, 12], [8, 12, 10], 'coo')
diff --git a/python/paddle/fluid/tests/unittests/test_sparse_fused_attention_op.py b/python/paddle/fluid/tests/unittests/test_sparse_fused_attention_op.py
index dfc8806fd02ad471d2b787de90d084b20ad39727..8506ac02a6f9da7e315aff6b1592264d65f27f02 100644
--- a/python/paddle/fluid/tests/unittests/test_sparse_fused_attention_op.py
+++ b/python/paddle/fluid/tests/unittests/test_sparse_fused_attention_op.py
@@ -36,8 +36,8 @@ def get_cuda_version():
 
 
 @unittest.skipIf(
-    not core.is_compiled_with_cuda() or get_cuda_version() < 11070,
-    "core is not compiled with CUDA and cuda version need larger than or equal to 11.7",
+    not core.is_compiled_with_cuda() or get_cuda_version() < 11080,
+    "core is not compiled with CUDA or the CUDA version is lower than 11.8",
 )
 class TestSparseAttentionAPI1(unittest.TestCase):
     def setUp(self):
diff --git a/python/paddle/fluid/tests/unittests/test_sparse_matmul_op.py b/python/paddle/fluid/tests/unittests/test_sparse_matmul_op.py
index 368e9cbbd2ff2655b062a87a0de766ca9b060dde..bc45b5ca80a34cc99a2e526e8458777858b0e445 100644
--- a/python/paddle/fluid/tests/unittests/test_sparse_matmul_op.py
+++ b/python/paddle/fluid/tests/unittests/test_sparse_matmul_op.py
@@ -83,8 +83,8 @@ class TestMatmul(unittest.TestCase):
         self.check_result([16, 12], [12, 10], 'csr')
 
     @unittest.skipIf(
-        not paddle.is_compiled_with_cuda() or get_cuda_version() < 11070,
-        "only support cuda>=11.7",
+        not paddle.is_compiled_with_cuda() or get_cuda_version() < 11080,
+        "only supported on CUDA >= 11.8",
     )
     def test_matmul_3d(self):
         self.check_result([8, 16, 12], [8, 12, 10], 'coo')
@@ -131,8 +131,8 @@ class TestMaskedMatmul(unittest.TestCase):
         np.testing.assert_allclose(np_y_grad, y.grad.numpy(), rtol=1e-05)
 
     @unittest.skipIf(
-        not paddle.is_compiled_with_cuda() or get_cuda_version() < 11070,
-        "only support on cuda>=11.7",
+        not paddle.is_compiled_with_cuda() or get_cuda_version() < 11080,
+        "only supported on CUDA >= 11.8",
     )
     def test_masked_matmul_3d(self):
         paddle.set_default_dtype('float32')
diff --git a/python/paddle/sparse/nn/functional/transformer.py b/python/paddle/sparse/nn/functional/transformer.py
index 38118ba359b5730331be7a3cf07bba8ed1c01885..bed15cd42f34297c8ce183b87888da81f6862636 100644
--- a/python/paddle/sparse/nn/functional/transformer.py
+++ b/python/paddle/sparse/nn/functional/transformer.py
@@ -30,7 +30,7 @@ def attention(
 ):
     r"""
     Note:
-        This API is only used from ``CUDA 11.7`` .
+        This API is only supported from ``CUDA 11.8``.
 
     SparseCsrTensor is used to store the intermediate result of Attention matrix
     in Transformer module, which can reduce memory usage and improve performance.
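
Reviewer note (not part of the patch): both the C++ guards and the Python tests encode CUDA versions as major * 1000 + minor * 10, so the new threshold 11080 corresponds to CUDA 11.8, matching the `CUDA_VERSION` macro from `cuda.h`. Below is a minimal sketch of how the tests' `get_cuda_version()` helper and skip guard fit together, assuming `nvcc` is on the PATH; the helper body and the `TestBatchedSparseMatmulGuard` class are illustrative reconstructions, not code copied from the patch.

```python
import os
import re
import unittest

import paddle


def get_cuda_version():
    """Encode the local CUDA toolkit version as major * 1000 + minor * 10.

    For example, CUDA 11.8 becomes 11080, mirroring the CUDA_VERSION macro
    checked by the C++ guards in this patch. Returns -1 when nvcc is
    unavailable, so the comparison below fails closed and the test is skipped.
    """
    output = os.popen("nvcc --version").read()
    match = re.search(r"release (\d+)\.(\d+)", output)
    if match is None:
        return -1
    major, minor = int(match.group(1)), int(match.group(2))
    return major * 1000 + minor * 10


# Skip guard in the same style as the updated tests: batched (3-D) sparse
# matmul relies on cusparse*SetStridedBatch, which this patch gates on
# CUDA >= 11.8.
@unittest.skipIf(
    not paddle.is_compiled_with_cuda() or get_cuda_version() < 11080,
    "only supported on CUDA >= 11.8",
)
class TestBatchedSparseMatmulGuard(unittest.TestCase):
    def test_gate_is_consistent(self):
        # If this test runs at all, the toolkit satisfies the 11080 gate.
        self.assertGreaterEqual(get_cuda_version(), 11080)
```

This encoding is why the patch only has to flip 11070 to 11080 in both languages: the Python helper and the C++ preprocessor check compare against the same integer scale.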