diff --git a/paddle/fluid/platform/dynload/cusparse.h b/paddle/fluid/platform/dynload/cusparse.h
index f026197490d6a36bdb0d3c133d00ee6ce44f009f..74f9b973a388cbd2a75bbfc641f65dba983c2376 100644
--- a/paddle/fluid/platform/dynload/cusparse.h
+++ b/paddle/fluid/platform/dynload/cusparse.h
@@ -64,7 +64,7 @@ CUSPARSE_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP)
 CUSPARSE_ROUTINE_EACH_R2(PLATFORM_DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP)
 #endif
 
-#if CUDA_VERSION >= 11070
+#if CUDA_VERSION >= 11080
 #define CUSPARSE_ROUTINE_EACH_R3(__macro) \
   __macro(cusparseDnMatSetStridedBatch);  \
   __macro(cusparseCooSetStridedBatch);    \
diff --git a/paddle/phi/backends/dynload/cusparse.h b/paddle/phi/backends/dynload/cusparse.h
index 2f4ec151b1ece68a5a111cbdec0a40de8cf3beaa..fcbabd55b7ebbdd5f1e8fdf9340fbefdfd06f088 100644
--- a/paddle/phi/backends/dynload/cusparse.h
+++ b/paddle/phi/backends/dynload/cusparse.h
@@ -76,7 +76,7 @@ CUSPARSE_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP)
 CUSPARSE_ROUTINE_EACH_R2(DECLARE_DYNAMIC_LOAD_CUSPARSE_WRAP)
 #endif
 
-#if CUDA_VERSION >= 11070
+#if CUDA_VERSION >= 11080
 #define CUSPARSE_ROUTINE_EACH_R3(__macro) \
   __macro(cusparseDnMatSetStridedBatch);  \
   __macro(cusparseCooSetStridedBatch);    \
diff --git a/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h b/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h
index 738f92802670ed6b116e6fdab4a194d885fda5e6..81c4faeb8182fe61c3eaac890f3f2ff2bbc76da9 100644
--- a/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h
+++ b/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h
@@ -101,7 +101,7 @@ inline void CreateCsrDescriptor(const phi::SparseCsrTensor& x,
         gpu_type);
   });
   if (batch_size > 1) {
-#if CUDA_VERSION >= 11070
+#if CUDA_VERSION >= 11080
     dev_ctx.CusparseCall([&](cusparseHandle_t handle) {
       phi::dynload::cusparseCsrSetStridedBatch(
           *descriptor, batch_size, M + 1, batch_nnz);
@@ -109,7 +109,7 @@ inline void CreateCsrDescriptor(const phi::SparseCsrTensor& x,
 #else
     PADDLE_THROW(phi::errors::Unimplemented(
         "Batch Sparse matmul use 'cusparseCsrSetStridedBatch', which is "
-        "supported from CUDA 11.7"));
+        "supported from CUDA 11.8"));
 #endif
   }
 }
@@ -155,7 +155,7 @@ inline void CreateCooDescriptor(const phi::SparseCooTensor& x,
   });
 
   if (batch_size > 1) {
-#if CUDA_VERSION >= 11070
+#if CUDA_VERSION >= 11080
     dev_ctx.CusparseCall([&](cusparseHandle_t handle) {
       phi::dynload::cusparseCooSetStridedBatch(
           *descriptor, batch_size, batch_nnz);
@@ -163,7 +163,7 @@ inline void CreateCooDescriptor(const phi::SparseCooTensor& x,
 #else
     PADDLE_THROW(phi::errors::Unimplemented(
         "Batch Sparse matmul use 'cusparseCooSetStridedBatch', which is "
-        "supported from CUDA 11.7"));
+        "supported from CUDA 11.8"));
 #endif
   }
 }
@@ -241,7 +241,7 @@ class CuSparseDnMatDescriptor {
     PADDLE_ENFORCE_EQ(x.numel(), batch_size * M * N);
 
     if (batch_size > 1) {
-#if CUDA_VERSION >= 11070
+#if CUDA_VERSION >= 11080
       dev_ctx_.CusparseCall([&](cusparseHandle_t handle) {
         phi::dynload::cusparseDnMatSetStridedBatch(
             descriptor_, batch_size, M * N);
@@ -249,7 +249,7 @@ class CuSparseDnMatDescriptor {
 #else
       PADDLE_THROW(phi::errors::Unimplemented(
           "Batch Sparse matmul use 'cusparseDnMatSetStridedBatch', which is "
-          "supported from CUDA 11.7"));
+          "supported from CUDA 11.8"));
 #endif
     }
     VLOG(6) << "Create cusparseDnMatDescr_t " << &descriptor_;
diff --git a/paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu b/paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu
index ab3a75f8974d87846628464e8d44c69f146f1b03..4c83203ed01acc8b24ac1e5a93497aacc0163a0c 100644
--- a/paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu
+++ b/paddle/phi/kernels/sparse/gpu/fused_attention_grad_kernel.cu
@@ -65,7 +65,7 @@ void FusedAttentionCsrGradKernel(const Context& dev_ctx,
                                  DenseTensor* dquery,
                                  DenseTensor* dkey,
                                  DenseTensor* dvalue) {
-#if CUDA_VERSION >= 11070
+#if CUDA_VERSION >= 11080
   /* Step1: Forward: softmax{CSR} * value{Dense} -> out{Dense}, reuse */
   SparseCsrTensor dsoftmax;
   MatmulCsrDenseGradKernel<T, Context>(
@@ -129,7 +129,7 @@ void FusedAttentionCsrGradKernel(const Context& dev_ctx,
   PADDLE_THROW(
       phi::errors::Unimplemented("backward of 'sparse.nn.functional.attention' "
                                  "use 'cusparseCsrSetStridedBatch', which is "
-                                 "completed supported from CUDA 11.7"));
+                                 "fully supported since CUDA 11.8"));
 #endif
 }
diff --git a/paddle/phi/kernels/sparse/gpu/fused_attention_kernel.cu b/paddle/phi/kernels/sparse/gpu/fused_attention_kernel.cu
index ec100eae3a1e19272d50abb59d3d6f9a933d4415..04d143fdb33c5d2f9b78466d93f4a1c647c98d52 100644
--- a/paddle/phi/kernels/sparse/gpu/fused_attention_kernel.cu
+++ b/paddle/phi/kernels/sparse/gpu/fused_attention_kernel.cu
@@ -99,7 +99,7 @@ void FusedAttentionCsrKernel(
     const paddle::optional<DenseTensor>& attn_mask,
     DenseTensor* out,
     SparseCsrTensor* softmax) {
-#if CUDA_VERSION >= 11070
+#if CUDA_VERSION >= 11080
   /* Check Shape */
   auto q_dim = query.dims();
   auto q_rank = q_dim.size();
@@ -217,7 +217,7 @@ void FusedAttentionCsrKernel(
   PADDLE_THROW(
       phi::errors::Unimplemented("forward of 'sparse.nn.functional.attention' "
                                  "use 'cusparseCsrSetStridedBatch', which is "
-                                 "completed supported from CUDA 11.7"));
+                                 "fully supported since CUDA 11.8"));
 #endif
 }
diff --git a/python/paddle/fluid/tests/unittests/test_sparse_addmm_op.py b/python/paddle/fluid/tests/unittests/test_sparse_addmm_op.py
index ca3dbe4a19a8a16882eab2eb04ba457f1f3dac6b..2917f96c442d72fd678f80afe7e55a1ba1d8cb6f 100644
--- a/python/paddle/fluid/tests/unittests/test_sparse_addmm_op.py
+++ b/python/paddle/fluid/tests/unittests/test_sparse_addmm_op.py
@@ -91,8 +91,8 @@ class TestAddmm(unittest.TestCase):
         self.check_result([16, 10], [16, 12], [12, 10], 'csr')
 
     @unittest.skipIf(
-        not paddle.is_compiled_with_cuda() or get_cuda_version() < 11070,
-        "only support cuda>=11.7",
+        not paddle.is_compiled_with_cuda() or get_cuda_version() < 11080,
+        "only supported on CUDA >= 11.8",
     )
     def test_addmm_3d(self):
         self.check_result([8, 16, 10], [8, 16, 12], [8, 12, 10], 'coo')
diff --git a/python/paddle/fluid/tests/unittests/test_sparse_fused_attention_op.py b/python/paddle/fluid/tests/unittests/test_sparse_fused_attention_op.py
index dfc8806fd02ad471d2b787de90d084b20ad39727..8506ac02a6f9da7e315aff6b1592264d65f27f02 100644
--- a/python/paddle/fluid/tests/unittests/test_sparse_fused_attention_op.py
+++ b/python/paddle/fluid/tests/unittests/test_sparse_fused_attention_op.py
@@ -36,8 +36,8 @@ def get_cuda_version():
 
 
 @unittest.skipIf(
-    not core.is_compiled_with_cuda() or get_cuda_version() < 11070,
-    "core is not compiled with CUDA and cuda version need larger than or equal to 11.7",
+    not core.is_compiled_with_cuda() or get_cuda_version() < 11080,
+    "core is not compiled with CUDA or the CUDA version is lower than 11.8",
 )
 class TestSparseAttentionAPI1(unittest.TestCase):
     def setUp(self):
diff --git a/python/paddle/fluid/tests/unittests/test_sparse_matmul_op.py b/python/paddle/fluid/tests/unittests/test_sparse_matmul_op.py
index 368e9cbbd2ff2655b062a87a0de766ca9b060dde..bc45b5ca80a34cc99a2e526e8458777858b0e445 100644
--- a/python/paddle/fluid/tests/unittests/test_sparse_matmul_op.py
+++ b/python/paddle/fluid/tests/unittests/test_sparse_matmul_op.py
@@ -83,8 +83,8 @@ class TestMatmul(unittest.TestCase):
         self.check_result([16, 12], [12, 10], 'csr')
 
     @unittest.skipIf(
-        not paddle.is_compiled_with_cuda() or get_cuda_version() < 11070,
-        "only support cuda>=11.7",
+        not paddle.is_compiled_with_cuda() or get_cuda_version() < 11080,
+        "only supported on CUDA >= 11.8",
     )
     def test_matmul_3d(self):
         self.check_result([8, 16, 12], [8, 12, 10], 'coo')
@@ -131,8 +131,8 @@ class TestMaskedMatmul(unittest.TestCase):
         np.testing.assert_allclose(np_y_grad, y.grad.numpy(), rtol=1e-05)
 
     @unittest.skipIf(
-        not paddle.is_compiled_with_cuda() or get_cuda_version() < 11070,
-        "only support on cuda>=11.7",
+        not paddle.is_compiled_with_cuda() or get_cuda_version() < 11080,
+        "only supported on CUDA >= 11.8",
     )
     def test_masked_matmul_3d(self):
         paddle.set_default_dtype('float32')
diff --git a/python/paddle/sparse/nn/functional/transformer.py b/python/paddle/sparse/nn/functional/transformer.py
index 38118ba359b5730331be7a3cf07bba8ed1c01885..bed15cd42f34297c8ce183b87888da81f6862636 100644
--- a/python/paddle/sparse/nn/functional/transformer.py
+++ b/python/paddle/sparse/nn/functional/transformer.py
@@ -30,7 +30,7 @@ def attention(
 ):
     r"""
     Note:
-        This API is only used from ``CUDA 11.7`` .
+        This API is only supported from ``CUDA 11.8``.
 
     SparseCsrTensor is used to store the intermediate result of Attention matrix
     in Transformer module, which can reduce memory usage and improve performance.
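
Reviewer note (not part of the patch): both the C++ guards and the Python tests encode CUDA versions as major * 1000 + minor * 10, so the new threshold 11080 corresponds to CUDA 11.8, matching the `CUDA_VERSION` macro from `cuda.h`. Below is a minimal sketch of how the tests' `get_cuda_version()` helper and skip guard fit together, assuming `nvcc` is on the PATH; the helper body and the `TestBatchedSparseMatmulGuard` class are illustrative reconstructions, not code copied from the patch.

```python
import os
import re
import unittest

import paddle


def get_cuda_version():
    """Encode the local CUDA toolkit version as major * 1000 + minor * 10.

    For example, CUDA 11.8 becomes 11080, mirroring the CUDA_VERSION macro
    checked by the C++ guards in this patch. Returns -1 when nvcc is
    unavailable, so the comparison below fails closed and the test is skipped.
    """
    output = os.popen("nvcc --version").read()
    match = re.search(r"release (\d+)\.(\d+)", output)
    if match is None:
        return -1
    major, minor = int(match.group(1)), int(match.group(2))
    return major * 1000 + minor * 10


# Skip guard in the same style as the updated tests: batched (3-D) sparse
# matmul relies on cusparse*SetStridedBatch, which this patch gates on
# CUDA >= 11.8.
@unittest.skipIf(
    not paddle.is_compiled_with_cuda() or get_cuda_version() < 11080,
    "only supported on CUDA >= 11.8",
)
class TestBatchedSparseMatmulGuard(unittest.TestCase):
    def test_gate_is_consistent(self):
        # If this test runs at all, the toolkit satisfies the 11080 gate.
        self.assertGreaterEqual(get_cuda_version(), 11080)
```

This encoding is why the patch only has to flip 11070 to 11080 in both languages: the Python helper and the C++ preprocessor check compare against the same integer scale.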