From f2c96bc264854a3176890c51187f94ddad3ee44b Mon Sep 17 00:00:00 2001
From: sneaxiy <32832641+sneaxiy@users.noreply.github.com>
Date: Wed, 29 Mar 2023 09:25:21 +0800
Subject: [PATCH] Fix generate_kernels.py in CUDA 12.0 (#52232)

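* Fix generate_kernels.py for CUDA 12.0: select "80 86" as the default
  arch list for CUDA 11.1 and newer (including 12.x), default start_idx
  in find_arch_range to the last DEFAULT_ARCH entry, and fail the CMake
  configure step when kernel generation returns a non-zero exit code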

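* Fix attrs bug: pass the operator attributes via `attrs=` instead of
  `args=` in python/paddle/incubate/nn/memory_efficient_attention.py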
---
 cmake/cuda.cmake                                      |  2 +-
 paddle/phi/kernels/CMakeLists.txt                     | 11 ++++++++++-
 .../memory_efficient_attention/generate_kernels.py    |  3 ++-
 .../paddle/incubate/nn/memory_efficient_attention.py  |  2 +-
 4 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake
index 5dacd3916c4..710931d937e 100644
--- a/cmake/cuda.cmake
+++ b/cmake/cuda.cmake
@@ -171,7 +171,7 @@ function(select_nvcc_arch_flags out_variable out_arch_bin)
     else()
       if(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.1) # CUDA 11.0
         set(cuda_arch_bin "80")
-      elseif(${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0) # CUDA 11.1+
+      else()
         set(cuda_arch_bin "80 86")
       endif()
     endif()
diff --git a/paddle/phi/kernels/CMakeLists.txt b/paddle/phi/kernels/CMakeLists.txt
index 271fbca6c3f..409c746938f 100644
--- a/paddle/phi/kernels/CMakeLists.txt
+++ b/paddle/phi/kernels/CMakeLists.txt
@@ -129,7 +129,16 @@ if(WITH_CUTLASS)
     COMMAND
       ${PYTHON_EXECUTABLE}
       ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/generate_kernels.py
-      --cuda_arch "${NVCC_ARCH_BIN}")
+      --cuda_arch "${NVCC_ARCH_BIN}"
+    RESULT_VARIABLE memory_efficient_attention_gen_res)
+
+  if(NOT memory_efficient_attention_gen_res EQUAL 0)
+    message(
+      FATAL_ERROR
+        "The memory efficient attention kernel generation errors with NVCC_ARCH_BIN=${NVCC_ARCH_BIN}"
+    )
+  endif()
+
   file(GLOB cutlass_cu "fusion/cutlass/conv2d/generated/*.cu"
        "fusion/cutlass/conv2d/*.cu" "fusion/cutlass/*.cu"
        "fusion/cutlass/memory_efficient_attention/autogen/impl/*.cu")
diff --git a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/generate_kernels.py b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/generate_kernels.py
index 2baa7b07d98..a512f97e48e 100644
--- a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/generate_kernels.py
+++ b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/generate_kernels.py
@@ -44,7 +44,7 @@ def find_arch_range(min_arch, max_arch):
     assert min_arch <= max_arch
     n = len(DEFAULT_ARCH)
 
-    start_idx = 0
+    start_idx = n - 1
     for i in range(n - 1):
         if DEFAULT_ARCH[i] <= min_arch and min_arch < DEFAULT_ARCH[i + 1]:
             start_idx = i
@@ -54,6 +54,7 @@ def find_arch_range(min_arch, max_arch):
     for i in range(n - 1):
         if DEFAULT_ARCH[i] <= max_arch and max_arch < DEFAULT_ARCH[i + 1]:
             end_idx = i + 1
+
     return DEFAULT_ARCH[start_idx:end_idx]
 
 
diff --git a/python/paddle/incubate/nn/memory_efficient_attention.py b/python/paddle/incubate/nn/memory_efficient_attention.py
index 76784254e41..049f00a70cf 100644
--- a/python/paddle/incubate/nn/memory_efficient_attention.py
+++ b/python/paddle/incubate/nn/memory_efficient_attention.py
@@ -134,7 +134,7 @@ def memory_efficient_attention(
             "causal_diagonal": causal_diagonal,
             "seqlen_k": seqlen_k,
         },
-        args={
+        attrs={
             "max_seqlen_q": max_seqlen_q,
             "max_seqlen_k": max_seqlen_k,
             "causal": causal,
-- 
GitLab