From f2c96bc264854a3176890c51187f94ddad3ee44b Mon Sep 17 00:00:00 2001 From: sneaxiy <32832641+sneaxiy@users.noreply.github.com> Date: Wed, 29 Mar 2023 09:25:21 +0800 Subject: [PATCH] Fix generate_kernels.py in CUDA 12.0 (#52232) * fix generate_kernels.py in CUDA 12.0 * fix attrs bug --- cmake/cuda.cmake | 2 +- paddle/phi/kernels/CMakeLists.txt | 11 ++++++++++- .../memory_efficient_attention/generate_kernels.py | 3 ++- .../paddle/incubate/nn/memory_efficient_attention.py | 2 +- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index 5dacd3916c4..710931d937e 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -171,7 +171,7 @@ function(select_nvcc_arch_flags out_variable out_arch_bin) else() if(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.1) # CUDA 11.0 set(cuda_arch_bin "80") - elseif(${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0) # CUDA 11.1+ + else() set(cuda_arch_bin "80 86") endif() endif() diff --git a/paddle/phi/kernels/CMakeLists.txt b/paddle/phi/kernels/CMakeLists.txt index 271fbca6c3f..409c746938f 100644 --- a/paddle/phi/kernels/CMakeLists.txt +++ b/paddle/phi/kernels/CMakeLists.txt @@ -129,7 +129,16 @@ if(WITH_CUTLASS) COMMAND ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/generate_kernels.py - --cuda_arch "${NVCC_ARCH_BIN}") + --cuda_arch "${NVCC_ARCH_BIN}" + RESULT_VARIABLE memory_efficient_attention_gen_res) + + if(NOT memory_efficient_attention_gen_res EQUAL 0) + message( + FATAL_ERROR + "The memory efficient attention kernel generation errors with NVCC_ARCH_BIN=${NVCC_ARCH_BIN}" + ) + endif() + file(GLOB cutlass_cu "fusion/cutlass/conv2d/generated/*.cu" "fusion/cutlass/conv2d/*.cu" "fusion/cutlass/*.cu" "fusion/cutlass/memory_efficient_attention/autogen/impl/*.cu") diff --git a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/generate_kernels.py b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/generate_kernels.py index 2baa7b07d98..a512f97e48e 100644 --- a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/generate_kernels.py +++ b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/generate_kernels.py @@ -44,7 +44,7 @@ def find_arch_range(min_arch, max_arch): assert min_arch <= max_arch n = len(DEFAULT_ARCH) - start_idx = 0 + start_idx = n - 1 for i in range(n - 1): if DEFAULT_ARCH[i] <= min_arch and min_arch < DEFAULT_ARCH[i + 1]: start_idx = i @@ -54,6 +54,7 @@ def find_arch_range(min_arch, max_arch): for i in range(n - 1): if DEFAULT_ARCH[i] <= max_arch and max_arch < DEFAULT_ARCH[i + 1]: end_idx = i + 1 + return DEFAULT_ARCH[start_idx:end_idx] diff --git a/python/paddle/incubate/nn/memory_efficient_attention.py b/python/paddle/incubate/nn/memory_efficient_attention.py index 76784254e41..049f00a70cf 100644 --- a/python/paddle/incubate/nn/memory_efficient_attention.py +++ b/python/paddle/incubate/nn/memory_efficient_attention.py @@ -134,7 +134,7 @@ def memory_efficient_attention( "causal_diagonal": causal_diagonal, "seqlen_k": seqlen_k, }, - args={ + attrs={ "max_seqlen_q": max_seqlen_q, "max_seqlen_k": max_seqlen_k, "causal": causal, -- GitLab