From 23c3d967111f04f8c99e6a0d47f954b510fe96d8 Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Tue, 29 Mar 2022 20:22:28 +0800 Subject: [PATCH] [Phi] Unify kernel build targets (#41091) * unify_kernel_build_target * fix dnn kernel failed * fix dnn kernel loss target * fix xpu compile failed --- cmake/phi.cmake | 110 ++++++++++++++++++++++++------------------------ 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/cmake/phi.cmake b/cmake/phi.cmake index 8a03c8cc9e5..34d72306ab3 100644 --- a/cmake/phi.cmake +++ b/cmake/phi.cmake @@ -110,7 +110,7 @@ function(kernel_library TARGET) cmake_parse_arguments(kernel_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - + # used for cc_library selected_rows dir target set(target_suffix "") if ("${kernel_library_SUB_DIR}" STREQUAL "selected_rows") @@ -146,16 +146,11 @@ function(kernel_library TARGET) endif() endif() if (WITH_XPU_KP) - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/kps/${TARGET}.cu) - # Change XPU2 file suffix - # NOTE(chenweihang): If we can be sure that the *.kps suffix is no longer used, it can be copied directly to *.xpu - file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/kps/${TARGET}.cu DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/kps) - file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.cu ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.kps) - list(APPEND kps_srcs ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.kps) - endif() - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc ) - list(APPEND kps_srcs ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc) - endif() + # Change XPU2 file suffix + # NOTE(chenweihang): If we can be sure that the *.kps suffix is no longer used, it can be copied directly to *.xpu + file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/kps/${TARGET}.cu DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/kps) + file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.cu ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.kps) + list(APPEND kps_srcs ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.kps) endif() else() # TODO(chenweihang): impl compile by source later @@ -186,7 +181,7 @@ function(kernel_library TARGET) string(REGEX MATCHALL "#include \"paddle\/phi\/kernels\/${kernel_library_SUB_DIR}\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content}) list(APPEND all_include_kernels ${include_kernels}) endif() - + foreach(include_kernel ${all_include_kernels}) if ("${kernel_library_SUB_DIR}" STREQUAL "") string(REGEX REPLACE "#include \"paddle\/phi\/kernels\/" "" kernel_name ${include_kernel}) @@ -219,71 +214,76 @@ function(kernel_library TARGET) list(LENGTH kps_srcs kps_srcs_len) # kernel source file level - # level 1: base device kernel - # - cpu_srcs / gpu_srcs / xpu_srcs / gpudnn_srcs / kps_srcs + # level 1: base device kernel (if any device or dnn kernel exists, the cpu_kernel must be exists!!!) + # - cpu_srcs / gpu_srcs / xpu_srcs / kps_srcs + # = dnn srcs: gpudnn_srcs # level 2: device-independent kernel # - common_srcs - set(base_device_kernels) - set(device_independent_kernel) - # 1. Base device kernel compile - if (${cpu_srcs_len} GREATER 0) - cc_library(${TARGET}_cpu${target_suffix} SRCS ${cpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) - list(APPEND base_device_kernels ${TARGET}_cpu${target_suffix}) - endif() - if (${gpu_srcs_len} GREATER 0) - if (WITH_GPU) - nv_library(${TARGET}_gpu${target_suffix} SRCS ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) - elseif (WITH_ROCM) - hip_library(${TARGET}_gpu${target_suffix} SRCS ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) - endif() - list(APPEND base_device_kernels ${TARGET}_gpu${target_suffix}) + set(partial_build_flag 0) + set(base_build_flag 0) + if (${common_srcs_len} GREATER 0) + set(partial_build_flag 1) endif() - if (${xpu_srcs_len} GREATER 0) - cc_library(${TARGET}_xpu${target_suffix} SRCS ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) - list(APPEND base_device_kernels ${TARGET}_xpu${target_suffix}) + if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0 OR ${kps_srcs_len} GREATER 0) + set(base_build_flag 1) endif() + + # gpudnn or mkldnn needs to be compiled separately + set(dnn_kernels) if (${gpudnn_srcs_len} GREATER 0) if (WITH_GPU) nv_library(${TARGET}_gpudnn${target_suffix} SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) elseif (WITH_ROCM) hip_library(${TARGET}_gpudnn${target_suffix} SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) endif() - list(APPEND base_device_kernels ${TARGET}_gpudnn${target_suffix}) - endif() - if (${kps_srcs_len} GREATER 0) - # only when WITH_XPU_KP, the kps_srcs_len can be > 0 - xpu_library(${TARGET}_kps${target_suffix} SRCS ${kps_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) - list(APPEND base_device_kernels ${TARGET}_kps${target_suffix}) + list(APPEND dnn_kernels ${TARGET}_gpudnn${target_suffix}) endif() + list(LENGTH dnn_kernels dnn_kernels_len) - # 2. Device-independent kernel compile - if (${common_srcs_len} GREATER 0) + if (${partial_build_flag} EQUAL 0 AND ${base_build_flag} EQUAL 1) if (WITH_GPU) - nv_library(${TARGET}_common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels}) + if (${dnn_kernels_len} GREATER 0) + nv_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) + nv_library(${TARGET}${target_suffix} DEPS ${TARGET}_base${target_suffix} ${dnn_kernels}) + else() + nv_library(${TARGET}${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) + endif() elseif (WITH_ROCM) - hip_library(${TARGET}_common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels}) + if (${dnn_kernels_len} GREATER 0) + hip_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) + hip_library(${TARGET}${target_suffix} DEPS ${TARGET}_base${target_suffix} ${dnn_kernels}) + else() + hip_library(${TARGET}${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) + endif() elseif (WITH_XPU_KP) - xpu_library(${TARGET}_common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels}) + xpu_library(${TARGET}${target_suffix} SRCS ${cpu_srcs} ${kps_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) else() - cc_library(${TARGET}_common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels}) + cc_library(${TARGET}${target_suffix} SRCS ${cpu_srcs} ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) endif() - list(APPEND device_independent_kernel ${TARGET}_common${target_suffix}) - endif() - - - # 3. Unify target compile - list(LENGTH base_device_kernels base_device_kernels_len) - list(LENGTH device_independent_kernel device_independent_kernel_len) - if (${base_device_kernels_len} GREATER 0 OR ${device_independent_kernel_len} GREATER 0) + elseif (${partial_build_flag} EQUAL 1 AND ${base_build_flag} EQUAL 1) + if (WITH_GPU) + nv_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) + nv_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${TARGET}_base${target_suffix} ${dnn_kernels}) + elseif (WITH_ROCM) + hip_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) + hip_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${TARGET}_base${target_suffix} ${dnn_kernels}) + elseif (WITH_XPU_KP) + xpu_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${kps_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) + xpu_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${TARGET}_base${target_suffix}) + else() + cc_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) + cc_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${TARGET}_base${target_suffix}) + endif() + elseif (${partial_build_flag} EQUAL 1 AND ${base_build_flag} EQUAL 0) if (WITH_GPU) - nv_library(${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel}) + nv_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) elseif (WITH_ROCM) - hip_library(${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel}) + hip_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) elseif (WITH_XPU_KP) - xpu_library(${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel}) + xpu_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) else() - cc_library(${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel}) + cc_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) endif() else() set(target_build_flag 0) -- GitLab