From e6cbd72df7df151c5b0a68f62680749fc0517ec6 Mon Sep 17 00:00:00 2001 From: Aurelius84 Date: Thu, 24 Mar 2022 10:41:57 +0800 Subject: [PATCH] [phi] Split selected_rows CMake compilation (#40864) * [phi] Split selected_rows CMake compilation * move file back * move file back --- cmake/phi.cmake | 118 +++++++++--------- paddle/phi/kernels/CMakeLists.txt | 2 + .../phi/kernels/selected_rows/CMakeLists.txt | 3 + .../{ => impl}/isfinite_kernel_impl.h | 0 .../kernels/selected_rows/isfinite_kernel.cc | 2 +- 5 files changed, 65 insertions(+), 60 deletions(-) create mode 100644 paddle/phi/kernels/selected_rows/CMakeLists.txt rename paddle/phi/kernels/selected_rows/{ => impl}/isfinite_kernel_impl.h (100%) diff --git a/cmake/phi.cmake b/cmake/phi.cmake index 1c4dd723b9b..f1241aaa66b 100644 --- a/cmake/phi.cmake +++ b/cmake/phi.cmake @@ -100,7 +100,6 @@ function(kernel_library TARGET) set(xpu_srcs) set(gpudnn_srcs) set(kps_srcs) - set(selected_rows_srcs) # parse and save the deps kerenl targets set(all_srcs) set(kernel_deps) @@ -111,6 +110,12 @@ function(kernel_library TARGET) cmake_parse_arguments(kernel_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + # used for cc_library selected_rows dir target + set(target_suffix "") + if ("${kernel_library_SUB_DIR}" STREQUAL "selected_rows_kernel") + set(target_suffix "_sr") + endif() list(LENGTH kernel_library_SRCS kernel_library_SRCS_len) # one kernel only match one impl file in each backend @@ -121,9 +126,6 @@ function(kernel_library TARGET) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc AND NOT WITH_XPU_KP) list(APPEND cpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc) endif() - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/selected_rows/${TARGET}.cc) - list(APPEND selected_rows_srcs ${CMAKE_CURRENT_SOURCE_DIR}/selected_rows/${TARGET}.cc) - endif() if (WITH_GPU OR WITH_ROCM) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/gpu/${TARGET}.cu) list(APPEND gpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/gpu/${TARGET}.cu) @@ -169,26 +171,46 @@ function(kernel_library TARGET) list(APPEND all_srcs ${xpu_srcs}) list(APPEND all_srcs ${gpudnn_srcs}) list(APPEND all_srcs ${kps_srcs}) + + set(all_include_kernels) + set(all_kernel_name) + foreach(src ${all_srcs}) file(READ ${src} target_content) + # "kernels/xxx"(DenseTensor Kernel) can only include each other, but can't include "SUB_DIR/xxx" (such as selected_rows Kernel) string(REGEX MATCHALL "#include \"paddle\/phi\/kernels\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content}) - if ("${kernel_library_SUB_DIR}" STREQUAL "") - string(REGEX MATCHALL "#include \"paddle\/phi\/kernels\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content}) - else() + list(APPEND all_include_kernels ${include_kernels}) + + # "SUB_DIR/xxx" can include "kernels/xx" and "SUB_DIR/xxx" + if (NOT "${kernel_library_SUB_DIR}" STREQUAL "") string(REGEX MATCHALL "#include \"paddle\/phi\/kernels\/${kernel_library_SUB_DIR}\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content}) + list(APPEND all_include_kernels ${include_kernels}) endif() - foreach(include_kernel ${include_kernels}) + + foreach(include_kernel ${all_include_kernels}) if ("${kernel_library_SUB_DIR}" STREQUAL "") string(REGEX REPLACE "#include \"paddle\/phi\/kernels\/" "" kernel_name ${include_kernel}) + string(REGEX REPLACE ".h\"" "" kernel_name ${kernel_name}) + list(APPEND all_kernel_name ${kernel_name}) else() - string(REGEX REPLACE "#include \"paddle\/phi\/kernels\/${kernel_library_SUB_DIR}\/" "" kernel_name ${include_kernel}) + # NOTE(dev): we should firstly match kernel_library_SUB_DIR. + if (${include_kernel} MATCHES "#include \"paddle\/phi\/kernels\/${kernel_library_SUB_DIR}\/") + string(REGEX REPLACE "#include \"paddle\/phi\/kernels\/${kernel_library_SUB_DIR}\/" "" kernel_name ${include_kernel}) + # for selected_rows directory, add ${target_suffix}. + string(REGEX REPLACE ".h\"" "${target_suffix}" kernel_name ${kernel_name}) + list(APPEND all_kernel_name ${kernel_name}) + else() + string(REGEX REPLACE "#include \"paddle\/phi\/kernels\/" "" kernel_name ${include_kernel}) + string(REGEX REPLACE ".h\"" "" kernel_name ${kernel_name}) + list(APPEND all_kernel_name ${kernel_name}) + endif() + message(STATUS "${TARGET} DEPS ${all_kernel_name}") endif() - string(REGEX REPLACE ".h\"" "" kernel_name ${kernel_name}) - list(APPEND kernel_deps ${kernel_name}) + list(APPEND kernel_deps ${all_kernel_name}) endforeach() endforeach() list(REMOVE_DUPLICATES kernel_deps) - list(REMOVE_ITEM kernel_deps ${TARGET}) + list(REMOVE_ITEM kernel_deps ${TARGET}${target_suffix}) list(LENGTH common_srcs common_srcs_len) list(LENGTH cpu_srcs cpu_srcs_len) @@ -196,92 +218,73 @@ function(kernel_library TARGET) list(LENGTH xpu_srcs xpu_srcs_len) list(LENGTH gpudnn_srcs gpudnn_srcs_len) list(LENGTH kps_srcs kps_srcs_len) - list(LENGTH selected_rows_srcs selected_rows_srcs_len) # kernel source file level # level 1: base device kernel # - cpu_srcs / gpu_srcs / xpu_srcs / gpudnn_srcs / kps_srcs # level 2: device-independent kernel # - common_srcs - # level 3: Kernel implemented by reusing device-independent kernel - # - selected_rows_srcs set(base_device_kernels) set(device_independent_kernel) - set(high_level_kernels) # 1. Base device kernel compile if (${cpu_srcs_len} GREATER 0) - cc_library(${TARGET}_cpu SRCS ${cpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) - list(APPEND base_device_kernels ${TARGET}_cpu) + cc_library(${TARGET}_cpu${target_suffix} SRCS ${cpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) + list(APPEND base_device_kernels ${TARGET}_cpu${target_suffix}) endif() if (${gpu_srcs_len} GREATER 0) if (WITH_GPU) - nv_library(${TARGET}_gpu SRCS ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) + nv_library(${TARGET}_gpu${target_suffix} SRCS ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) elseif (WITH_ROCM) - hip_library(${TARGET}_gpu SRCS ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) + hip_library(${TARGET}_gpu${target_suffix} SRCS ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) endif() - list(APPEND base_device_kernels ${TARGET}_gpu) + list(APPEND base_device_kernels ${TARGET}_gpu${target_suffix}) endif() if (${xpu_srcs_len} GREATER 0) - cc_library(${TARGET}_xpu SRCS ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) - list(APPEND base_device_kernels ${TARGET}_xpu) + cc_library(${TARGET}_xpu${target_suffix} SRCS ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) + list(APPEND base_device_kernels ${TARGET}_xpu${target_suffix}) endif() if (${gpudnn_srcs_len} GREATER 0) if (WITH_GPU) - nv_library(${TARGET}_gpudnn SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) + nv_library(${TARGET}_gpudnn${target_suffix} SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) elseif (WITH_ROCM) - hip_library(${TARGET}_gpudnn SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) + hip_library(${TARGET}_gpudnn${target_suffix} SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) endif() - list(APPEND base_device_kernels ${TARGET}_gpudnn) + list(APPEND base_device_kernels ${TARGET}_gpudnn${target_suffix}) endif() if (${kps_srcs_len} GREATER 0) # only when WITH_XPU_KP, the kps_srcs_len can be > 0 - xpu_library(${TARGET}_kps SRCS ${kps_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) - list(APPEND base_device_kernels ${TARGET}_kps) + xpu_library(${TARGET}_kps${target_suffix} SRCS ${kps_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) + list(APPEND base_device_kernels ${TARGET}_kps${target_suffix}) endif() # 2. Device-independent kernel compile if (${common_srcs_len} GREATER 0) if (WITH_GPU) - nv_library(${TARGET}_common SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels}) + nv_library(${TARGET}_common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels}) elseif (WITH_ROCM) - hip_library(${TARGET}_common SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels}) + hip_library(${TARGET}_common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels}) elseif (WITH_XPU_KP) - xpu_library(${TARGET}_common SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels}) + xpu_library(${TARGET}_common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels}) else() - cc_library(${TARGET}_common SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels}) + cc_library(${TARGET}_common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels}) endif() - list(APPEND device_independent_kernel ${TARGET}_common) + list(APPEND device_independent_kernel ${TARGET}_common${target_suffix}) endif() - # 3. Reusing kernel compile - if (${selected_rows_srcs_len} GREATER 0) - if (WITH_GPU) - nv_library(${TARGET}_sr SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel}) - elseif (WITH_ROCM) - hip_library(${TARGET}_sr SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel}) - elseif (WITH_XPU_KP) - xpu_library(${TARGET}_sr SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel}) - else() - cc_library(${TARGET}_sr SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel}) - endif() - list(APPEND high_level_kernels ${TARGET}_sr) - endif() - # 4. Unify target compile + # 3. Unify target compile list(LENGTH base_device_kernels base_device_kernels_len) list(LENGTH device_independent_kernel device_independent_kernel_len) - list(LENGTH high_level_kernels high_level_kernels_len) - if (${base_device_kernels_len} GREATER 0 OR ${device_independent_kernel_len} GREATER 0 OR - ${high_level_kernels_len} GREATER 0) + if (${base_device_kernels_len} GREATER 0 OR ${device_independent_kernel_len} GREATER 0) if (WITH_GPU) - nv_library(${TARGET} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel} ${high_level_kernels}) + nv_library(${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel}) elseif (WITH_ROCM) - hip_library(${TARGET} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel} ${high_level_kernels}) + hip_library(${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel}) elseif (WITH_XPU_KP) - xpu_library(${TARGET} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel} ${high_level_kernels}) + xpu_library(${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel}) else() - cc_library(${TARGET} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel} ${high_level_kernels}) + cc_library(${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel}) endif() else() set(target_build_flag 0) @@ -290,10 +293,10 @@ function(kernel_library TARGET) if (${target_build_flag} EQUAL 1) if (${common_srcs_len} GREATER 0 OR ${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0 OR ${kps_srcs_len} GREATER 0 OR - ${gpudnn_srcs_len} GREATER 0 OR ${selected_rows_srcs_len} GREATER 0) + ${gpudnn_srcs_len} GREATER 0) # append target into PHI_KERNELS property get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS) - set(phi_kernels ${phi_kernels} ${TARGET}) + set(phi_kernels ${phi_kernels} ${TARGET}${target_suffix}) set_property(GLOBAL PROPERTY PHI_KERNELS ${phi_kernels}) endif() @@ -318,9 +321,6 @@ function(kernel_library TARGET) if (${kps_srcs_len} GREATER 0) kernel_declare(${kps_srcs}) endif() - if (${selected_rows_srcs_len} GREATER 0) - kernel_declare(${selected_rows_srcs}) - endif() endif() endfunction() diff --git a/paddle/phi/kernels/CMakeLists.txt b/paddle/phi/kernels/CMakeLists.txt index 941ede31400..0f77420809c 100644 --- a/paddle/phi/kernels/CMakeLists.txt +++ b/paddle/phi/kernels/CMakeLists.txt @@ -62,6 +62,8 @@ register_kernels(EXCLUDES ${COMMON_BAISC_KERNELS} ${MANUAL_BUILD_KERNELS} DEPS $ # phi sparse kernels add_subdirectory(sparse) +# phi selected_rows kernels +add_subdirectory(selected_rows) copy_if_different(${kernel_declare_file} ${kernel_declare_file_final}) diff --git a/paddle/phi/kernels/selected_rows/CMakeLists.txt b/paddle/phi/kernels/selected_rows/CMakeLists.txt new file mode 100644 index 00000000000..4e6c110c670 --- /dev/null +++ b/paddle/phi/kernels/selected_rows/CMakeLists.txt @@ -0,0 +1,3 @@ + +set(SELECTED_ROWS_KERNEL_DEPS dense_tensor selected_rows sparse_csr_tensor kernel_context kernel_factory arg_map_context convert_utils lod_utils math_function custom_kernel) +register_kernels(DEPS ${SELECTED_ROWS_KERNEL_DEPS} SUB_DIR "selected_rows_kernel") diff --git a/paddle/phi/kernels/selected_rows/isfinite_kernel_impl.h b/paddle/phi/kernels/selected_rows/impl/isfinite_kernel_impl.h similarity index 100% rename from paddle/phi/kernels/selected_rows/isfinite_kernel_impl.h rename to paddle/phi/kernels/selected_rows/impl/isfinite_kernel_impl.h diff --git a/paddle/phi/kernels/selected_rows/isfinite_kernel.cc b/paddle/phi/kernels/selected_rows/isfinite_kernel.cc index a507cdd0d86..630f6bcf835 100644 --- a/paddle/phi/kernels/selected_rows/isfinite_kernel.cc +++ b/paddle/phi/kernels/selected_rows/isfinite_kernel.cc @@ -19,7 +19,7 @@ #include "paddle/phi/backends/gpu/gpu_context.h" #endif #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/selected_rows/isfinite_kernel_impl.h" +#include "paddle/phi/kernels/selected_rows/impl/isfinite_kernel_impl.h" namespace phi { -- GitLab