未验证 提交 e6cbd72d 编写于 作者: A Aurelius84 提交者: GitHub

[phi] Split selected_rows CMake compilation (#40864)

* [phi] Split selected_rows CMake compilation

* move file back

* move file back
上级 c1c9368f
......@@ -100,7 +100,6 @@ function(kernel_library TARGET)
set(xpu_srcs)
set(gpudnn_srcs)
set(kps_srcs)
set(selected_rows_srcs)
# parse and save the deps kerenl targets
set(all_srcs)
set(kernel_deps)
......@@ -112,6 +111,12 @@ function(kernel_library TARGET)
cmake_parse_arguments(kernel_library "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN})
# used for cc_library selected_rows dir target
set(target_suffix "")
if ("${kernel_library_SUB_DIR}" STREQUAL "selected_rows_kernel")
set(target_suffix "_sr")
endif()
list(LENGTH kernel_library_SRCS kernel_library_SRCS_len)
# one kernel only match one impl file in each backend
if (${kernel_library_SRCS_len} EQUAL 0)
......@@ -121,9 +126,6 @@ function(kernel_library TARGET)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc AND NOT WITH_XPU_KP)
list(APPEND cpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc)
endif()
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/selected_rows/${TARGET}.cc)
list(APPEND selected_rows_srcs ${CMAKE_CURRENT_SOURCE_DIR}/selected_rows/${TARGET}.cc)
endif()
if (WITH_GPU OR WITH_ROCM)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/gpu/${TARGET}.cu)
list(APPEND gpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/gpu/${TARGET}.cu)
......@@ -169,26 +171,46 @@ function(kernel_library TARGET)
list(APPEND all_srcs ${xpu_srcs})
list(APPEND all_srcs ${gpudnn_srcs})
list(APPEND all_srcs ${kps_srcs})
set(all_include_kernels)
set(all_kernel_name)
foreach(src ${all_srcs})
file(READ ${src} target_content)
# "kernels/xxx"(DenseTensor Kernel) can only include each other, but can't include "SUB_DIR/xxx" (such as selected_rows Kernel)
string(REGEX MATCHALL "#include \"paddle\/phi\/kernels\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content})
if ("${kernel_library_SUB_DIR}" STREQUAL "")
string(REGEX MATCHALL "#include \"paddle\/phi\/kernels\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content})
else()
list(APPEND all_include_kernels ${include_kernels})
# "SUB_DIR/xxx" can include "kernels/xx" and "SUB_DIR/xxx"
if (NOT "${kernel_library_SUB_DIR}" STREQUAL "")
string(REGEX MATCHALL "#include \"paddle\/phi\/kernels\/${kernel_library_SUB_DIR}\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content})
list(APPEND all_include_kernels ${include_kernels})
endif()
foreach(include_kernel ${include_kernels})
foreach(include_kernel ${all_include_kernels})
if ("${kernel_library_SUB_DIR}" STREQUAL "")
string(REGEX REPLACE "#include \"paddle\/phi\/kernels\/" "" kernel_name ${include_kernel})
string(REGEX REPLACE ".h\"" "" kernel_name ${kernel_name})
list(APPEND all_kernel_name ${kernel_name})
else()
# NOTE(dev): we should firstly match kernel_library_SUB_DIR.
if (${include_kernel} MATCHES "#include \"paddle\/phi\/kernels\/${kernel_library_SUB_DIR}\/")
string(REGEX REPLACE "#include \"paddle\/phi\/kernels\/${kernel_library_SUB_DIR}\/" "" kernel_name ${include_kernel})
endif()
# for selected_rows directory, add ${target_suffix}.
string(REGEX REPLACE ".h\"" "${target_suffix}" kernel_name ${kernel_name})
list(APPEND all_kernel_name ${kernel_name})
else()
string(REGEX REPLACE "#include \"paddle\/phi\/kernels\/" "" kernel_name ${include_kernel})
string(REGEX REPLACE ".h\"" "" kernel_name ${kernel_name})
list(APPEND kernel_deps ${kernel_name})
list(APPEND all_kernel_name ${kernel_name})
endif()
message(STATUS "${TARGET} DEPS ${all_kernel_name}")
endif()
list(APPEND kernel_deps ${all_kernel_name})
endforeach()
endforeach()
list(REMOVE_DUPLICATES kernel_deps)
list(REMOVE_ITEM kernel_deps ${TARGET})
list(REMOVE_ITEM kernel_deps ${TARGET}${target_suffix})
list(LENGTH common_srcs common_srcs_len)
list(LENGTH cpu_srcs cpu_srcs_len)
......@@ -196,92 +218,73 @@ function(kernel_library TARGET)
list(LENGTH xpu_srcs xpu_srcs_len)
list(LENGTH gpudnn_srcs gpudnn_srcs_len)
list(LENGTH kps_srcs kps_srcs_len)
list(LENGTH selected_rows_srcs selected_rows_srcs_len)
# kernel source file level
# level 1: base device kernel
# - cpu_srcs / gpu_srcs / xpu_srcs / gpudnn_srcs / kps_srcs
# level 2: device-independent kernel
# - common_srcs
# level 3: Kernel implemented by reusing device-independent kernel
# - selected_rows_srcs
set(base_device_kernels)
set(device_independent_kernel)
set(high_level_kernels)
# 1. Base device kernel compile
if (${cpu_srcs_len} GREATER 0)
cc_library(${TARGET}_cpu SRCS ${cpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
list(APPEND base_device_kernels ${TARGET}_cpu)
cc_library(${TARGET}_cpu${target_suffix} SRCS ${cpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
list(APPEND base_device_kernels ${TARGET}_cpu${target_suffix})
endif()
if (${gpu_srcs_len} GREATER 0)
if (WITH_GPU)
nv_library(${TARGET}_gpu SRCS ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
nv_library(${TARGET}_gpu${target_suffix} SRCS ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
elseif (WITH_ROCM)
hip_library(${TARGET}_gpu SRCS ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
hip_library(${TARGET}_gpu${target_suffix} SRCS ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
list(APPEND base_device_kernels ${TARGET}_gpu)
list(APPEND base_device_kernels ${TARGET}_gpu${target_suffix})
endif()
if (${xpu_srcs_len} GREATER 0)
cc_library(${TARGET}_xpu SRCS ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
list(APPEND base_device_kernels ${TARGET}_xpu)
cc_library(${TARGET}_xpu${target_suffix} SRCS ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
list(APPEND base_device_kernels ${TARGET}_xpu${target_suffix})
endif()
if (${gpudnn_srcs_len} GREATER 0)
if (WITH_GPU)
nv_library(${TARGET}_gpudnn SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
nv_library(${TARGET}_gpudnn${target_suffix} SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
elseif (WITH_ROCM)
hip_library(${TARGET}_gpudnn SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
hip_library(${TARGET}_gpudnn${target_suffix} SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
list(APPEND base_device_kernels ${TARGET}_gpudnn)
list(APPEND base_device_kernels ${TARGET}_gpudnn${target_suffix})
endif()
if (${kps_srcs_len} GREATER 0)
# only when WITH_XPU_KP, the kps_srcs_len can be > 0
xpu_library(${TARGET}_kps SRCS ${kps_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
list(APPEND base_device_kernels ${TARGET}_kps)
xpu_library(${TARGET}_kps${target_suffix} SRCS ${kps_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
list(APPEND base_device_kernels ${TARGET}_kps${target_suffix})
endif()
# 2. Device-independent kernel compile
if (${common_srcs_len} GREATER 0)
if (WITH_GPU)
nv_library(${TARGET}_common SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels})
nv_library(${TARGET}_common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels})
elseif (WITH_ROCM)
hip_library(${TARGET}_common SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels})
hip_library(${TARGET}_common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels})
elseif (WITH_XPU_KP)
xpu_library(${TARGET}_common SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels})
xpu_library(${TARGET}_common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels})
else()
cc_library(${TARGET}_common SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels})
cc_library(${TARGET}_common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels})
endif()
list(APPEND device_independent_kernel ${TARGET}_common)
list(APPEND device_independent_kernel ${TARGET}_common${target_suffix})
endif()
# 3. Reusing kernel compile
if (${selected_rows_srcs_len} GREATER 0)
if (WITH_GPU)
nv_library(${TARGET}_sr SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel})
elseif (WITH_ROCM)
hip_library(${TARGET}_sr SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel})
elseif (WITH_XPU_KP)
xpu_library(${TARGET}_sr SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel})
else()
cc_library(${TARGET}_sr SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel})
endif()
list(APPEND high_level_kernels ${TARGET}_sr)
endif()
# 4. Unify target compile
# 3. Unify target compile
list(LENGTH base_device_kernels base_device_kernels_len)
list(LENGTH device_independent_kernel device_independent_kernel_len)
list(LENGTH high_level_kernels high_level_kernels_len)
if (${base_device_kernels_len} GREATER 0 OR ${device_independent_kernel_len} GREATER 0 OR
${high_level_kernels_len} GREATER 0)
if (${base_device_kernels_len} GREATER 0 OR ${device_independent_kernel_len} GREATER 0)
if (WITH_GPU)
nv_library(${TARGET} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel} ${high_level_kernels})
nv_library(${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel})
elseif (WITH_ROCM)
hip_library(${TARGET} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel} ${high_level_kernels})
hip_library(${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel})
elseif (WITH_XPU_KP)
xpu_library(${TARGET} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel} ${high_level_kernels})
xpu_library(${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel})
else()
cc_library(${TARGET} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel} ${high_level_kernels})
cc_library(${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel})
endif()
else()
set(target_build_flag 0)
......@@ -290,10 +293,10 @@ function(kernel_library TARGET)
if (${target_build_flag} EQUAL 1)
if (${common_srcs_len} GREATER 0 OR ${cpu_srcs_len} GREATER 0 OR
${gpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0 OR ${kps_srcs_len} GREATER 0 OR
${gpudnn_srcs_len} GREATER 0 OR ${selected_rows_srcs_len} GREATER 0)
${gpudnn_srcs_len} GREATER 0)
# append target into PHI_KERNELS property
get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS)
set(phi_kernels ${phi_kernels} ${TARGET})
set(phi_kernels ${phi_kernels} ${TARGET}${target_suffix})
set_property(GLOBAL PROPERTY PHI_KERNELS ${phi_kernels})
endif()
......@@ -318,9 +321,6 @@ function(kernel_library TARGET)
if (${kps_srcs_len} GREATER 0)
kernel_declare(${kps_srcs})
endif()
if (${selected_rows_srcs_len} GREATER 0)
kernel_declare(${selected_rows_srcs})
endif()
endif()
endfunction()
......
......@@ -62,6 +62,8 @@ register_kernels(EXCLUDES ${COMMON_BAISC_KERNELS} ${MANUAL_BUILD_KERNELS} DEPS $
# phi sparse kernels
add_subdirectory(sparse)
# phi selected_rows kernels
add_subdirectory(selected_rows)
copy_if_different(${kernel_declare_file} ${kernel_declare_file_final})
......
set(SELECTED_ROWS_KERNEL_DEPS dense_tensor selected_rows sparse_csr_tensor kernel_context kernel_factory arg_map_context convert_utils lod_utils math_function custom_kernel)
register_kernels(DEPS ${SELECTED_ROWS_KERNEL_DEPS} SUB_DIR "selected_rows_kernel")
......@@ -19,7 +19,7 @@
#include "paddle/phi/backends/gpu/gpu_context.h"
#endif
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/selected_rows/isfinite_kernel_impl.h"
#include "paddle/phi/kernels/selected_rows/impl/isfinite_kernel_impl.h"
namespace phi {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册