未验证 提交 e6cbd72d 编写于 作者: A Aurelius84 提交者: GitHub

[phi] Split selected_rows CMake compilation (#40864)

* [phi] Split selected_rows CMake compilation

* move file back

* move file back
上级 c1c9368f
...@@ -100,7 +100,6 @@ function(kernel_library TARGET) ...@@ -100,7 +100,6 @@ function(kernel_library TARGET)
set(xpu_srcs) set(xpu_srcs)
set(gpudnn_srcs) set(gpudnn_srcs)
set(kps_srcs) set(kps_srcs)
set(selected_rows_srcs)
# parse and save the deps kerenl targets # parse and save the deps kerenl targets
set(all_srcs) set(all_srcs)
set(kernel_deps) set(kernel_deps)
...@@ -112,6 +111,12 @@ function(kernel_library TARGET) ...@@ -112,6 +111,12 @@ function(kernel_library TARGET)
cmake_parse_arguments(kernel_library "${options}" "${oneValueArgs}" cmake_parse_arguments(kernel_library "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN}) "${multiValueArgs}" ${ARGN})
# used for cc_library selected_rows dir target
set(target_suffix "")
if ("${kernel_library_SUB_DIR}" STREQUAL "selected_rows_kernel")
set(target_suffix "_sr")
endif()
list(LENGTH kernel_library_SRCS kernel_library_SRCS_len) list(LENGTH kernel_library_SRCS kernel_library_SRCS_len)
# one kernel only match one impl file in each backend # one kernel only match one impl file in each backend
if (${kernel_library_SRCS_len} EQUAL 0) if (${kernel_library_SRCS_len} EQUAL 0)
...@@ -121,9 +126,6 @@ function(kernel_library TARGET) ...@@ -121,9 +126,6 @@ function(kernel_library TARGET)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc AND NOT WITH_XPU_KP) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc AND NOT WITH_XPU_KP)
list(APPEND cpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc) list(APPEND cpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc)
endif() endif()
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/selected_rows/${TARGET}.cc)
list(APPEND selected_rows_srcs ${CMAKE_CURRENT_SOURCE_DIR}/selected_rows/${TARGET}.cc)
endif()
if (WITH_GPU OR WITH_ROCM) if (WITH_GPU OR WITH_ROCM)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/gpu/${TARGET}.cu) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/gpu/${TARGET}.cu)
list(APPEND gpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/gpu/${TARGET}.cu) list(APPEND gpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/gpu/${TARGET}.cu)
...@@ -169,26 +171,46 @@ function(kernel_library TARGET) ...@@ -169,26 +171,46 @@ function(kernel_library TARGET)
list(APPEND all_srcs ${xpu_srcs}) list(APPEND all_srcs ${xpu_srcs})
list(APPEND all_srcs ${gpudnn_srcs}) list(APPEND all_srcs ${gpudnn_srcs})
list(APPEND all_srcs ${kps_srcs}) list(APPEND all_srcs ${kps_srcs})
set(all_include_kernels)
set(all_kernel_name)
foreach(src ${all_srcs}) foreach(src ${all_srcs})
file(READ ${src} target_content) file(READ ${src} target_content)
# "kernels/xxx"(DenseTensor Kernel) can only include each other, but can't include "SUB_DIR/xxx" (such as selected_rows Kernel)
string(REGEX MATCHALL "#include \"paddle\/phi\/kernels\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content}) string(REGEX MATCHALL "#include \"paddle\/phi\/kernels\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content})
if ("${kernel_library_SUB_DIR}" STREQUAL "") list(APPEND all_include_kernels ${include_kernels})
string(REGEX MATCHALL "#include \"paddle\/phi\/kernels\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content})
else() # "SUB_DIR/xxx" can include "kernels/xx" and "SUB_DIR/xxx"
if (NOT "${kernel_library_SUB_DIR}" STREQUAL "")
string(REGEX MATCHALL "#include \"paddle\/phi\/kernels\/${kernel_library_SUB_DIR}\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content}) string(REGEX MATCHALL "#include \"paddle\/phi\/kernels\/${kernel_library_SUB_DIR}\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content})
list(APPEND all_include_kernels ${include_kernels})
endif() endif()
foreach(include_kernel ${include_kernels})
foreach(include_kernel ${all_include_kernels})
if ("${kernel_library_SUB_DIR}" STREQUAL "") if ("${kernel_library_SUB_DIR}" STREQUAL "")
string(REGEX REPLACE "#include \"paddle\/phi\/kernels\/" "" kernel_name ${include_kernel}) string(REGEX REPLACE "#include \"paddle\/phi\/kernels\/" "" kernel_name ${include_kernel})
string(REGEX REPLACE ".h\"" "" kernel_name ${kernel_name})
list(APPEND all_kernel_name ${kernel_name})
else() else()
# NOTE(dev): we should firstly match kernel_library_SUB_DIR.
if (${include_kernel} MATCHES "#include \"paddle\/phi\/kernels\/${kernel_library_SUB_DIR}\/")
string(REGEX REPLACE "#include \"paddle\/phi\/kernels\/${kernel_library_SUB_DIR}\/" "" kernel_name ${include_kernel}) string(REGEX REPLACE "#include \"paddle\/phi\/kernels\/${kernel_library_SUB_DIR}\/" "" kernel_name ${include_kernel})
endif() # for selected_rows directory, add ${target_suffix}.
string(REGEX REPLACE ".h\"" "${target_suffix}" kernel_name ${kernel_name})
list(APPEND all_kernel_name ${kernel_name})
else()
string(REGEX REPLACE "#include \"paddle\/phi\/kernels\/" "" kernel_name ${include_kernel})
string(REGEX REPLACE ".h\"" "" kernel_name ${kernel_name}) string(REGEX REPLACE ".h\"" "" kernel_name ${kernel_name})
list(APPEND kernel_deps ${kernel_name}) list(APPEND all_kernel_name ${kernel_name})
endif()
message(STATUS "${TARGET} DEPS ${all_kernel_name}")
endif()
list(APPEND kernel_deps ${all_kernel_name})
endforeach() endforeach()
endforeach() endforeach()
list(REMOVE_DUPLICATES kernel_deps) list(REMOVE_DUPLICATES kernel_deps)
list(REMOVE_ITEM kernel_deps ${TARGET}) list(REMOVE_ITEM kernel_deps ${TARGET}${target_suffix})
list(LENGTH common_srcs common_srcs_len) list(LENGTH common_srcs common_srcs_len)
list(LENGTH cpu_srcs cpu_srcs_len) list(LENGTH cpu_srcs cpu_srcs_len)
...@@ -196,92 +218,73 @@ function(kernel_library TARGET) ...@@ -196,92 +218,73 @@ function(kernel_library TARGET)
list(LENGTH xpu_srcs xpu_srcs_len) list(LENGTH xpu_srcs xpu_srcs_len)
list(LENGTH gpudnn_srcs gpudnn_srcs_len) list(LENGTH gpudnn_srcs gpudnn_srcs_len)
list(LENGTH kps_srcs kps_srcs_len) list(LENGTH kps_srcs kps_srcs_len)
list(LENGTH selected_rows_srcs selected_rows_srcs_len)
# kernel source file level # kernel source file level
# level 1: base device kernel # level 1: base device kernel
# - cpu_srcs / gpu_srcs / xpu_srcs / gpudnn_srcs / kps_srcs # - cpu_srcs / gpu_srcs / xpu_srcs / gpudnn_srcs / kps_srcs
# level 2: device-independent kernel # level 2: device-independent kernel
# - common_srcs # - common_srcs
# level 3: Kernel implemented by reusing device-independent kernel
# - selected_rows_srcs
set(base_device_kernels) set(base_device_kernels)
set(device_independent_kernel) set(device_independent_kernel)
set(high_level_kernels)
# 1. Base device kernel compile # 1. Base device kernel compile
if (${cpu_srcs_len} GREATER 0) if (${cpu_srcs_len} GREATER 0)
cc_library(${TARGET}_cpu SRCS ${cpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) cc_library(${TARGET}_cpu${target_suffix} SRCS ${cpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
list(APPEND base_device_kernels ${TARGET}_cpu) list(APPEND base_device_kernels ${TARGET}_cpu${target_suffix})
endif() endif()
if (${gpu_srcs_len} GREATER 0) if (${gpu_srcs_len} GREATER 0)
if (WITH_GPU) if (WITH_GPU)
nv_library(${TARGET}_gpu SRCS ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) nv_library(${TARGET}_gpu${target_suffix} SRCS ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
elseif (WITH_ROCM) elseif (WITH_ROCM)
hip_library(${TARGET}_gpu SRCS ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) hip_library(${TARGET}_gpu${target_suffix} SRCS ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif() endif()
list(APPEND base_device_kernels ${TARGET}_gpu) list(APPEND base_device_kernels ${TARGET}_gpu${target_suffix})
endif() endif()
if (${xpu_srcs_len} GREATER 0) if (${xpu_srcs_len} GREATER 0)
cc_library(${TARGET}_xpu SRCS ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) cc_library(${TARGET}_xpu${target_suffix} SRCS ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
list(APPEND base_device_kernels ${TARGET}_xpu) list(APPEND base_device_kernels ${TARGET}_xpu${target_suffix})
endif() endif()
if (${gpudnn_srcs_len} GREATER 0) if (${gpudnn_srcs_len} GREATER 0)
if (WITH_GPU) if (WITH_GPU)
nv_library(${TARGET}_gpudnn SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) nv_library(${TARGET}_gpudnn${target_suffix} SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
elseif (WITH_ROCM) elseif (WITH_ROCM)
hip_library(${TARGET}_gpudnn SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) hip_library(${TARGET}_gpudnn${target_suffix} SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif() endif()
list(APPEND base_device_kernels ${TARGET}_gpudnn) list(APPEND base_device_kernels ${TARGET}_gpudnn${target_suffix})
endif() endif()
if (${kps_srcs_len} GREATER 0) if (${kps_srcs_len} GREATER 0)
# only when WITH_XPU_KP, the kps_srcs_len can be > 0 # only when WITH_XPU_KP, the kps_srcs_len can be > 0
xpu_library(${TARGET}_kps SRCS ${kps_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) xpu_library(${TARGET}_kps${target_suffix} SRCS ${kps_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
list(APPEND base_device_kernels ${TARGET}_kps) list(APPEND base_device_kernels ${TARGET}_kps${target_suffix})
endif() endif()
# 2. Device-independent kernel compile # 2. Device-independent kernel compile
if (${common_srcs_len} GREATER 0) if (${common_srcs_len} GREATER 0)
if (WITH_GPU) if (WITH_GPU)
nv_library(${TARGET}_common SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels}) nv_library(${TARGET}_common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels})
elseif (WITH_ROCM) elseif (WITH_ROCM)
hip_library(${TARGET}_common SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels}) hip_library(${TARGET}_common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels})
elseif (WITH_XPU_KP) elseif (WITH_XPU_KP)
xpu_library(${TARGET}_common SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels}) xpu_library(${TARGET}_common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels})
else() else()
cc_library(${TARGET}_common SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels}) cc_library(${TARGET}_common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels})
endif() endif()
list(APPEND device_independent_kernel ${TARGET}_common) list(APPEND device_independent_kernel ${TARGET}_common${target_suffix})
endif() endif()
# 3. Reusing kernel compile
if (${selected_rows_srcs_len} GREATER 0)
if (WITH_GPU)
nv_library(${TARGET}_sr SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel})
elseif (WITH_ROCM)
hip_library(${TARGET}_sr SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel})
elseif (WITH_XPU_KP)
xpu_library(${TARGET}_sr SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel})
else()
cc_library(${TARGET}_sr SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel})
endif()
list(APPEND high_level_kernels ${TARGET}_sr)
endif()
# 4. Unify target compile # 3. Unify target compile
list(LENGTH base_device_kernels base_device_kernels_len) list(LENGTH base_device_kernels base_device_kernels_len)
list(LENGTH device_independent_kernel device_independent_kernel_len) list(LENGTH device_independent_kernel device_independent_kernel_len)
list(LENGTH high_level_kernels high_level_kernels_len) if (${base_device_kernels_len} GREATER 0 OR ${device_independent_kernel_len} GREATER 0)
if (${base_device_kernels_len} GREATER 0 OR ${device_independent_kernel_len} GREATER 0 OR
${high_level_kernels_len} GREATER 0)
if (WITH_GPU) if (WITH_GPU)
nv_library(${TARGET} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel} ${high_level_kernels}) nv_library(${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel})
elseif (WITH_ROCM) elseif (WITH_ROCM)
hip_library(${TARGET} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel} ${high_level_kernels}) hip_library(${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel})
elseif (WITH_XPU_KP) elseif (WITH_XPU_KP)
xpu_library(${TARGET} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel} ${high_level_kernels}) xpu_library(${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel})
else() else()
cc_library(${TARGET} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel} ${high_level_kernels}) cc_library(${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel})
endif() endif()
else() else()
set(target_build_flag 0) set(target_build_flag 0)
...@@ -290,10 +293,10 @@ function(kernel_library TARGET) ...@@ -290,10 +293,10 @@ function(kernel_library TARGET)
if (${target_build_flag} EQUAL 1) if (${target_build_flag} EQUAL 1)
if (${common_srcs_len} GREATER 0 OR ${cpu_srcs_len} GREATER 0 OR if (${common_srcs_len} GREATER 0 OR ${cpu_srcs_len} GREATER 0 OR
${gpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0 OR ${kps_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0 OR ${kps_srcs_len} GREATER 0 OR
${gpudnn_srcs_len} GREATER 0 OR ${selected_rows_srcs_len} GREATER 0) ${gpudnn_srcs_len} GREATER 0)
# append target into PHI_KERNELS property # append target into PHI_KERNELS property
get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS) get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS)
set(phi_kernels ${phi_kernels} ${TARGET}) set(phi_kernels ${phi_kernels} ${TARGET}${target_suffix})
set_property(GLOBAL PROPERTY PHI_KERNELS ${phi_kernels}) set_property(GLOBAL PROPERTY PHI_KERNELS ${phi_kernels})
endif() endif()
...@@ -318,9 +321,6 @@ function(kernel_library TARGET) ...@@ -318,9 +321,6 @@ function(kernel_library TARGET)
if (${kps_srcs_len} GREATER 0) if (${kps_srcs_len} GREATER 0)
kernel_declare(${kps_srcs}) kernel_declare(${kps_srcs})
endif() endif()
if (${selected_rows_srcs_len} GREATER 0)
kernel_declare(${selected_rows_srcs})
endif()
endif() endif()
endfunction() endfunction()
......
...@@ -62,6 +62,8 @@ register_kernels(EXCLUDES ${COMMON_BAISC_KERNELS} ${MANUAL_BUILD_KERNELS} DEPS $ ...@@ -62,6 +62,8 @@ register_kernels(EXCLUDES ${COMMON_BAISC_KERNELS} ${MANUAL_BUILD_KERNELS} DEPS $
# phi sparse kernels # phi sparse kernels
add_subdirectory(sparse) add_subdirectory(sparse)
# phi selected_rows kernels
add_subdirectory(selected_rows)
copy_if_different(${kernel_declare_file} ${kernel_declare_file_final}) copy_if_different(${kernel_declare_file} ${kernel_declare_file_final})
......
set(SELECTED_ROWS_KERNEL_DEPS dense_tensor selected_rows sparse_csr_tensor kernel_context kernel_factory arg_map_context convert_utils lod_utils math_function custom_kernel)
register_kernels(DEPS ${SELECTED_ROWS_KERNEL_DEPS} SUB_DIR "selected_rows_kernel")
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
#include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_context.h"
#endif #endif
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/selected_rows/isfinite_kernel_impl.h" #include "paddle/phi/kernels/selected_rows/impl/isfinite_kernel_impl.h"
namespace phi { namespace phi {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册