未验证 提交 23c3d967 编写于 作者: C Chen Weihang 提交者: GitHub

[Phi] Unify kernel build targets (#41091)

* unify_kernel_build_target

* fix dnn kernel failed

* fix dnn kernel loss target

* fix xpu compile failed
上级 35b96d48
...@@ -110,7 +110,7 @@ function(kernel_library TARGET) ...@@ -110,7 +110,7 @@ function(kernel_library TARGET)
cmake_parse_arguments(kernel_library "${options}" "${oneValueArgs}" cmake_parse_arguments(kernel_library "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN}) "${multiValueArgs}" ${ARGN})
# used for cc_library selected_rows dir target # used for cc_library selected_rows dir target
set(target_suffix "") set(target_suffix "")
if ("${kernel_library_SUB_DIR}" STREQUAL "selected_rows") if ("${kernel_library_SUB_DIR}" STREQUAL "selected_rows")
...@@ -146,16 +146,11 @@ function(kernel_library TARGET) ...@@ -146,16 +146,11 @@ function(kernel_library TARGET)
endif() endif()
endif() endif()
if (WITH_XPU_KP) if (WITH_XPU_KP)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/kps/${TARGET}.cu) # Change XPU2 file suffix
# Change XPU2 file suffix # NOTE(chenweihang): If we can be sure that the *.kps suffix is no longer used, it can be copied directly to *.xpu
# NOTE(chenweihang): If we can be sure that the *.kps suffix is no longer used, it can be copied directly to *.xpu file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/kps/${TARGET}.cu DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/kps)
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/kps/${TARGET}.cu DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/kps) file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.cu ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.kps)
file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.cu ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.kps) list(APPEND kps_srcs ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.kps)
list(APPEND kps_srcs ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.kps)
endif()
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc )
list(APPEND kps_srcs ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc)
endif()
endif() endif()
else() else()
# TODO(chenweihang): impl compile by source later # TODO(chenweihang): impl compile by source later
...@@ -186,7 +181,7 @@ function(kernel_library TARGET) ...@@ -186,7 +181,7 @@ function(kernel_library TARGET)
string(REGEX MATCHALL "#include \"paddle\/phi\/kernels\/${kernel_library_SUB_DIR}\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content}) string(REGEX MATCHALL "#include \"paddle\/phi\/kernels\/${kernel_library_SUB_DIR}\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content})
list(APPEND all_include_kernels ${include_kernels}) list(APPEND all_include_kernels ${include_kernels})
endif() endif()
foreach(include_kernel ${all_include_kernels}) foreach(include_kernel ${all_include_kernels})
if ("${kernel_library_SUB_DIR}" STREQUAL "") if ("${kernel_library_SUB_DIR}" STREQUAL "")
string(REGEX REPLACE "#include \"paddle\/phi\/kernels\/" "" kernel_name ${include_kernel}) string(REGEX REPLACE "#include \"paddle\/phi\/kernels\/" "" kernel_name ${include_kernel})
...@@ -219,71 +214,76 @@ function(kernel_library TARGET) ...@@ -219,71 +214,76 @@ function(kernel_library TARGET)
list(LENGTH kps_srcs kps_srcs_len) list(LENGTH kps_srcs kps_srcs_len)
# kernel source file level # kernel source file level
# level 1: base device kernel # level 1: base device kernel (if any device or dnn kernel exists, the cpu_kernel must be exists!!!)
# - cpu_srcs / gpu_srcs / xpu_srcs / gpudnn_srcs / kps_srcs # - cpu_srcs / gpu_srcs / xpu_srcs / kps_srcs
# = dnn srcs: gpudnn_srcs
# level 2: device-independent kernel # level 2: device-independent kernel
# - common_srcs # - common_srcs
set(base_device_kernels)
set(device_independent_kernel)
# 1. Base device kernel compile set(partial_build_flag 0)
if (${cpu_srcs_len} GREATER 0) set(base_build_flag 0)
cc_library(${TARGET}_cpu${target_suffix} SRCS ${cpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) if (${common_srcs_len} GREATER 0)
list(APPEND base_device_kernels ${TARGET}_cpu${target_suffix}) set(partial_build_flag 1)
endif()
if (${gpu_srcs_len} GREATER 0)
if (WITH_GPU)
nv_library(${TARGET}_gpu${target_suffix} SRCS ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
elseif (WITH_ROCM)
hip_library(${TARGET}_gpu${target_suffix} SRCS ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
list(APPEND base_device_kernels ${TARGET}_gpu${target_suffix})
endif() endif()
if (${xpu_srcs_len} GREATER 0) if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0 OR ${kps_srcs_len} GREATER 0)
cc_library(${TARGET}_xpu${target_suffix} SRCS ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) set(base_build_flag 1)
list(APPEND base_device_kernels ${TARGET}_xpu${target_suffix})
endif() endif()
# gpudnn or mkldnn needs to be compiled separately
set(dnn_kernels)
if (${gpudnn_srcs_len} GREATER 0) if (${gpudnn_srcs_len} GREATER 0)
if (WITH_GPU) if (WITH_GPU)
nv_library(${TARGET}_gpudnn${target_suffix} SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) nv_library(${TARGET}_gpudnn${target_suffix} SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
elseif (WITH_ROCM) elseif (WITH_ROCM)
hip_library(${TARGET}_gpudnn${target_suffix} SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) hip_library(${TARGET}_gpudnn${target_suffix} SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif() endif()
list(APPEND base_device_kernels ${TARGET}_gpudnn${target_suffix}) list(APPEND dnn_kernels ${TARGET}_gpudnn${target_suffix})
endif()
if (${kps_srcs_len} GREATER 0)
# only when WITH_XPU_KP, the kps_srcs_len can be > 0
xpu_library(${TARGET}_kps${target_suffix} SRCS ${kps_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
list(APPEND base_device_kernels ${TARGET}_kps${target_suffix})
endif() endif()
list(LENGTH dnn_kernels dnn_kernels_len)
# 2. Device-independent kernel compile if (${partial_build_flag} EQUAL 0 AND ${base_build_flag} EQUAL 1)
if (${common_srcs_len} GREATER 0)
if (WITH_GPU) if (WITH_GPU)
nv_library(${TARGET}_common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels}) if (${dnn_kernels_len} GREATER 0)
nv_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
nv_library(${TARGET}${target_suffix} DEPS ${TARGET}_base${target_suffix} ${dnn_kernels})
else()
nv_library(${TARGET}${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
elseif (WITH_ROCM) elseif (WITH_ROCM)
hip_library(${TARGET}_common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels}) if (${dnn_kernels_len} GREATER 0)
hip_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
hip_library(${TARGET}${target_suffix} DEPS ${TARGET}_base${target_suffix} ${dnn_kernels})
else()
hip_library(${TARGET}${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
elseif (WITH_XPU_KP) elseif (WITH_XPU_KP)
xpu_library(${TARGET}_common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels}) xpu_library(${TARGET}${target_suffix} SRCS ${cpu_srcs} ${kps_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
else() else()
cc_library(${TARGET}_common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels}) cc_library(${TARGET}${target_suffix} SRCS ${cpu_srcs} ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif() endif()
list(APPEND device_independent_kernel ${TARGET}_common${target_suffix}) elseif (${partial_build_flag} EQUAL 1 AND ${base_build_flag} EQUAL 1)
endif() if (WITH_GPU)
nv_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
nv_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${TARGET}_base${target_suffix} ${dnn_kernels})
# 3. Unify target compile elseif (WITH_ROCM)
list(LENGTH base_device_kernels base_device_kernels_len) hip_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
list(LENGTH device_independent_kernel device_independent_kernel_len) hip_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${TARGET}_base${target_suffix} ${dnn_kernels})
if (${base_device_kernels_len} GREATER 0 OR ${device_independent_kernel_len} GREATER 0) elseif (WITH_XPU_KP)
xpu_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${kps_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
xpu_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${TARGET}_base${target_suffix})
else()
cc_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
cc_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${TARGET}_base${target_suffix})
endif()
elseif (${partial_build_flag} EQUAL 1 AND ${base_build_flag} EQUAL 0)
if (WITH_GPU) if (WITH_GPU)
nv_library(${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel}) nv_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
elseif (WITH_ROCM) elseif (WITH_ROCM)
hip_library(${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel}) hip_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
elseif (WITH_XPU_KP) elseif (WITH_XPU_KP)
xpu_library(${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel}) xpu_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
else() else()
cc_library(${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel}) cc_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif() endif()
else() else()
set(target_build_flag 0) set(target_build_flag 0)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册