未验证 提交 23c3d967 编写于 作者: C Chen Weihang 提交者: GitHub

[Phi] Unify kernel build targets (#41091)

* unify_kernel_build_target

* fix dnn kernel failed

* fix dnn kernel loss target

* fix xpu compile failed
上级 35b96d48
...@@ -146,17 +146,12 @@ function(kernel_library TARGET) ...@@ -146,17 +146,12 @@ function(kernel_library TARGET)
endif() endif()
endif() endif()
if (WITH_XPU_KP) if (WITH_XPU_KP)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/kps/${TARGET}.cu)
# Change XPU2 file suffix # Change XPU2 file suffix
# NOTE(chenweihang): If we can be sure that the *.kps suffix is no longer used, it can be copied directly to *.xpu # NOTE(chenweihang): If we can be sure that the *.kps suffix is no longer used, it can be copied directly to *.xpu
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/kps/${TARGET}.cu DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/kps) file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/kps/${TARGET}.cu DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/kps)
file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.cu ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.kps) file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.cu ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.kps)
list(APPEND kps_srcs ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.kps) list(APPEND kps_srcs ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.kps)
endif() endif()
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc )
list(APPEND kps_srcs ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc)
endif()
endif()
else() else()
# TODO(chenweihang): impl compile by source later # TODO(chenweihang): impl compile by source later
endif() endif()
...@@ -219,71 +214,76 @@ function(kernel_library TARGET) ...@@ -219,71 +214,76 @@ function(kernel_library TARGET)
list(LENGTH kps_srcs kps_srcs_len) list(LENGTH kps_srcs kps_srcs_len)
# kernel source file level # kernel source file level
# level 1: base device kernel # level 1: base device kernel (if any device or dnn kernel exists, the cpu_kernel must be exists!!!)
# - cpu_srcs / gpu_srcs / xpu_srcs / gpudnn_srcs / kps_srcs # - cpu_srcs / gpu_srcs / xpu_srcs / kps_srcs
# = dnn srcs: gpudnn_srcs
# level 2: device-independent kernel # level 2: device-independent kernel
# - common_srcs # - common_srcs
set(base_device_kernels)
set(device_independent_kernel)
# 1. Base device kernel compile set(partial_build_flag 0)
if (${cpu_srcs_len} GREATER 0) set(base_build_flag 0)
cc_library(${TARGET}_cpu${target_suffix} SRCS ${cpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) if (${common_srcs_len} GREATER 0)
list(APPEND base_device_kernels ${TARGET}_cpu${target_suffix}) set(partial_build_flag 1)
endif()
if (${gpu_srcs_len} GREATER 0)
if (WITH_GPU)
nv_library(${TARGET}_gpu${target_suffix} SRCS ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
elseif (WITH_ROCM)
hip_library(${TARGET}_gpu${target_suffix} SRCS ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
list(APPEND base_device_kernels ${TARGET}_gpu${target_suffix})
endif() endif()
if (${xpu_srcs_len} GREATER 0) if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0 OR ${kps_srcs_len} GREATER 0)
cc_library(${TARGET}_xpu${target_suffix} SRCS ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) set(base_build_flag 1)
list(APPEND base_device_kernels ${TARGET}_xpu${target_suffix})
endif() endif()
# gpudnn or mkldnn needs to be compiled separately
set(dnn_kernels)
if (${gpudnn_srcs_len} GREATER 0) if (${gpudnn_srcs_len} GREATER 0)
if (WITH_GPU) if (WITH_GPU)
nv_library(${TARGET}_gpudnn${target_suffix} SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) nv_library(${TARGET}_gpudnn${target_suffix} SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
elseif (WITH_ROCM) elseif (WITH_ROCM)
hip_library(${TARGET}_gpudnn${target_suffix} SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps}) hip_library(${TARGET}_gpudnn${target_suffix} SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif() endif()
list(APPEND base_device_kernels ${TARGET}_gpudnn${target_suffix}) list(APPEND dnn_kernels ${TARGET}_gpudnn${target_suffix})
endif()
if (${kps_srcs_len} GREATER 0)
# only when WITH_XPU_KP, the kps_srcs_len can be > 0
xpu_library(${TARGET}_kps${target_suffix} SRCS ${kps_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
list(APPEND base_device_kernels ${TARGET}_kps${target_suffix})
endif() endif()
list(LENGTH dnn_kernels dnn_kernels_len)
# 2. Device-independent kernel compile if (${partial_build_flag} EQUAL 0 AND ${base_build_flag} EQUAL 1)
if (${common_srcs_len} GREATER 0)
if (WITH_GPU) if (WITH_GPU)
nv_library(${TARGET}_common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels}) if (${dnn_kernels_len} GREATER 0)
nv_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
nv_library(${TARGET}${target_suffix} DEPS ${TARGET}_base${target_suffix} ${dnn_kernels})
else()
nv_library(${TARGET}${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
elseif (WITH_ROCM) elseif (WITH_ROCM)
hip_library(${TARGET}_common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels}) if (${dnn_kernels_len} GREATER 0)
hip_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
hip_library(${TARGET}${target_suffix} DEPS ${TARGET}_base${target_suffix} ${dnn_kernels})
else()
hip_library(${TARGET}${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
elseif (WITH_XPU_KP) elseif (WITH_XPU_KP)
xpu_library(${TARGET}_common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels}) xpu_library(${TARGET}${target_suffix} SRCS ${cpu_srcs} ${kps_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
else() else()
cc_library(${TARGET}_common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels}) cc_library(${TARGET}${target_suffix} SRCS ${cpu_srcs} ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif() endif()
list(APPEND device_independent_kernel ${TARGET}_common${target_suffix}) elseif (${partial_build_flag} EQUAL 1 AND ${base_build_flag} EQUAL 1)
if (WITH_GPU)
nv_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
nv_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${TARGET}_base${target_suffix} ${dnn_kernels})
elseif (WITH_ROCM)
hip_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
hip_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${TARGET}_base${target_suffix} ${dnn_kernels})
elseif (WITH_XPU_KP)
xpu_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${kps_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
xpu_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${TARGET}_base${target_suffix})
else()
cc_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
cc_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${TARGET}_base${target_suffix})
endif() endif()
elseif (${partial_build_flag} EQUAL 1 AND ${base_build_flag} EQUAL 0)
# 3. Unify target compile
list(LENGTH base_device_kernels base_device_kernels_len)
list(LENGTH device_independent_kernel device_independent_kernel_len)
if (${base_device_kernels_len} GREATER 0 OR ${device_independent_kernel_len} GREATER 0)
if (WITH_GPU) if (WITH_GPU)
nv_library(${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel}) nv_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
elseif (WITH_ROCM) elseif (WITH_ROCM)
hip_library(${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel}) hip_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
elseif (WITH_XPU_KP) elseif (WITH_XPU_KP)
xpu_library(${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel}) xpu_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
else() else()
cc_library(${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel}) cc_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif() endif()
else() else()
set(target_build_flag 0) set(target_build_flag 0)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册