未验证 提交 23c3d967 编写于 作者: C Chen Weihang 提交者: GitHub

[Phi] Unify kernel build targets (#41091)

* unify_kernel_build_target

* fix dnn kernel failed

* fix dnn kernel loss target

* fix xpu compile failed
上级 35b96d48
......@@ -146,17 +146,12 @@ function(kernel_library TARGET)
endif()
endif()
if (WITH_XPU_KP)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/kps/${TARGET}.cu)
# Change XPU2 file suffix
# NOTE(chenweihang): If we can be sure that the *.kps suffix is no longer used, it can be copied directly to *.xpu
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/kps/${TARGET}.cu DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/kps)
file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.cu ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.kps)
list(APPEND kps_srcs ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.kps)
endif()
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc )
list(APPEND kps_srcs ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc)
endif()
endif()
else()
# TODO(chenweihang): impl compile by source later
endif()
......@@ -219,71 +214,76 @@ function(kernel_library TARGET)
list(LENGTH kps_srcs kps_srcs_len)
# kernel source file level
# level 1: base device kernel
# - cpu_srcs / gpu_srcs / xpu_srcs / gpudnn_srcs / kps_srcs
# level 1: base device kernel (if any device or dnn kernel exists, the cpu_kernel must be exists!!!)
# - cpu_srcs / gpu_srcs / xpu_srcs / kps_srcs
# = dnn srcs: gpudnn_srcs
# level 2: device-independent kernel
# - common_srcs
set(base_device_kernels)
set(device_independent_kernel)
# 1. Base device kernel compile
if (${cpu_srcs_len} GREATER 0)
cc_library(${TARGET}_cpu${target_suffix} SRCS ${cpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
list(APPEND base_device_kernels ${TARGET}_cpu${target_suffix})
endif()
if (${gpu_srcs_len} GREATER 0)
if (WITH_GPU)
nv_library(${TARGET}_gpu${target_suffix} SRCS ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
elseif (WITH_ROCM)
hip_library(${TARGET}_gpu${target_suffix} SRCS ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
list(APPEND base_device_kernels ${TARGET}_gpu${target_suffix})
set(partial_build_flag 0)
set(base_build_flag 0)
if (${common_srcs_len} GREATER 0)
set(partial_build_flag 1)
endif()
if (${xpu_srcs_len} GREATER 0)
cc_library(${TARGET}_xpu${target_suffix} SRCS ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
list(APPEND base_device_kernels ${TARGET}_xpu${target_suffix})
if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0 OR ${kps_srcs_len} GREATER 0)
set(base_build_flag 1)
endif()
# gpudnn or mkldnn needs to be compiled separately
set(dnn_kernels)
if (${gpudnn_srcs_len} GREATER 0)
if (WITH_GPU)
nv_library(${TARGET}_gpudnn${target_suffix} SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
elseif (WITH_ROCM)
hip_library(${TARGET}_gpudnn${target_suffix} SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
list(APPEND base_device_kernels ${TARGET}_gpudnn${target_suffix})
endif()
if (${kps_srcs_len} GREATER 0)
# only when WITH_XPU_KP, the kps_srcs_len can be > 0
xpu_library(${TARGET}_kps${target_suffix} SRCS ${kps_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
list(APPEND base_device_kernels ${TARGET}_kps${target_suffix})
list(APPEND dnn_kernels ${TARGET}_gpudnn${target_suffix})
endif()
list(LENGTH dnn_kernels dnn_kernels_len)
# 2. Device-independent kernel compile
if (${common_srcs_len} GREATER 0)
if (${partial_build_flag} EQUAL 0 AND ${base_build_flag} EQUAL 1)
if (WITH_GPU)
nv_library(${TARGET}_common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels})
if (${dnn_kernels_len} GREATER 0)
nv_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
nv_library(${TARGET}${target_suffix} DEPS ${TARGET}_base${target_suffix} ${dnn_kernels})
else()
nv_library(${TARGET}${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
elseif (WITH_ROCM)
hip_library(${TARGET}_common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels})
if (${dnn_kernels_len} GREATER 0)
hip_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
hip_library(${TARGET}${target_suffix} DEPS ${TARGET}_base${target_suffix} ${dnn_kernels})
else()
hip_library(${TARGET}${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
elseif (WITH_XPU_KP)
xpu_library(${TARGET}_common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels})
xpu_library(${TARGET}${target_suffix} SRCS ${cpu_srcs} ${kps_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
else()
cc_library(${TARGET}_common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels})
cc_library(${TARGET}${target_suffix} SRCS ${cpu_srcs} ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
list(APPEND device_independent_kernel ${TARGET}_common${target_suffix})
elseif (${partial_build_flag} EQUAL 1 AND ${base_build_flag} EQUAL 1)
if (WITH_GPU)
nv_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
nv_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${TARGET}_base${target_suffix} ${dnn_kernels})
elseif (WITH_ROCM)
hip_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
hip_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${TARGET}_base${target_suffix} ${dnn_kernels})
elseif (WITH_XPU_KP)
xpu_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${kps_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
xpu_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${TARGET}_base${target_suffix})
else()
cc_library(${TARGET}_base${target_suffix} SRCS ${cpu_srcs} ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
cc_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${TARGET}_base${target_suffix})
endif()
# 3. Unify target compile
list(LENGTH base_device_kernels base_device_kernels_len)
list(LENGTH device_independent_kernel device_independent_kernel_len)
if (${base_device_kernels_len} GREATER 0 OR ${device_independent_kernel_len} GREATER 0)
elseif (${partial_build_flag} EQUAL 1 AND ${base_build_flag} EQUAL 0)
if (WITH_GPU)
nv_library(${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel})
nv_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
elseif (WITH_ROCM)
hip_library(${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel})
hip_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
elseif (WITH_XPU_KP)
xpu_library(${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel})
xpu_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
else()
cc_library(${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel})
cc_library(${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
endif()
else()
set(target_build_flag 0)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册