未验证 提交 5781999d 编写于 作者: L Leo Chen 提交者: GitHub

make all phi kernels to 2(host/device) static libraries directly (#43247)

* make all phi kernels to 2(host/device) static libraries directly

* fix calling kernel_declare

* fix compile

* fix cpu compile

* fix rocm compile

* fix xpu compile

* fix xpu kp compile

* fix inference compile
上级 19a7524f
......@@ -61,7 +61,9 @@ endfunction()
# Before calling kernel_declare, make sure that every input target exists
function(kernel_declare TARGET_LIST)
# message("TARGET LIST ${TARGET_LIST}")
foreach(kernel_path ${TARGET_LIST})
# message("kernel path ${kernel_path}" )
file(READ ${kernel_path} kernel_impl)
string(
REGEX
......@@ -111,6 +113,7 @@ function(kernel_declare TARGET_LIST)
endfunction()
function(kernel_library TARGET)
return()
set(common_srcs)
set(cpu_srcs)
set(gpu_srcs)
......
......@@ -37,6 +37,7 @@ endif()
# fluid_modules exclude API-interface of inference/api and inference/capi_exp
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
get_property(phi_modules GLOBAL PROPERTY PHI_MODULES)
get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS)
set(utils_modules stringpiece pretty_log string_helper benchmark)
add_subdirectory(api)
......@@ -50,7 +51,7 @@ set(STATIC_INFERENCE_API
reset_tensor_array
analysis_config
paddle_pass_builder
activation_functions
phi
${mkldnn_quantizer_cfg})
# On Windows with GPU the static library exceeds the size limit, so create_static_lib is not used and the cc_library is only a dummy
......@@ -59,7 +60,7 @@ if(WIN32 AND WITH_GPU)
${utils_modules})
else()
create_static_lib(paddle_inference ${fluid_modules} ${phi_modules}
${STATIC_INFERENCE_API} ${utils_modules})
${phi_kernels} ${STATIC_INFERENCE_API} ${utils_modules})
endif()
if(NOT APPLE)
......
......@@ -108,7 +108,7 @@ register_operators(EXCLUDES py_layer_op py_func_op warpctc_op dgc_op load_combin
op_library(run_program_op SRCS run_program_op.cc run_program_op.cu.cc DEPS executor_cache ${OP_HEADER_DEPS})
target_link_libraries(run_program_op cuda_graph_with_memory_pool)
op_library(quantize_linear_op DEPS cast_kernel)
op_library(quantize_linear_op DEPS phi)
op_library(save_combine_op DEPS string_array)
op_library(load_combine_op DEPS string_array)
......
......@@ -38,6 +38,7 @@ set(PHI_DEPS
string_tensor
api_scalar
api_int_array)
get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS)
set(PHI_DEPS ${PHI_DEPS} ${phi_kernels})
......
......@@ -335,11 +335,12 @@ cc_library(
cc_library(
api_gen_utils
SRCS api_gen_utils.cc
DEPS phi_tensor_raw selected_rows sparse_csr_tensor sparse_coo_tensor)
DEPS phi_tensor_raw selected_rows sparse_csr_tensor sparse_coo_tensor
infermeta_utils)
cc_library(
phi_data_transform
SRCS data_transform.cc
DEPS phi_tensor_raw transfer_layout_kernel cast_kernel copy_kernel tensor)
DEPS phi_tensor_raw phi tensor)
cc_library(
api_custom_impl
SRCS api_custom_impl.cc
......@@ -404,7 +405,7 @@ cc_library(
cc_library(
tensor_copy
SRCS tensor_copy.cc
DEPS phi_tensor_raw copy_kernel kernel_dispatch api_gen_utils)
DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils)
cc_library(
api_scalar
SRCS scalar.cc
......
......@@ -41,124 +41,104 @@ set(COMMON_KERNEL_DEPS
selected_rows_functor)
# remove this dep after removing fluid deps on tensor creation
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} phi_api_utils)
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} infermeta)
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} infermeta infermeta_utils)
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} switch_autotune)
# [ 2. Kernels that most kernels depend on ]
# There are a few kernels that are very basic operations, and most of the
# kernels depend on these kernels.
set(COMMON_BAISC_KERNELS empty_kernel full_kernel)
kernel_library(empty_kernel DEPS ${COMMON_KERNEL_DEPS})
kernel_library(full_kernel DEPS ${COMMON_KERNEL_DEPS} empty_kernel)
# [ 3. Kernels with special dependencies ]
# Some kernels depend on some targets that are not commonly used.
# These targets are not suitable for common dependencies.
# In this case, you need to manually generate them here.
set(AUTOTUNE_KERNELS conv_kernel conv_grad_kernel conv_grad_grad_kernel
conv_transpose_kernel conv_transpose_grad_kernel)
set(MANUAL_BUILD_KERNELS
${AUTOTUNE_KERNELS}
cross_entropy_kernel
adam_kernel
adamw_kernel
deformable_conv_kernel
deformable_conv_grad_kernel
eigh_kernel
gumbel_softmax_kernel
gumbel_softmax_grad_kernel
hierarchical_sigmoid_kernel
hierarchical_sigmoid_grad_kernel
matrix_power_kernel
matrix_power_grad_kernel
maxout_kernel
maxout_grad_kernel
pool_kernel
put_along_axis_kernel
put_along_axis_grad_kernel
segment_pool_kernel
segment_pool_grad_kernel
softmax_kernel
softmax_grad_kernel
take_along_axis_kernel
take_along_axis_grad_kernel
triangular_solve_grad_kernel
determinant_grad_kernel
reduce_sum_kernel
reduce_mean_kernel
rnn_kernel
rnn_grad_kernel
warpctc_kernel
warpctc_grad_kernel)
foreach(src ${AUTOTUNE_KERNELS})
kernel_library(${src} DEPS ${COMMON_KERNEL_DEPS} switch_autotune)
endforeach()
kernel_library(
adam_kernel
DEPS
gflags
glog
flags
${COMMON_KERNEL_DEPS}
selected_rows_functor
threadpool
jit_kernel_helper)
kernel_library(adamw_kernel DEPS ${COMMON_KERNEL_DEPS} adam_kernel)
kernel_library(cross_entropy_kernel DEPS ${COMMON_KERNEL_DEPS} softmax
cross_entropy)
kernel_library(deformable_conv_kernel DEPS ${COMMON_KERNEL_DEPS}
deformable_conv_functor)
kernel_library(deformable_conv_grad_kernel DEPS ${COMMON_KERNEL_DEPS}
deformable_conv_functor)
kernel_library(determinant_grad_kernel DEPS ${COMMON_KERNEL_DEPS}
matrix_inverse)
kernel_library(eigh_kernel DEPS ${COMMON_KERNEL_DEPS} lapack_function)
kernel_library(hierarchical_sigmoid_kernel DEPS ${COMMON_KERNEL_DEPS}
matrix_bit_code)
kernel_library(hierarchical_sigmoid_grad_kernel DEPS ${COMMON_KERNEL_DEPS}
matrix_bit_code)
kernel_library(gumbel_softmax_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
kernel_library(gumbel_softmax_grad_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
kernel_library(reduce_sum_kernel DEPS ${COMMON_KERNEL_DEPS} cast_kernel)
kernel_library(reduce_mean_kernel DEPS ${COMMON_KERNEL_DEPS} cast_kernel)
kernel_library(matrix_power_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_inverse)
kernel_library(matrix_power_grad_kernel DEPS ${COMMON_KERNEL_DEPS}
matrix_inverse)
kernel_library(maxout_kernel DEPS ${COMMON_KERNEL_DEPS} maxouting)
kernel_library(maxout_grad_kernel DEPS ${COMMON_KERNEL_DEPS} maxouting)
kernel_library(pool_kernel DEPS ${COMMON_KERNEL_DEPS} pooling)
kernel_library(put_along_axis_kernel DEPS ${COMMON_KERNEL_DEPS}
gather_scatter_kernel)
kernel_library(put_along_axis_grad_kernel DEPS ${COMMON_KERNEL_DEPS}
gather_scatter_kernel)
kernel_library(segment_pool_kernel DEPS ${COMMON_KERNEL_DEPS} segment_pooling)
kernel_library(segment_pool_grad_kernel DEPS ${COMMON_KERNEL_DEPS}
segment_pooling)
kernel_library(softmax_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
kernel_library(softmax_grad_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
kernel_library(take_along_axis_kernel DEPS ${COMMON_KERNEL_DEPS}
gather_scatter_kernel)
kernel_library(take_along_axis_grad_kernel DEPS ${COMMON_KERNEL_DEPS}
gather_scatter_kernel)
kernel_library(triangular_solve_grad_kernel DEPS ${COMMON_KERNEL_DEPS}
matrix_reduce)
kernel_library(rnn_kernel DEPS ${COMMON_KERNEL_DEPS} concat_and_split_functor
lstm_compute gru_compute)
kernel_library(rnn_grad_kernel DEPS ${COMMON_KERNEL_DEPS}
concat_and_split_functor lstm_compute gru_compute)
kernel_library(warpctc_kernel DEPS ${COMMON_KERNEL_DEPS} phi_dynload_warpctc
sequence_padding sequence_scale)
kernel_library(warpctc_grad_kernel DEPS ${COMMON_KERNEL_DEPS}
phi_dynload_warpctc sequence_padding sequence_scale)
# 4. auto parse and build kernel targets by cmake
register_kernels(EXCLUDES ${COMMON_BAISC_KERNELS} ${MANUAL_BUILD_KERNELS} DEPS
${COMMON_KERNEL_DEPS} ${COMMON_BAISC_KERNELS})
# Extend the shared dependency list with the helper libraries listed below.
# COMMON_KERNEL_DEPS is later linked into the aggregated kernel libraries
# (phi_cpu, phi_gpu, phi_xpu), so every library a kernel source needs has to
# be part of this single list.
set(COMMON_KERNEL_DEPS
${COMMON_KERNEL_DEPS}
threadpool
jit_kernel_helper
softmax
cross_entropy
matrix_bit_code
lapack_function
lstm_compute
gru_compute
deformable_conv_functor
matrix_reduce
segment_pooling
gather_scatter_kernel
pooling
maxouting
matrix_inverse
phi_dynload_warpctc
sequence_padding
sequence_scale)
# phi sparse kernels
add_subdirectory(sparse)
# phi selected_rows kernels
add_subdirectory(selected_rows)
# Append further targets to the shared dependency list. Judging by their
# names these are tensor types, kernel registration/context infrastructure and
# string-kernel support libraries -- confirm against their definitions.
set(COMMON_KERNEL_DEPS
${COMMON_KERNEL_DEPS}
dense_tensor
string_tensor
sparse_coo_tensor
sparse_csr_tensor
kernel_context
kernel_factory
arg_map_context
convert_utils
lod_utils
custom_kernel
string_infermeta
utf8proc)
copy_if_different(${kernel_declare_file} ${kernel_declare_file_final})
# For strings kernels
add_subdirectory(strings)
# Collect kernel files by globbing, relative to the current source directory.
# NOTE: file(GLOB) is evaluated at configure time only; re-run CMake after
# adding or removing kernel files.

# Kernel header lists (not referenced in the visible part of this file).
file(GLOB kernel_h "*.h" "selected_rows/*.h" "sparse/*.h" "strings/*.h")
file(GLOB kernel_impl_h "impl/*.h" "selected_rows/impl/*.h")
file(GLOB kernel_primitive_h "primitive/*.h")
# Host kernel sources (*.cc); compiled into the phi_cpu library below.
file(
GLOB
kernel_cc
"*.cc"
"cpu/*.cc"
"selected_rows/*.cc"
"selected_rows/cpu/*.cc"
"sparse/*.cc"
"sparse/cpu/*.cc"
"strings/*.cc"
"strings/cpu/*.cc")
# Device kernel sources (*.cu / *.cu.cc), including the gpudnn and kps
# directories; compiled into phi_gpu below when WITH_GPU or WITH_ROCM is set.
file(
GLOB
kernel_cu
"gpu/*.cu"
"gpu/*.cu.cc"
"gpudnn/*.cu"
"kps/*.cu"
"selected_rows/gpu/*.cu"
"sparse/gpu/*.cu"
"strings/*.cu"
"strings/gpu/*.cu")
# file(GLOB kernel_cudnn "gpudnn/*.cu")
# file(GLOB kernel_kps "kps/*.cu")
# XPU kernel sources; compiled into phi_xpu below when WITH_XPU is set.
file(GLOB kernel_xpu "xpu/*.cc")
# Build all host kernel sources into one library, phi_cpu.
add_library(phi_cpu ${kernel_cc})
# The list is quoted so that it reaches kernel_declare as its single
# TARGET_LIST argument; kernel_declare then iterates over every file in it.
kernel_declare("${kernel_cc}")
target_link_libraries(phi_cpu ${COMMON_KERNEL_DEPS})
# Publish the kernel library name; other CMake files read it back with
# get_property(... GLOBAL PROPERTY PHI_KERNELS).
set_property(GLOBAL PROPERTY PHI_KERNELS phi_cpu)
# Build all device kernel sources into one library, phi_gpu, for CUDA
# (WITH_GPU) or ROCm (WITH_ROCM) builds.
if(WITH_GPU OR WITH_ROCM)
if(WITH_GPU)
add_library(phi_gpu ${kernel_cu})
elseif(WITH_ROCM)
# ROCm builds create the library through hip_add_library instead.
hip_add_library(phi_gpu STATIC ${kernel_cu})
endif()
# Quoted so the whole list is passed as the single TARGET_LIST argument.
kernel_declare("${kernel_cu}")
target_link_libraries(phi_gpu ${COMMON_KERNEL_DEPS})
# Overwrites the value set for the CPU-only case with both library names.
set_property(GLOBAL PROPERTY PHI_KERNELS phi_cpu phi_gpu)
endif()
# Build the XPU kernel sources into one library, phi_xpu, and register it as a
# phi kernel library. With WITH_XPU_KP the kps/*.cu sources are compiled into
# the same library through xpu_add_library; otherwise only the xpu/*.cc
# sources are used.
if(WITH_XPU)
  if(WITH_XPU_KP)
    file(GLOB kernel_xpu_kps "kps/*.cu")
    xpu_add_library(phi_xpu STATIC ${kernel_xpu} ${kernel_xpu_kps})
  else()
    add_library(phi_xpu ${kernel_xpu})
  endif()
  # kernel_declare(TARGET_LIST) has a single named parameter, so the list must
  # be passed quoted (as is done for kernel_cc and kernel_cu). Unquoted, the
  # list expands to one argument per file and only the first file is bound to
  # TARGET_LIST; all remaining XPU kernels would be silently skipped.
  kernel_declare("${kernel_xpu}")
  target_link_libraries(phi_xpu ${COMMON_KERNEL_DEPS})
  # Append rather than overwrite: PHI_KERNELS already contains phi_cpu (and
  # phi_gpu when a GPU/ROCm build is enabled), and none of them may be dropped.
  set_property(GLOBAL APPEND PROPERTY PHI_KERNELS phi_xpu)
endif()
......@@ -11,7 +11,7 @@ cc_test(test_type_info SRCS test_type_info.cc)
cc_test(
test_kernel_factory
SRCS test_kernel_factory.cc
DEPS kernel_factory scale_kernel)
DEPS kernel_factory phi)
cc_test(
test_sparse_coo_tensor
SRCS test_sparse_coo_tensor.cc
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册