未验证 提交 5781999d 编写于 作者: L Leo Chen 提交者: GitHub

Build all phi kernels directly into two static libraries (host and device) (#43247)

* Build all phi kernels directly into two static libraries (host and device)

* fix calling kernel_declare

* fix compile

* fix cpu compile

* fix rocm compile

* fix xpu compile

* fix xpu kp compile

* fix inference compile
上级 19a7524f
...@@ -61,7 +61,9 @@ endfunction() ...@@ -61,7 +61,9 @@ endfunction()
# call kernel_declare need to make sure whether the target of input exists # call kernel_declare need to make sure whether the target of input exists
function(kernel_declare TARGET_LIST) function(kernel_declare TARGET_LIST)
# message("TARGET LIST ${TARGET_LIST}")
foreach(kernel_path ${TARGET_LIST}) foreach(kernel_path ${TARGET_LIST})
# message("kernel path ${kernel_path}" )
file(READ ${kernel_path} kernel_impl) file(READ ${kernel_path} kernel_impl)
string( string(
REGEX REGEX
...@@ -111,6 +113,7 @@ function(kernel_declare TARGET_LIST) ...@@ -111,6 +113,7 @@ function(kernel_declare TARGET_LIST)
endfunction() endfunction()
function(kernel_library TARGET) function(kernel_library TARGET)
return()
set(common_srcs) set(common_srcs)
set(cpu_srcs) set(cpu_srcs)
set(gpu_srcs) set(gpu_srcs)
......
...@@ -37,6 +37,7 @@ endif() ...@@ -37,6 +37,7 @@ endif()
# fluid_modules exclude API-interface of inference/api and inference/capi_exp # fluid_modules exclude API-interface of inference/api and inference/capi_exp
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES) get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
get_property(phi_modules GLOBAL PROPERTY PHI_MODULES) get_property(phi_modules GLOBAL PROPERTY PHI_MODULES)
get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS)
set(utils_modules stringpiece pretty_log string_helper benchmark) set(utils_modules stringpiece pretty_log string_helper benchmark)
add_subdirectory(api) add_subdirectory(api)
...@@ -50,7 +51,7 @@ set(STATIC_INFERENCE_API ...@@ -50,7 +51,7 @@ set(STATIC_INFERENCE_API
reset_tensor_array reset_tensor_array
analysis_config analysis_config
paddle_pass_builder paddle_pass_builder
activation_functions phi
${mkldnn_quantizer_cfg}) ${mkldnn_quantizer_cfg})
#windows GPU static library over the limit, so not create_static_lib, and cc_library is dummy #windows GPU static library over the limit, so not create_static_lib, and cc_library is dummy
...@@ -59,7 +60,7 @@ if(WIN32 AND WITH_GPU) ...@@ -59,7 +60,7 @@ if(WIN32 AND WITH_GPU)
${utils_modules}) ${utils_modules})
else() else()
create_static_lib(paddle_inference ${fluid_modules} ${phi_modules} create_static_lib(paddle_inference ${fluid_modules} ${phi_modules}
${STATIC_INFERENCE_API} ${utils_modules}) ${phi_kernels} ${STATIC_INFERENCE_API} ${utils_modules})
endif() endif()
if(NOT APPLE) if(NOT APPLE)
......
...@@ -108,7 +108,7 @@ register_operators(EXCLUDES py_layer_op py_func_op warpctc_op dgc_op load_combin ...@@ -108,7 +108,7 @@ register_operators(EXCLUDES py_layer_op py_func_op warpctc_op dgc_op load_combin
op_library(run_program_op SRCS run_program_op.cc run_program_op.cu.cc DEPS executor_cache ${OP_HEADER_DEPS}) op_library(run_program_op SRCS run_program_op.cc run_program_op.cu.cc DEPS executor_cache ${OP_HEADER_DEPS})
target_link_libraries(run_program_op cuda_graph_with_memory_pool) target_link_libraries(run_program_op cuda_graph_with_memory_pool)
op_library(quantize_linear_op DEPS cast_kernel) op_library(quantize_linear_op DEPS phi)
op_library(save_combine_op DEPS string_array) op_library(save_combine_op DEPS string_array)
op_library(load_combine_op DEPS string_array) op_library(load_combine_op DEPS string_array)
......
...@@ -38,6 +38,7 @@ set(PHI_DEPS ...@@ -38,6 +38,7 @@ set(PHI_DEPS
string_tensor string_tensor
api_scalar api_scalar
api_int_array) api_int_array)
get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS) get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS)
set(PHI_DEPS ${PHI_DEPS} ${phi_kernels}) set(PHI_DEPS ${PHI_DEPS} ${phi_kernels})
......
...@@ -335,11 +335,12 @@ cc_library( ...@@ -335,11 +335,12 @@ cc_library(
cc_library( cc_library(
api_gen_utils api_gen_utils
SRCS api_gen_utils.cc SRCS api_gen_utils.cc
DEPS phi_tensor_raw selected_rows sparse_csr_tensor sparse_coo_tensor) DEPS phi_tensor_raw selected_rows sparse_csr_tensor sparse_coo_tensor
infermeta_utils)
cc_library( cc_library(
phi_data_transform phi_data_transform
SRCS data_transform.cc SRCS data_transform.cc
DEPS phi_tensor_raw transfer_layout_kernel cast_kernel copy_kernel tensor) DEPS phi_tensor_raw phi tensor)
cc_library( cc_library(
api_custom_impl api_custom_impl
SRCS api_custom_impl.cc SRCS api_custom_impl.cc
...@@ -404,7 +405,7 @@ cc_library( ...@@ -404,7 +405,7 @@ cc_library(
cc_library( cc_library(
tensor_copy tensor_copy
SRCS tensor_copy.cc SRCS tensor_copy.cc
DEPS phi_tensor_raw copy_kernel kernel_dispatch api_gen_utils) DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils)
cc_library( cc_library(
api_scalar api_scalar
SRCS scalar.cc SRCS scalar.cc
......
...@@ -41,124 +41,104 @@ set(COMMON_KERNEL_DEPS ...@@ -41,124 +41,104 @@ set(COMMON_KERNEL_DEPS
selected_rows_functor) selected_rows_functor)
# remove this dep after removing fluid deps on tensor creation # remove this dep after removing fluid deps on tensor creation
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} phi_api_utils) set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} phi_api_utils)
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} infermeta) set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} infermeta infermeta_utils)
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} switch_autotune)
# [ 2. Kernels that most kernels depend on ] set(COMMON_KERNEL_DEPS
# There are a few kernels that are very basic operations, and most of the
# kernels depend on these kernels.
set(COMMON_BAISC_KERNELS empty_kernel full_kernel)
kernel_library(empty_kernel DEPS ${COMMON_KERNEL_DEPS})
kernel_library(full_kernel DEPS ${COMMON_KERNEL_DEPS} empty_kernel)
# [ 3. Kernels with special dependencies ]
# Some kernels depend on some targets that are not commonly used.
# These targets are not suitable for common dependencies.
# In this case, you need to manually generate them here.
set(AUTOTUNE_KERNELS conv_kernel conv_grad_kernel conv_grad_grad_kernel
conv_transpose_kernel conv_transpose_grad_kernel)
set(MANUAL_BUILD_KERNELS
${AUTOTUNE_KERNELS}
cross_entropy_kernel
adam_kernel
adamw_kernel
deformable_conv_kernel
deformable_conv_grad_kernel
eigh_kernel
gumbel_softmax_kernel
gumbel_softmax_grad_kernel
hierarchical_sigmoid_kernel
hierarchical_sigmoid_grad_kernel
matrix_power_kernel
matrix_power_grad_kernel
maxout_kernel
maxout_grad_kernel
pool_kernel
put_along_axis_kernel
put_along_axis_grad_kernel
segment_pool_kernel
segment_pool_grad_kernel
softmax_kernel
softmax_grad_kernel
take_along_axis_kernel
take_along_axis_grad_kernel
triangular_solve_grad_kernel
determinant_grad_kernel
reduce_sum_kernel
reduce_mean_kernel
rnn_kernel
rnn_grad_kernel
warpctc_kernel
warpctc_grad_kernel)
foreach(src ${AUTOTUNE_KERNELS})
kernel_library(${src} DEPS ${COMMON_KERNEL_DEPS} switch_autotune)
endforeach()
kernel_library(
adam_kernel
DEPS
gflags
glog
flags
${COMMON_KERNEL_DEPS} ${COMMON_KERNEL_DEPS}
selected_rows_functor
threadpool threadpool
jit_kernel_helper) jit_kernel_helper
kernel_library(adamw_kernel DEPS ${COMMON_KERNEL_DEPS} adam_kernel) softmax
kernel_library(cross_entropy_kernel DEPS ${COMMON_KERNEL_DEPS} softmax cross_entropy
cross_entropy) matrix_bit_code
kernel_library(deformable_conv_kernel DEPS ${COMMON_KERNEL_DEPS} lapack_function
deformable_conv_functor) lstm_compute
kernel_library(deformable_conv_grad_kernel DEPS ${COMMON_KERNEL_DEPS} gru_compute
deformable_conv_functor) deformable_conv_functor
kernel_library(determinant_grad_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_reduce
matrix_inverse) segment_pooling
kernel_library(eigh_kernel DEPS ${COMMON_KERNEL_DEPS} lapack_function) gather_scatter_kernel
kernel_library(hierarchical_sigmoid_kernel DEPS ${COMMON_KERNEL_DEPS} pooling
matrix_bit_code) maxouting
kernel_library(hierarchical_sigmoid_grad_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_inverse
matrix_bit_code) phi_dynload_warpctc
kernel_library(gumbel_softmax_kernel DEPS ${COMMON_KERNEL_DEPS} softmax) sequence_padding
kernel_library(gumbel_softmax_grad_kernel DEPS ${COMMON_KERNEL_DEPS} softmax) sequence_scale)
kernel_library(reduce_sum_kernel DEPS ${COMMON_KERNEL_DEPS} cast_kernel)
kernel_library(reduce_mean_kernel DEPS ${COMMON_KERNEL_DEPS} cast_kernel)
kernel_library(matrix_power_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_inverse)
kernel_library(matrix_power_grad_kernel DEPS ${COMMON_KERNEL_DEPS}
matrix_inverse)
kernel_library(maxout_kernel DEPS ${COMMON_KERNEL_DEPS} maxouting)
kernel_library(maxout_grad_kernel DEPS ${COMMON_KERNEL_DEPS} maxouting)
kernel_library(pool_kernel DEPS ${COMMON_KERNEL_DEPS} pooling)
kernel_library(put_along_axis_kernel DEPS ${COMMON_KERNEL_DEPS}
gather_scatter_kernel)
kernel_library(put_along_axis_grad_kernel DEPS ${COMMON_KERNEL_DEPS}
gather_scatter_kernel)
kernel_library(segment_pool_kernel DEPS ${COMMON_KERNEL_DEPS} segment_pooling)
kernel_library(segment_pool_grad_kernel DEPS ${COMMON_KERNEL_DEPS}
segment_pooling)
kernel_library(softmax_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
kernel_library(softmax_grad_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
kernel_library(take_along_axis_kernel DEPS ${COMMON_KERNEL_DEPS}
gather_scatter_kernel)
kernel_library(take_along_axis_grad_kernel DEPS ${COMMON_KERNEL_DEPS}
gather_scatter_kernel)
kernel_library(triangular_solve_grad_kernel DEPS ${COMMON_KERNEL_DEPS}
matrix_reduce)
kernel_library(rnn_kernel DEPS ${COMMON_KERNEL_DEPS} concat_and_split_functor
lstm_compute gru_compute)
kernel_library(rnn_grad_kernel DEPS ${COMMON_KERNEL_DEPS}
concat_and_split_functor lstm_compute gru_compute)
kernel_library(warpctc_kernel DEPS ${COMMON_KERNEL_DEPS} phi_dynload_warpctc
sequence_padding sequence_scale)
kernel_library(warpctc_grad_kernel DEPS ${COMMON_KERNEL_DEPS}
phi_dynload_warpctc sequence_padding sequence_scale)
# 4. auto parse and build kernel targets by cmake
register_kernels(EXCLUDES ${COMMON_BAISC_KERNELS} ${MANUAL_BUILD_KERNELS} DEPS
${COMMON_KERNEL_DEPS} ${COMMON_BAISC_KERNELS})
# phi sparse kernels set(COMMON_KERNEL_DEPS
add_subdirectory(sparse) ${COMMON_KERNEL_DEPS}
# phi selected_rows kernels dense_tensor
add_subdirectory(selected_rows) string_tensor
sparse_coo_tensor
sparse_csr_tensor
kernel_context
kernel_factory
arg_map_context
convert_utils
lod_utils
custom_kernel
string_infermeta
utf8proc)
copy_if_different(${kernel_declare_file} ${kernel_declare_file_final}) copy_if_different(${kernel_declare_file} ${kernel_declare_file_final})
# For strings kernels # For strings kernels
add_subdirectory(strings) add_subdirectory(strings)
# Kernel header lists: public headers, impl headers and primitive headers.
file(GLOB kernel_h
     "*.h" "selected_rows/*.h" "sparse/*.h" "strings/*.h")
file(GLOB kernel_impl_h
     "impl/*.h" "selected_rows/impl/*.h")
file(GLOB kernel_primitive_h
     "primitive/*.h")

# C++ (.cc) kernel sources: the top-level directory plus the cpu/ directory
# of each kernel family (dense, selected_rows, sparse, strings).
file(GLOB kernel_cc
     "*.cc" "cpu/*.cc"
     "selected_rows/*.cc" "selected_rows/cpu/*.cc"
     "sparse/*.cc" "sparse/cpu/*.cc"
     "strings/*.cc" "strings/cpu/*.cc")

# Device (.cu / .cu.cc) kernel sources. The gpudnn/ and kps/ kernels are
# collected into this same list rather than into separate variables.
file(GLOB kernel_cu
     "gpu/*.cu" "gpu/*.cu.cc"
     "gpudnn/*.cu" "kps/*.cu"
     "selected_rows/gpu/*.cu"
     "sparse/gpu/*.cu"
     "strings/*.cu" "strings/gpu/*.cu")

# XPU kernel sources.
file(GLOB kernel_xpu "xpu/*.cc")
# Build the phi kernel libraries directly from the globbed source lists and
# publish their target names through the global PHI_KERNELS property.
#
# kernel_declare() takes the whole file list as its single TARGET_LIST
# parameter, so every call below passes the list quoted ("${var}"). Unquoted,
# the list would be split into separate arguments and only the first file
# would be processed.

# CPU kernels.
add_library(phi_cpu ${kernel_cc})
kernel_declare("${kernel_cc}")
target_link_libraries(phi_cpu ${COMMON_KERNEL_DEPS})
# Start the list with phi_cpu; device libraries are appended below so that
# enabling one backend never drops another from the list.
set_property(GLOBAL PROPERTY PHI_KERNELS phi_cpu)

# GPU kernels (CUDA or ROCm).
if(WITH_GPU OR WITH_ROCM)
  if(WITH_GPU)
    add_library(phi_gpu ${kernel_cu})
  elseif(WITH_ROCM)
    hip_add_library(phi_gpu STATIC ${kernel_cu})
  endif()
  kernel_declare("${kernel_cu}")
  target_link_libraries(phi_gpu ${COMMON_KERNEL_DEPS})
  set_property(GLOBAL APPEND PROPERTY PHI_KERNELS phi_gpu)
endif()

# XPU kernels.
if(WITH_XPU)
  if(WITH_XPU_KP)
    # With XPU KP enabled, the kps/*.cu sources are compiled into phi_xpu too.
    # NOTE(review): kernel_xpu_kps is not passed to kernel_declare() below;
    # confirm whether the kernels in those files need declarations as well.
    file(GLOB kernel_xpu_kps "kps/*.cu")
    xpu_add_library(phi_xpu STATIC ${kernel_xpu} ${kernel_xpu_kps})
  else()
    add_library(phi_xpu ${kernel_xpu})
  endif()
  # Quoted like the CPU/GPU calls: the unquoted form handed only the first
  # XPU source file to kernel_declare().
  kernel_declare("${kernel_xpu}")
  target_link_libraries(phi_xpu ${COMMON_KERNEL_DEPS})
  set_property(GLOBAL APPEND PROPERTY PHI_KERNELS phi_xpu)
endif()
...@@ -11,7 +11,7 @@ cc_test(test_type_info SRCS test_type_info.cc) ...@@ -11,7 +11,7 @@ cc_test(test_type_info SRCS test_type_info.cc)
cc_test( cc_test(
test_kernel_factory test_kernel_factory
SRCS test_kernel_factory.cc SRCS test_kernel_factory.cc
DEPS kernel_factory scale_kernel) DEPS kernel_factory phi)
cc_test( cc_test(
test_sparse_coo_tensor test_sparse_coo_tensor
SRCS test_sparse_coo_tensor.cc SRCS test_sparse_coo_tensor.cc
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册