diff --git a/cmake/phi.cmake b/cmake/phi.cmake index 4555d892f11ce0718ad93e35b8fef1aceb547f72..82d04f0c4695d6f1cbeb8a905040f7474e353364 100644 --- a/cmake/phi.cmake +++ b/cmake/phi.cmake @@ -111,6 +111,9 @@ function(kernel_declare TARGET_LIST) endfunction() function(kernel_library TARGET) + # NOTE(review): this early return turns kernel_library into a no-op, so every statement below it is unreachable. + # Presumably temporary while kernels are built as phi_cpu/phi_gpu/phi_xpu in paddle/phi/kernels/CMakeLists.txt - TODO confirm, then delete the dead body. + return() set(common_srcs) set(cpu_srcs) set(gpu_srcs) diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index 109cb5d8fe07df3774654b6b0669da824e61bb52..a29e530b2b80c0bb46622275915a8b0d97218aa8 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -37,6 +37,7 @@ endif() # fluid_modules exclude API-interface of inference/api and inference/capi_exp get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES) get_property(phi_modules GLOBAL PROPERTY PHI_MODULES) +get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS) set(utils_modules stringpiece pretty_log string_helper benchmark) add_subdirectory(api) @@ -50,7 +51,7 @@ set(STATIC_INFERENCE_API reset_tensor_array analysis_config paddle_pass_builder - activation_functions + phi ${mkldnn_quantizer_cfg}) #windows GPU static library over the limit, so not create_static_lib, and cc_library is dummy @@ -59,7 +60,7 @@ if(WIN32 AND WITH_GPU) ${utils_modules}) else() create_static_lib(paddle_inference ${fluid_modules} ${phi_modules} - ${STATIC_INFERENCE_API} ${utils_modules}) + ${phi_kernels} ${STATIC_INFERENCE_API} ${utils_modules}) endif() if(NOT APPLE) diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index b2fd59b47454e1a0020bc3f69bc9bec13a4e21e4..eb0664eb17d350dc5d05eb3a81a073272756ea11 100644 --- 
a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -108,7 +108,7 @@ register_operators(EXCLUDES py_layer_op py_func_op warpctc_op dgc_op load_combin op_library(run_program_op SRCS run_program_op.cc run_program_op.cu.cc DEPS executor_cache ${OP_HEADER_DEPS}) target_link_libraries(run_program_op cuda_graph_with_memory_pool) -op_library(quantize_linear_op DEPS cast_kernel) +op_library(quantize_linear_op DEPS phi) op_library(save_combine_op DEPS string_array) op_library(load_combine_op DEPS string_array) diff --git a/paddle/phi/CMakeLists.txt b/paddle/phi/CMakeLists.txt index 7f3dd1ddc38fb5eda880435cd75f34418b6c4d12..e20db18ea3f53ecbab33f6b04c88cf46e27a44d7 100644 --- a/paddle/phi/CMakeLists.txt +++ b/paddle/phi/CMakeLists.txt @@ -38,6 +38,7 @@ set(PHI_DEPS string_tensor api_scalar api_int_array) + get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS) set(PHI_DEPS ${PHI_DEPS} ${phi_kernels}) diff --git a/paddle/phi/api/lib/CMakeLists.txt b/paddle/phi/api/lib/CMakeLists.txt index a1c6989555f20d2abba470e500058e42ab8c1b15..d60e0245140a4e792627fe17de9a57757ec046b2 100644 --- a/paddle/phi/api/lib/CMakeLists.txt +++ b/paddle/phi/api/lib/CMakeLists.txt @@ -335,11 +335,12 @@ cc_library( cc_library( api_gen_utils SRCS api_gen_utils.cc - DEPS phi_tensor_raw selected_rows sparse_csr_tensor sparse_coo_tensor) + DEPS phi_tensor_raw selected_rows sparse_csr_tensor sparse_coo_tensor + infermeta_utils) cc_library( phi_data_transform SRCS data_transform.cc - DEPS phi_tensor_raw transfer_layout_kernel cast_kernel copy_kernel tensor) + DEPS phi_tensor_raw phi tensor) cc_library( api_custom_impl SRCS api_custom_impl.cc @@ -404,7 +405,7 @@ cc_library( cc_library( tensor_copy SRCS tensor_copy.cc - DEPS phi_tensor_raw copy_kernel kernel_dispatch api_gen_utils) + DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils) cc_library( api_scalar SRCS scalar.cc diff --git a/paddle/phi/kernels/CMakeLists.txt b/paddle/phi/kernels/CMakeLists.txt index 
67795c2a8aa6e4b0a547826d9360590a8cf27708..acfeaf21d0742debca0d9411c733e9eb9a1d4dc4 100644 --- a/paddle/phi/kernels/CMakeLists.txt +++ b/paddle/phi/kernels/CMakeLists.txt @@ -41,124 +41,104 @@ set(COMMON_KERNEL_DEPS selected_rows_functor) # remove this dep after removing fluid deps on tensor creation set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} phi_api_utils) -set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} infermeta) +set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} infermeta infermeta_utils) +set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} switch_autotune) -# [ 2. Kernels that most kernels depend on ] -# There are a few kernels that are very basic operations, and most of the -# kernels depend on these kernels. -set(COMMON_BAISC_KERNELS empty_kernel full_kernel) -kernel_library(empty_kernel DEPS ${COMMON_KERNEL_DEPS}) -kernel_library(full_kernel DEPS ${COMMON_KERNEL_DEPS} empty_kernel) - -# [ 3. Kernels with special dependencies ] -# Some kernels depend on some targets that are not commonly used. -# These targets are not suitable for common dependencies. -# In this case, you need to manually generate them here. 
-set(AUTOTUNE_KERNELS conv_kernel conv_grad_kernel conv_grad_grad_kernel - conv_transpose_kernel conv_transpose_grad_kernel) -set(MANUAL_BUILD_KERNELS - ${AUTOTUNE_KERNELS} - cross_entropy_kernel - adam_kernel - adamw_kernel - deformable_conv_kernel - deformable_conv_grad_kernel - eigh_kernel - gumbel_softmax_kernel - gumbel_softmax_grad_kernel - hierarchical_sigmoid_kernel - hierarchical_sigmoid_grad_kernel - matrix_power_kernel - matrix_power_grad_kernel - maxout_kernel - maxout_grad_kernel - pool_kernel - put_along_axis_kernel - put_along_axis_grad_kernel - segment_pool_kernel - segment_pool_grad_kernel - softmax_kernel - softmax_grad_kernel - take_along_axis_kernel - take_along_axis_grad_kernel - triangular_solve_grad_kernel - determinant_grad_kernel - reduce_sum_kernel - reduce_mean_kernel - rnn_kernel - rnn_grad_kernel - warpctc_kernel - warpctc_grad_kernel) -foreach(src ${AUTOTUNE_KERNELS}) - kernel_library(${src} DEPS ${COMMON_KERNEL_DEPS} switch_autotune) -endforeach() -kernel_library( - adam_kernel - DEPS - gflags - glog - flags - ${COMMON_KERNEL_DEPS} - selected_rows_functor - threadpool - jit_kernel_helper) -kernel_library(adamw_kernel DEPS ${COMMON_KERNEL_DEPS} adam_kernel) -kernel_library(cross_entropy_kernel DEPS ${COMMON_KERNEL_DEPS} softmax - cross_entropy) -kernel_library(deformable_conv_kernel DEPS ${COMMON_KERNEL_DEPS} - deformable_conv_functor) -kernel_library(deformable_conv_grad_kernel DEPS ${COMMON_KERNEL_DEPS} - deformable_conv_functor) -kernel_library(determinant_grad_kernel DEPS ${COMMON_KERNEL_DEPS} - matrix_inverse) -kernel_library(eigh_kernel DEPS ${COMMON_KERNEL_DEPS} lapack_function) -kernel_library(hierarchical_sigmoid_kernel DEPS ${COMMON_KERNEL_DEPS} - matrix_bit_code) -kernel_library(hierarchical_sigmoid_grad_kernel DEPS ${COMMON_KERNEL_DEPS} - matrix_bit_code) -kernel_library(gumbel_softmax_kernel DEPS ${COMMON_KERNEL_DEPS} softmax) -kernel_library(gumbel_softmax_grad_kernel DEPS ${COMMON_KERNEL_DEPS} softmax) 
-kernel_library(reduce_sum_kernel DEPS ${COMMON_KERNEL_DEPS} cast_kernel) -kernel_library(reduce_mean_kernel DEPS ${COMMON_KERNEL_DEPS} cast_kernel) -kernel_library(matrix_power_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_inverse) -kernel_library(matrix_power_grad_kernel DEPS ${COMMON_KERNEL_DEPS} - matrix_inverse) -kernel_library(maxout_kernel DEPS ${COMMON_KERNEL_DEPS} maxouting) -kernel_library(maxout_grad_kernel DEPS ${COMMON_KERNEL_DEPS} maxouting) -kernel_library(pool_kernel DEPS ${COMMON_KERNEL_DEPS} pooling) -kernel_library(put_along_axis_kernel DEPS ${COMMON_KERNEL_DEPS} - gather_scatter_kernel) -kernel_library(put_along_axis_grad_kernel DEPS ${COMMON_KERNEL_DEPS} - gather_scatter_kernel) -kernel_library(segment_pool_kernel DEPS ${COMMON_KERNEL_DEPS} segment_pooling) -kernel_library(segment_pool_grad_kernel DEPS ${COMMON_KERNEL_DEPS} - segment_pooling) -kernel_library(softmax_kernel DEPS ${COMMON_KERNEL_DEPS} softmax) -kernel_library(softmax_grad_kernel DEPS ${COMMON_KERNEL_DEPS} softmax) -kernel_library(take_along_axis_kernel DEPS ${COMMON_KERNEL_DEPS} - gather_scatter_kernel) -kernel_library(take_along_axis_grad_kernel DEPS ${COMMON_KERNEL_DEPS} - gather_scatter_kernel) -kernel_library(triangular_solve_grad_kernel DEPS ${COMMON_KERNEL_DEPS} - matrix_reduce) -kernel_library(rnn_kernel DEPS ${COMMON_KERNEL_DEPS} concat_and_split_functor - lstm_compute gru_compute) -kernel_library(rnn_grad_kernel DEPS ${COMMON_KERNEL_DEPS} - concat_and_split_functor lstm_compute gru_compute) -kernel_library(warpctc_kernel DEPS ${COMMON_KERNEL_DEPS} phi_dynload_warpctc - sequence_padding sequence_scale) -kernel_library(warpctc_grad_kernel DEPS ${COMMON_KERNEL_DEPS} - phi_dynload_warpctc sequence_padding sequence_scale) - -# 4. 
auto parse and build kernel targets by cmake -register_kernels(EXCLUDES ${COMMON_BAISC_KERNELS} ${MANUAL_BUILD_KERNELS} DEPS - ${COMMON_KERNEL_DEPS} ${COMMON_BAISC_KERNELS}) +set(COMMON_KERNEL_DEPS + ${COMMON_KERNEL_DEPS} + threadpool + jit_kernel_helper + softmax + cross_entropy + matrix_bit_code + lapack_function + lstm_compute + gru_compute + deformable_conv_functor + matrix_reduce + segment_pooling + gather_scatter_kernel + pooling + maxouting + matrix_inverse + phi_dynload_warpctc + sequence_padding + sequence_scale) -# phi sparse kernels -add_subdirectory(sparse) -# phi selected_rows kernels -add_subdirectory(selected_rows) +set(COMMON_KERNEL_DEPS + ${COMMON_KERNEL_DEPS} + dense_tensor + string_tensor + sparse_coo_tensor + sparse_csr_tensor + kernel_context + kernel_factory + arg_map_context + convert_utils + lod_utils + custom_kernel + string_infermeta + utf8proc) copy_if_different(${kernel_declare_file} ${kernel_declare_file_final}) # For strings kernels add_subdirectory(strings) + +file(GLOB kernel_h "*.h" "selected_rows/*.h" "sparse/*.h" "strings/*.h") +file(GLOB kernel_impl_h "impl/*.h" "selected_rows/impl/*.h") +file(GLOB kernel_primitive_h "primitive/*.h") +file( + GLOB + kernel_cc + "*.cc" + "cpu/*.cc" + "selected_rows/*.cc" + "selected_rows/cpu/*.cc" + "sparse/*.cc" + "sparse/cpu/*.cc" + "strings/*.cc" + "strings/cpu/*.cc") + +file( + GLOB + kernel_cu + "gpu/*.cu" + "gpu/*.cu.cc" + "gpudnn/*.cu" + "kps/*.cu" + "selected_rows/gpu/*.cu" + "sparse/gpu/*.cu" + "strings/*.cu" + "strings/gpu/*.cu") + +# gpudnn/*.cu and kps/*.cu are already globbed into kernel_cu above, so they get no separate list. +# NOTE(review): the kernel_declare() calls below run after copy_if_different() above - confirm this ordering is intended. +file(GLOB kernel_xpu "xpu/*.cc") + +add_library(phi_cpu ${kernel_cc}) +kernel_declare("${kernel_cc}") +target_link_libraries(phi_cpu ${COMMON_KERNEL_DEPS}) +set_property(GLOBAL PROPERTY PHI_KERNELS phi_cpu) + +if(WITH_GPU OR WITH_ROCM) + if(WITH_GPU) + add_library(phi_gpu ${kernel_cu}) + elseif(WITH_ROCM) + hip_add_library(phi_gpu STATIC ${kernel_cu}) + endif() + 
kernel_declare("${kernel_cu}") + target_link_libraries(phi_gpu ${COMMON_KERNEL_DEPS}) + set_property(GLOBAL APPEND PROPERTY PHI_KERNELS phi_gpu) # extend the list seeded with phi_cpu instead of overwriting it +endif() + +if(WITH_XPU) + if(WITH_XPU_KP) + file(GLOB kernel_xpu_kps "kps/*.cu") + xpu_add_library(phi_xpu STATIC ${kernel_xpu} ${kernel_xpu_kps}) + else() + add_library(phi_xpu ${kernel_xpu}) + endif() + kernel_declare("${kernel_xpu}") # quoted: kernel_declare binds only its first argument to TARGET_LIST + target_link_libraries(phi_xpu ${COMMON_KERNEL_DEPS}) + set_property(GLOBAL APPEND PROPERTY PHI_KERNELS phi_xpu) # append so a previously registered phi_gpu is not dropped +endif() diff --git a/paddle/phi/tests/core/CMakeLists.txt b/paddle/phi/tests/core/CMakeLists.txt index 57a55963d5c66e75907369eed439f7e27aacb978..aeda9be3d2985315a0d6a9d8221250ab817b13db 100644 --- a/paddle/phi/tests/core/CMakeLists.txt +++ b/paddle/phi/tests/core/CMakeLists.txt @@ -11,7 +11,7 @@ cc_test(test_type_info SRCS test_type_info.cc) cc_test( test_kernel_factory SRCS test_kernel_factory.cc - DEPS kernel_factory phi) cc_test( test_sparse_coo_tensor SRCS test_sparse_coo_tensor.cc