# Build rules for the operators in this directory and its sub-directories.
#
# The script (1) resets the cached operator-library list and the temporary
# pybind header, (2) descends into the operator sub-directories,
# (3) registers the operators of this directory, declaring the ones listed in
# EXCLUDES by hand, (4) assembles the dependency lists shared by operators and
# tests, (5) declares the tests, and (6) copies the temporary pybind header to
# its final location.
include(operators)

# Clean the cache and reset the pybind_file content first when rebuilding.
unset(GLOB_OP_LIB CACHE)
unset(OP_LIBRARY CACHE)
set(pybind_file ${PADDLE_BINARY_DIR}/paddle/fluid/pybind/pybind.h.tmp CACHE INTERNAL "pybind.h file")
set(pybind_file_final ${PADDLE_BINARY_DIR}/paddle/fluid/pybind/pybind.h)
file(WRITE ${pybind_file} "// Generated by the paddle/fluid/operators/CMakeLists.txt. DO NOT EDIT!\n\n")

# ---------------------------------------------------------------------------
# Sub-directories
# ---------------------------------------------------------------------------
add_subdirectory(math)
add_subdirectory(eigen)
add_subdirectory(controlflow)
add_subdirectory(detection)
add_subdirectory(elementwise)
add_subdirectory(fused)
add_subdirectory(metrics)
add_subdirectory(optimizers)
add_subdirectory(reduce_ops)
add_subdirectory(sequence_ops)
add_subdirectory(string)
add_subdirectory(jit)

if(WITH_MKLDNN)
  add_subdirectory(mkldnn)
endif()

if(WITH_DISTRIBUTE)
  add_subdirectory(collective)
endif()

if(WITH_PSCORE)
  add_subdirectory(pscore)
endif()

add_subdirectory(amp)
add_subdirectory(reader)

if(NOT WIN32)
  add_subdirectory(nccl)
endif()

if(WITH_GPU AND TENSORRT_FOUND)
  add_subdirectory(tensorrt)
endif()

if(WITH_DLNNE)
  add_subdirectory(dlnne)
endif()

if(WITH_LITE)
  add_subdirectory(lite)
endif()

# ---------------------------------------------------------------------------
# Dependencies handed to every operator library declared in this file
# ---------------------------------------------------------------------------
set(OP_HEADER_DEPS xxhash executor)

if(WITH_GPU)
  # cub is added as a dependency only for CUDA compilers older than 11.0.
  # VERSION_LESS compares component-wise and matches the other CUDA version
  # check in this file; numeric LESS only works on version strings by accident.
  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 11.0)
    list(APPEND OP_HEADER_DEPS cub)
  endif()
endif()

if(WITH_POCKETFFT)
  list(APPEND OP_HEADER_DEPS pocketfft)
endif()

# Operators collected in OP_MKL_DEPS are passed to register_operators() through
# its EXCLUDES list below.
set(OP_MKL_DEPS "")
if(NOT WITH_MKL OR NOT WITH_AVX)
  list(APPEND OP_MKL_DEPS match_matrix_tensor_op)
  list(APPEND OP_MKL_DEPS var_conv_2d_op)
endif()
if(WITH_COVERAGE OR WIN32 OR WITH_NV_JETSON)
  list(APPEND OP_MKL_DEPS pyramid_hash_op)
endif()

if(WITH_UNITY_BUILD)
  # Load Unity Build rules for operators in paddle/fluid/operators.
  include(unity_build_rule.cmake)
endif()

list(APPEND OP_HEADER_DEPS pten)

# ---------------------------------------------------------------------------
# Operator registration
# ---------------------------------------------------------------------------
# Everything named in EXCLUDES is either declared explicitly further down
# (usually because it needs extra sources or dependencies) or left out of the
# build for the current configuration.
register_operators(EXCLUDES py_layer_op py_func_op warpctc_op dgc_op load_combine_op lstm_op run_program_op eye_op
  recurrent_op save_combine_op sparse_attention_op sync_batch_norm_op spectral_op cinn_launch_op ${OP_MKL_DEPS}
  DEPS ${OP_HEADER_DEPS})

op_library(run_program_op SRCS run_program_op.cc run_program_op.cu.cc DEPS executor_cache ${OP_HEADER_DEPS})
op_library(save_combine_op DEPS string_array)
op_library(load_combine_op DEPS string_array)

if(WITH_GPU OR WITH_ROCM)
  # The ROCm branch is tested first so that CUDNN_MAJOR_VERSION is only
  # expanded on non-ROCm GPU builds.
  if(WITH_ROCM)
    op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale SRCS warpctc_op.cc warpctc_op.cu)
  # warpctc_op needs cuDNN 7 or above; for older cuDNN the sources are listed
  # explicitly (presumably to leave the cuDNN-specific kernel out of the
  # build -- verify against op_library()).
  elseif(${CUDNN_MAJOR_VERSION} VERSION_LESS 7)
    op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale SRCS warpctc_op.cc warpctc_op.cu)
  else()
    op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale)
  endif()

  op_library(sync_batch_norm_op)
  file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(sync_batch_norm);\n")

  # sparse_attention_op is only built on non-Windows, non-ROCm, non-ARM hosts
  # with a CUDA compiler of version 11.2 or newer.
  if((NOT WIN32) AND (NOT WITH_ROCM) AND (NOT PADDLE_WITH_ARM) AND (NOT ${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 11.2))
    op_library(sparse_attention_op)
    file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(sparse_attention);\n")
  endif()
else()
  op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale)
endif()

# spectral_op: the device source and dynload_cuda are added for GPU/ROCm
# builds; dynload_mklrt and the MKL include directory are added when oneMKL is
# found and enabled.
if(WITH_GPU OR WITH_ROCM)
  if(MKL_FOUND AND WITH_ONEMKL)
    op_library(spectral_op SRCS spectral_op.cc spectral_op.cu DEPS dynload_cuda dynload_mklrt ${OP_HEADER_DEPS})
    target_include_directories(spectral_op PRIVATE ${MKL_INCLUDE})
  else()
    op_library(spectral_op SRCS spectral_op.cc spectral_op.cu DEPS dynload_cuda ${OP_HEADER_DEPS})
  endif()
else()
  if(MKL_FOUND AND WITH_ONEMKL)
    op_library(spectral_op SRCS spectral_op.cc DEPS dynload_mklrt ${OP_HEADER_DEPS})
    target_include_directories(spectral_op PRIVATE ${MKL_INCLUDE})
  else()
    op_library(spectral_op SRCS spectral_op.cc DEPS ${OP_HEADER_DEPS})
  endif()
endif()

if(WITH_ASCEND_CL)
  op_library(sync_batch_norm_op)
  file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(sync_batch_norm);\n")
endif()

op_library(lstm_op DEPS ${OP_HEADER_DEPS} lstm_compute)
op_library(eye_op DEPS ${OP_HEADER_DEPS})
op_library(recurrent_op DEPS ${OP_HEADER_DEPS})

# ---------------------------------------------------------------------------
# COMMON_OP_DEPS: dependencies shared by operator consumers and tests
# ---------------------------------------------------------------------------
set(COMMON_OP_DEPS ${OP_HEADER_DEPS})

if(WITH_DGC)
  op_library(dgc_op DEPS dgc)
  file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(dgc);\n")
  list(APPEND COMMON_OP_DEPS dgc)
endif()

cc_library(common_infer_shape_functions SRCS common_infer_shape_functions.cc DEPS operator)

list(APPEND COMMON_OP_DEPS selected_rows_functor selected_rows lapack_function
  lod_tensor maxouting unpooling pooling lod_rank_table context_project
  sequence_pooling segment_pooling executor device_memory_aligment generator)
list(APPEND COMMON_OP_DEPS dynload_warpctc)
list(APPEND COMMON_OP_DEPS sequence_padding sequence_scale cos_sim_functor memory jit_kernel_helper concat_and_split cross_entropy softmax vol2col im2col sampler sample_prob tree2col)
list(APPEND COMMON_OP_DEPS sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions beam_search fc matrix_inverse matrix_solve)
list(APPEND COMMON_OP_DEPS box_wrapper boost ps_gpu_wrapper)
list(APPEND COMMON_OP_DEPS common_infer_shape_functions)
list(APPEND COMMON_OP_DEPS eigen_function)
if(WITH_GPU OR WITH_ROCM)
  list(APPEND COMMON_OP_DEPS depthwise_conv prelu bert_encoder_functor)
endif()
# NOTE(review): device_memory_aligment is already appended above; this second
# append looks redundant -- confirm before removing it.
list(APPEND COMMON_OP_DEPS device_memory_aligment)
list(APPEND COMMON_OP_DEPS layer)
list(APPEND COMMON_OP_DEPS tensor_formatter)
list(APPEND COMMON_OP_DEPS op_version_registry)

if(WITH_ASCEND)
  list(APPEND COMMON_OP_DEPS ascend_wrapper)
endif()

if(WITH_ASCEND_CL)
  cc_test(assign_op_npu_test SRCS assign_op_npu_test.cc DEPS assign_op)
  cc_library(npu_op_runner SRCS npu_op_runner.cc DEPS operator npu_info)
  list(APPEND COMMON_OP_DEPS npu_op_runner)
endif()

if(WITH_CINN)
  cc_library(cinn_launch_op_helper SRCS cinn_launch_op_helper.cc DEPS operator cinn)
  cc_test(cinn_launch_op_helper_test SRCS cinn_launch_op_helper_test.cc DEPS cinn_launch_op_helper)
  op_library(cinn_launch_op SRCS cinn_launch_op.cc cinn_launch_op.cu.cc DEPS cinn_compiler cinn_launch_op_helper cinn ${OP_HEADER_DEPS})
  if(WITH_GPU)
    nv_test(cinn_launch_op_test SRCS cinn_launch_op_test.cc DEPS cinn_compiler cinn_launch_op elementwise_add_op)
  endif()
endif()

# FIXME(typhoonzero): operator deps may not be needed.
# op_library(lod_tensor_to_array_op DEPS lod_rank_table_op)
# op_library(array_to_lod_tensor_op DEPS lod_rank_table_op)
# op_library(unsqueeze_op DEPS reshape_op)
# op_library(squeeze_op DEPS reshape_op)
# op_library(flatten_op DEPS reshape_op)
# op_library(unstack_op DEPS stack_op)
# op_library(tensor_array_to_tensor_op DEPS concat_op)

# Publish the accumulated dependency list through the cache so that it is
# visible outside this directory scope.
set(OPERATOR_DEPS ${OPERATOR_DEPS} ${COMMON_OP_DEPS})
set(GLOB_OPERATOR_DEPS ${OPERATOR_DEPS} CACHE INTERNAL "Global Op dependencies")

# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
cc_test(test_common_infer_shape_functions SRCS test_common_infer_shape_functions.cc DEPS common_infer_shape_functions ${COMMON_OP_DEPS} activation_op elementwise_add_op softmax_op softmax)
cc_test(gather_test SRCS gather_test.cc DEPS tensor)
cc_test(assign_op_test SRCS assign_op_test.cc DEPS assign_op)
cc_test(scatter_test SRCS scatter_test.cc DEPS tensor math_function)
cc_test(beam_search_decode_op_test SRCS beam_search_decode_op_test.cc DEPS lod_tensor)
cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor memory)
cc_test(save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op)
cc_test(save_load_combine_op_test SRCS save_load_combine_op_test.cc DEPS save_combine_op load_combine_op)

# Device-dependent tests: CUDA, ROCm, or the CPU-only fallback.
if(WITH_GPU)
  nv_test(dropout_op_test SRCS dropout_op_test.cc DEPS dropout_op tensor generator)
  nv_test(test_leaky_relu_grad_grad_functor SRCS test_leaky_relu_grad_grad_functor.cc test_leaky_relu_grad_grad_functor.cu DEPS tensor device_context eigen3)
  nv_test(feed_forward_test SRCS feed_forward_test.cu DEPS elementwise_add_op matmul_op tensor generator)
elseif(WITH_ROCM)
  hip_test(dropout_op_test SRCS dropout_op_test.cc DEPS dropout_op tensor generator)
  hip_test(test_leaky_relu_grad_grad_functor SRCS test_leaky_relu_grad_grad_functor.cc test_leaky_relu_grad_grad_functor.cu DEPS tensor device_context eigen3)
else()
  cc_test(test_leaky_relu_grad_grad_functor SRCS test_leaky_relu_grad_grad_functor.cc DEPS tensor device_context eigen3)
endif()

cc_library(tensor_formatter SRCS tensor_formatter.cc DEPS ${OP_HEADER_DEPS})

if(WITH_PYTHON)
  cc_library(py_func_op SRCS py_func_op.cc DEPS op_registry python pybind)
  cc_library(py_layer_op SRCS py_layer_op.cc DEPS op_registry python pybind)
endif()

if(WITH_ASCEND_CL)
  cc_test(range_op_npu_test SRCS range_op_npu_test.cc DEPS op_registry range_op scope device_context enforce executor)
  cc_test(expand_op_npu_test SRCS expand_op_npu_test.cc DEPS op_registry expand_op eigen_function scope device_context enforce executor compare_op)
endif()

# Publish the operator-library list accumulated in OP_LIBRARY through the cache.
set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library")
add_subdirectory(benchmark)

cc_test(op_debug_string_test SRCS op_debug_string_test.cc DEPS elementwise_add_op)

if(WITH_ASCEND_CL)
  cc_test(transpose_op_npu_test SRCS transpose_op_npu_test.cc DEPS op_registry transpose_op scope device_context enforce executor)
endif()

if(WITH_MKLDNN)
  include(mkldnn/inplace_op_tests.cmake)
  include(mkldnn/caching_tests.cmake)
  include(mkldnn/nhwc_op_tests.cmake)
endif()

if(WITH_UNITY_BUILD)
  # When operators are compiled with Unity Build, `register_operator` causes
  # the unity library to lose some symbols, so the link dependencies have to
  # be declared explicitly here.
  # The keyword-less signature is kept on purpose: this target is created
  # elsewhere, and CMake does not allow mixing the plain and keyword
  # signatures of target_link_libraries() on the same target.
  target_link_libraries(paddle_operators_unity ${OP_HEADER_DEPS} ${COMMON_OP_DEPS})
endif()

if(WITH_ASCEND_CL)
  cc_test(gelu_op_npu_test SRCS gelu_op_npu_test.cc DEPS op_registry gelu_op scope device_context enforce executor)
endif()

if(WITH_GPU OR WITH_ASCEND_CL)
  cc_test(copy_cross_scope_test SRCS copy_cross_scope_test.cc DEPS op_registry copy_cross_scope_op scope device_context enforce executor)
endif()

# Hand the temporary pybind header and its final path to copy_if_different().
copy_if_different(${pybind_file} ${pybind_file_final})