include(operators)

# On a rebuild, drop the cached operator lists and restart pybind.h from an
# empty header so entries from a previous configure run do not linger.
unset(GLOB_OP_LIB CACHE)
unset(OP_LIBRARY CACHE)
set(pybind_file ${PADDLE_BINARY_DIR}/paddle/fluid/pybind/pybind.h
    CACHE INTERNAL "pybind.h file")
file(WRITE ${pybind_file}
     "// Generated by the paddle/fluid/operator/CMakeLists.txt. DO NOT EDIT!\n\n")

# Operator sub-packages that are always configured.
add_subdirectory(math)
add_subdirectory(controlflow)
add_subdirectory(detection)
add_subdirectory(elementwise)
add_subdirectory(fused)
add_subdirectory(metrics)
add_subdirectory(ngraph)
add_subdirectory(optimizers)
add_subdirectory(reduce_ops)
add_subdirectory(sequence_ops)
add_subdirectory(jit)

# Sub-packages gated on build options / platform.
if(WITH_DISTRIBUTE)
  add_subdirectory(distributed)
  add_subdirectory(distributed_ops)
  add_subdirectory(collective)
endif()

add_subdirectory(reader)

if(NOT WIN32)
  add_subdirectory(nccl)
endif()

if(WITH_GPU AND TENSORRT_FOUND)
  add_subdirectory(tensorrt)
endif()

if(ANAKIN_SUBGRAPH)
  add_subdirectory(anakin)
endif()

# Dependencies handed to register_operators() through DEPS.
set(OP_HEADER_DEPS xxhash executor)
if(WITH_GPU)
  list(APPEND OP_HEADER_DEPS cub)
endif()

set(OP_PREFETCH_DEPS "")
if(WITH_DISTRIBUTE)
  list(APPEND OP_PREFETCH_DEPS parameter_prefetch)
endif()

# Operators added to the EXCLUDES list below when WITH_MKL is off.
set(OP_ONLY_MKL "")
if(NOT WITH_MKL)
  list(APPEND OP_ONLY_MKL match_matrix_tensor_op var_conv_2d_op)
endif()

# py_func_op, warpctc_op, dgc_op, conv_fusion_op and sync_batch_norm_op are
# excluded here because each gets its own explicit library call further down.
register_operators(
  EXCLUDES
    py_func_op
    warpctc_op
    dgc_op
    conv_fusion_op
    sync_batch_norm_op
    ${OP_ONLY_MKL}
  DEPS
    ${OP_HEADER_DEPS}
    ${OP_PREFETCH_DEPS})

if(WITH_GPU)
  # warpctc_op needs cuDNN 7 or above; with an older cuDNN its sources are
  # listed explicitly instead of being left to op_library().
  if(${CUDNN_MAJOR_VERSION} VERSION_LESS 7)
    op_library(warpctc_op
               DEPS dynload_warpctc sequence_padding sequence_scale
               SRCS warpctc_op.cc warpctc_op.cu.cc)
  else()
    op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale)
  endif()

  # conv_fusion_op is only built when CUDNN_VERSION is at least 7100.
  if(NOT ${CUDNN_VERSION} VERSION_LESS 7100)
    op_library(conv_fusion_op)
    file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(conv2d_fusion);\n")
  endif()

  # sync_batch_norm_op is skipped on Windows.
  if(NOT WIN32)
    op_library(sync_batch_norm_op)
    file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(sync_batch_norm);\n")
  endif()
else()
  op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale)
endif()

# Collect the dependency list that is published as GLOB_OPERATOR_DEPS below.
set(COMMON_OP_DEPS ${OP_HEADER_DEPS})

if(WITH_DGC)
  op_library(dgc_op DEPS dgc)
  file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(dgc);\n")
  list(APPEND COMMON_OP_DEPS dgc)
endif()

list(APPEND COMMON_OP_DEPS
  selected_rows_functor
  selected_rows
  lod_tensor
  maxouting
  unpooling
  pooling
  lod_rank_table
  context_project
  sequence_pooling
  executor)
list(APPEND COMMON_OP_DEPS dynload_warpctc)
list(APPEND COMMON_OP_DEPS
  sequence_padding
  sequence_scale
  cos_sim_functor
  memory
  jit_kernel_helper
  concat_and_split
  cross_entropy
  softmax
  vol2col
  im2col
  sampler
  sample_prob
  tree2col)
list(APPEND COMMON_OP_DEPS
  sequence2batch
  lstm_compute
  matrix_bit_code
  gru_compute
  activation_functions
  beam_search
  fc)
list(APPEND COMMON_OP_DEPS box_wrapper)
if(WITH_GPU)
  list(APPEND COMMON_OP_DEPS depthwise_conv prelu)
endif()
list(APPEND COMMON_OP_DEPS device_memory_aligment)

# FIXME(typhoonzero): these operator deps may not be needed.
# op_library(lod_tensor_to_array_op DEPS lod_rank_table_op)
# op_library(array_to_lod_tensor_op DEPS lod_rank_table_op)
# op_library(unsqueeze_op DEPS reshape_op)
# op_library(squeeze_op DEPS reshape_op)
# op_library(flatten_op DEPS reshape_op)
# op_library(unstack_op DEPS stack_op)
# op_library(tensor_array_to_tensor_op DEPS concat_op)

# OPERATOR_DEPS may already hold entries set outside this file; extend it and
# export the result through the cache.
set(OPERATOR_DEPS ${OPERATOR_DEPS} ${COMMON_OP_DEPS})
set(GLOB_OPERATOR_DEPS ${OPERATOR_DEPS} CACHE INTERNAL "Global Op dependencies")

# Unit tests.
cc_test(gather_test SRCS gather_test.cc DEPS tensor)
cc_test(scatter_test SRCS scatter_test.cc DEPS tensor math_function)
cc_test(beam_search_decode_op_test
        SRCS beam_search_decode_op_test.cc
        DEPS lod_tensor)
cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor memory)
cc_test(save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op)
cc_test(save_load_combine_op_test
        SRCS save_load_combine_op_test.cc
        DEPS save_combine_op load_combine_op)
nv_test(dropout_op_test SRCS dropout_op_test.cc DEPS dropout_op tensor)

# The leaky-relu functor test also compiles its .cu source on GPU builds.
if(WITH_GPU)
  nv_test(test_leaky_relu_grad_grad_functor
          SRCS test_leaky_relu_grad_grad_functor.cc
               test_leaky_relu_grad_grad_functor.cu
          DEPS tensor device_context eigen3)
else()
  cc_test(test_leaky_relu_grad_grad_functor
          SRCS test_leaky_relu_grad_grad_functor.cc
          DEPS tensor device_context eigen3)
endif()

# py_func_op links against Python, so it is only built when WITH_PYTHON is on.
if(WITH_PYTHON)
  cc_library(py_func_op SRCS py_func_op.cc DEPS op_registry python pybind)
endif()

# Export the operator library list through the cache.
set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library")

add_subdirectory(benchmark)