From 36419766e4e5182407b57e659e3ce279135662b4 Mon Sep 17 00:00:00 2001 From: Yan Chunwei Date: Sat, 24 Aug 2019 10:35:14 +0800 Subject: [PATCH] Refactor op kernel compile system (#1831) --- CMakeLists.txt | 2 + cmake/lite.cmake | 122 +++++++- lite/CMakeLists.txt | 1 - lite/api/CMakeLists.txt | 5 +- ...le_use_kernels.h => _paddle_use_kernels.h} | 0 .../{paddle_use_ops.h => _paddle_use_ops.h} | 0 lite/arm/CMakeLists.txt | 1 - lite/core/CMakeLists.txt | 45 ++- lite/kernels/CMakeLists.txt | 2 +- lite/kernels/arm/CMakeLists.txt | 184 ++++--------- lite/kernels/fpga/CMakeLists.txt | 42 +-- lite/kernels/host/CMakeLists.txt | 18 +- lite/kernels/npu/CMakeLists.txt | 8 +- lite/kernels/opencl/CMakeLists.txt | 32 +-- lite/kernels/x86/CMakeLists.txt | 22 +- lite/operators/CMakeLists.txt | 260 ++++++------------ lite/tests/kernels/CMakeLists.txt | 8 +- lite/tools/build.sh | 1 + .../cmake_tools/parse_kernel_registry.py | 59 ++++ lite/tools/cmake_tools/parse_op_registry.py | 45 +++ 20 files changed, 457 insertions(+), 400 deletions(-) rename lite/api/{paddle_use_kernels.h => _paddle_use_kernels.h} (100%) rename lite/api/{paddle_use_ops.h => _paddle_use_ops.h} (100%) create mode 100644 lite/tools/cmake_tools/parse_kernel_registry.py create mode 100644 lite/tools/cmake_tools/parse_op_registry.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 0a23d869aa..b10fdd7333 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -80,6 +80,8 @@ option(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK "Enable light-weight framework" OFF) option(LITE_WITH_PROFILE "Enable profile mode in lite framework" OFF) option(LITE_SHUTDOWN_LOG "Shutdown log system or not." OFF) option(LITE_ON_TINY_PUBLISH "Publish tiny predictor lib." 
OFF) +# publish options +option(LITE_BUILD_EXTRA "Enable extra algorithm support in Lite, both kernels and operators" OFF) set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING "A path setting third party libraries download & build directories.") diff --git a/cmake/lite.cmake b/cmake/lite.cmake index 03d6cafcf9..89918b7cb9 100644 --- a/cmake/lite.cmake +++ b/cmake/lite.cmake @@ -57,6 +57,8 @@ function (lite_deps TARGET) endforeach(var) endif() + + if (NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) foreach(var ${lite_deps_HVY_DEPS}) set(deps ${deps} ${var}) @@ -182,9 +184,16 @@ function(lite_cc_test TARGET) set(oneValueArgs "") set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS PROFILE_DEPS LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS - ARGS) + ARGS + COMPILE_LEVEL # (basic|extra) + ) cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + if (args_COMPILE_LEVEL STREQUAL "extra" AND (NOT LITE_BUILD_EXTRA)) + MESSAGE(STATUS "Ignore test ${TARGET} due to compile level ${args_COMPILE_LEVEL}") + return() + endif() + set(deps "") lite_deps(deps DEPS ${args_DEPS} @@ -207,6 +216,117 @@ function(lite_cc_test TARGET) endif() endfunction() +set(arm_kernels CACHE INTERNAL "arm kernels") +set(x86_kernels CACHE INTERNAL "x86 kernels") +set(fpga_kernels CACHE INTERNAL "fpga kernels") +set(npu_kernels CACHE INTERNAL "npu kernels") +set(opencl_kernels CACHE INTERNAL "opencl kernels") +set(host_kernels CACHE INTERNAL "host kernels") + +set(kernels_src_list "${CMAKE_BINARY_DIR}/kernels_src_list.txt") +file(WRITE ${kernels_src_list} "") # clean +# add a kernel for some specific device +# device: one of (Host, ARM, X86, NPU, FPGA, OPENCL, CUDA) +# level: one of (basic, extra) +function(add_kernel TARGET device level) + set(options "") + set(oneValueArgs "") + set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS PROFILE_DEPS + LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS + ARGS) + cmake_parse_arguments(args 
"${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if ("${level}" STREQUAL "extra" AND (NOT LITE_BUILD_EXTRA)) + return() + endif() + + if ("${device}" STREQUAL "Host") + set(host_kernels "${host_kernels};${TARGET}" CACHE INTERNAL "") + endif() + if ("${device}" STREQUAL "ARM") + if (NOT LITE_WITH_ARM) + return() + endif() + set(arm_kernels "${arm_kernels};${TARGET}" CACHE INTERNAL "") + endif() + if ("${device}" STREQUAL "X86") + if (NOT LITE_WITH_X86) + return() + endif() + set(x86_kernels "${x86_kernels};${TARGET}" CACHE INTERNAL "") + endif() + if ("${device}" STREQUAL "NPU") + if (NOT LITE_WITH_NPU) + return() + endif() + set(npu_kernels "${npu_kernels};${TARGET}" CACHE INTERNAL "") + endif() + if ("${device}" STREQUAL "FPGA") + if (NOT LITE_WITH_FPGA) + return() + endif() + set(fpga_kernels "${fpga_kernels};${TARGET}" CACHE INTERNAL "") + endif() + if ("${device}" STREQUAL "OPENCL") + if (NOT LITE_WITH_OPENCL) + return() + endif() + set(opencl_kernels "${opencl_kernels};${TARGET}" CACHE INTERNAL "") + endif() + + foreach(src ${args_SRCS}) + file(APPEND ${kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n") + endforeach() + + lite_cc_library(${TARGET} SRCS ${args_SRCS} + DEPS ${args_DEPS} + X86_DEPS ${args_X86_DEPS} + CUDA_DEPS ${args_CUDA_DEPS} + CL_DEPS ${args_CL_DEPS} + ARM_DEPS ${args_ARM_DEPS} + FPGA_DEPS ${args_FPGA_DEPS} + PROFILE_DEPS ${args_PROFILE_DEPS} + LIGHT_DEPS ${args_LIGHT_DEPS} + HVY_DEPS ${args_HVY_DEPS} + ) +endfunction() + +set(ops CACHE INTERNAL "ops") +set(ops_src_list "${CMAKE_BINARY_DIR}/ops_src_list.txt") +file(WRITE ${ops_src_list} "") # clean +# add an operator +# level: one of (basic, extra) +function(add_operator TARGET level) + set(options "") + set(oneValueArgs "") + set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS PROFILE_DEPS + LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS + ARGS) + cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if ("${level}" 
STREQUAL "extra" AND (NOT LITE_BUILD_EXTRA)) + return() + endif() + + set(ops "${ops};${TARGET}" CACHE INTERNAL "source") + + foreach(src ${args_SRCS}) + file(APPEND ${ops_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n") + endforeach() + + lite_cc_library(${TARGET} SRCS ${args_SRCS} + DEPS ${args_DEPS} + X86_DEPS ${args_X86_DEPS} + CUDA_DEPS ${args_CUDA_DEPS} + CL_DEPS ${args_CL_DEPS} + ARM_DEPS ${args_ARM_DEPS} + FPGA_DEPS ${args_FPGA_DEPS} + PROFILE_DEPS ${args_PROFILE_DEPS} + LIGHT_DEPS ${args_LIGHT_DEPS} + HVY_DEPS ${args_HVY_DEPS} + ) +endfunction() + # Bundle several static libraries into one. function(bundle_static_library tgt_name bundled_tgt_name fake_target) diff --git a/lite/CMakeLists.txt b/lite/CMakeLists.txt index be23ff2540..35448d4ed0 100644 --- a/lite/CMakeLists.txt +++ b/lite/CMakeLists.txt @@ -13,7 +13,6 @@ set(LITE_MODEL_DIR "${THIRD_PARTY_PATH}/install") set(LITE_ON_MOBILE ${LITE_WITH_LIGHT_WEIGHT_FRAMEWORK}) - add_subdirectory(utils) add_subdirectory(operators) add_subdirectory(kernels) diff --git a/lite/api/CMakeLists.txt b/lite/api/CMakeLists.txt index 8a99bea428..55c8f28188 100644 --- a/lite/api/CMakeLists.txt +++ b/lite/api/CMakeLists.txt @@ -17,6 +17,7 @@ if(LITE_WITH_FPGA) endif() message(STATUS "get ops ${ops}") +message(STATUS "get X86 kernels ${x86_kernels}") message(STATUS "get Host kernels ${host_kernels}") message(STATUS "get ARM kernels ${arm_kernels}") message(STATUS "get NPU kernels ${npu_kernels}") @@ -117,7 +118,7 @@ if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND WITH_TESTING) add_dependencies(test_mobilenetv1 extern_lite_download_mobilenet_v1_tar_gz) set(LINK_FLAGS "-Wl,--version-script ${PADDLE_SOURCE_DIR}/lite/core/lite.map") set_target_properties(test_mobilenetv1 PROPERTIES LINK_FLAGS "${LINK_FLAGS}") - + lite_cc_test(test_mobilenetv2 SRCS mobilenetv2_test.cc DEPS ${lite_model_test_DEPS} CL_DEPS ${opencl_kernels} @@ -125,7 +126,7 @@ if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND WITH_TESTING) 
--model_dir=${LITE_MODEL_DIR}/mobilenet_v2_relu SERIAL) add_dependencies(test_mobilenetv2 extern_lite_download_mobilenet_v2_relu_tar_gz) set_target_properties(test_mobilenetv2 PROPERTIES LINK_FLAGS "${LINK_FLAGS}") - + lite_cc_test(test_resnet50 SRCS resnet50_test.cc DEPS ${lite_model_test_DEPS} CL_DEPS ${opencl_kernels} diff --git a/lite/api/paddle_use_kernels.h b/lite/api/_paddle_use_kernels.h similarity index 100% rename from lite/api/paddle_use_kernels.h rename to lite/api/_paddle_use_kernels.h diff --git a/lite/api/paddle_use_ops.h b/lite/api/_paddle_use_ops.h similarity index 100% rename from lite/api/paddle_use_ops.h rename to lite/api/_paddle_use_ops.h diff --git a/lite/arm/CMakeLists.txt b/lite/arm/CMakeLists.txt index 8abd04b523..2767b4e7ae 100644 --- a/lite/arm/CMakeLists.txt +++ b/lite/arm/CMakeLists.txt @@ -1,2 +1 @@ - add_subdirectory(math) diff --git a/lite/core/CMakeLists.txt b/lite/core/CMakeLists.txt index cc80637dd4..35b235221c 100644 --- a/lite/core/CMakeLists.txt +++ b/lite/core/CMakeLists.txt @@ -37,9 +37,36 @@ lite_cc_library(context SRCS context.cc DEPS tensor any cpu_info CL_DEPS cl_cont else() lite_cc_library(context SRCS context.cc DEPS tensor any cpu_info eigen3 CL_DEPS cl_context gflags) endif() -lite_cc_library(kernel SRCS kernel.cc DEPS context type_system target_wrapper any op_params tensor) + +#----------------------------------------------- NOT CHANGE ----------------------------------------------- +# A trick to generate the paddle_use_kernels.h +add_custom_command( + COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/parse_kernel_registry.py + ${kernels_src_list} + ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_kernels.h + OUTPUT ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_kernels.h + ) +# A trick to generate the paddle_use_ops.h +add_custom_command( + COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/parse_op_registry.py + ${ops_src_list} + ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_ops.h + OUTPUT 
${CMAKE_SOURCE_DIR}/lite/api/paddle_use_ops.h + ) +add_custom_target(op_list_h DEPENDS ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_ops.h) +add_custom_target(kernel_list_h DEPENDS ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_kernels.h) + +#----------------------------------------------- NOT CHANGE ----------------------------------------------- +lite_cc_library(kernel SRCS kernel.cc DEPS context type_system target_wrapper any op_params tensor + ) lite_cc_library(op SRCS op_lite.cc DEPS scope op_registry target_wrapper kernel - cpp_op_desc tensor) + cpp_op_desc tensor + ) + +add_dependencies(kernel kernel_list_h) +add_dependencies(op op_list_h) + + lite_cc_library(type_system SRCS type_system.cc DEPS tensor target_wrapper) lite_cc_library(program SRCS program.cc @@ -73,3 +100,17 @@ lite_cc_test(test_type_system SRCS type_system_test.cc DEPS type_system utils) lite_cc_test(test_types SRCS types_test.cc DEPS types) lite_cc_test(test_memory SRCS memory_test.cc DEPS memory) lite_cc_test(test_context SRCS context_test.cc DEPS context) + + +# # A trick to generate the paddle_use_kernels.h +# execute_process( +# COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/parse_kernel_registry.py +# ${kernels_src_list} +# ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_kernels.h +# ) +# # A trick to generate the paddle_use_ops.h +# execute_process( +# COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/parse_op_registry.py +# ${ops_src_list} +# ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_ops.h +# ) diff --git a/lite/kernels/CMakeLists.txt b/lite/kernels/CMakeLists.txt index d5a3f6d9f0..1996f50133 100644 --- a/lite/kernels/CMakeLists.txt +++ b/lite/kernels/CMakeLists.txt @@ -1,6 +1,6 @@ message(STATUS "add lite kernels") -set(lite_kernel_deps type_system kernel op op_registry context tensor CACHE INTERNAL "" FORCE) +set(lite_kernel_deps type_system kernel op op_registry context tensor any CACHE INTERNAL "" FORCE) add_subdirectory(host) add_subdirectory(arm) diff --git 
a/lite/kernels/arm/CMakeLists.txt b/lite/kernels/arm/CMakeLists.txt index 99098102dc..524a235ef4 100644 --- a/lite/kernels/arm/CMakeLists.txt +++ b/lite/kernels/arm/CMakeLists.txt @@ -4,64 +4,66 @@ endif() message(STATUS "compile with lite ARM kernels") -lite_cc_library(fc_compute_arm SRCS fc_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(activation_compute_arm SRCS activation_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(mul_compute_arm SRCS mul_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(matmul_compute_arm SRCS matmul_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(scale_compute_arm SRCS scale_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(softmax_compute_arm SRCS softmax_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(conv_compute_arm SRCS conv_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(batch_norm_compute_arm SRCS batch_norm_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(elementwise_compute_arm SRCS elementwise_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(lrn_compute_arm SRCS lrn_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(decode_bboxes_compute_arm SRCS decode_bboxes_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(multiclass_nms_compute_arm SRCS multiclass_nms_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(pool_compute_arm SRCS pool_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(split_compute_arm SRCS split_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(concat_compute_arm SRCS concat_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(pad2d_compute_arm SRCS pad2d_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(prior_box_compute_arm SRCS prior_box_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(density_prior_box_compute_arm SRCS density_prior_box_compute.cc DEPS ${lite_kernel_deps} math_arm) 
-lite_cc_library(negative_compute_arm SRCS negative_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(crop_compute_arm SRCS crop_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(dropout_compute_arm SRCS dropout_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(calib_compute_arm SRCS calib_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(transpose_compute_arm SRCS transpose_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(power_compute_arm SRCS power_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(yolo_box_compute_arm SRCS yolo_box_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(shuffle_channel_compute_arm SRCS shuffle_channel_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(argmax_compute_arm SRCS argmax_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(axpy_compute_arm SRCS axpy_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(conv_transpose_compute_arm SRCS conv_transpose_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(gru_unit_compute_arm SRCS gru_unit_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(gru_compute_arm SRCS gru_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(beam_search_decode_compute_arm SRCS beam_search_decode_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(lookup_table_compute_arm SRCS lookup_table_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(im2sequence_compute_arm SRCS im2sequence_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(sequence_softmax_compute_arm SRCS sequence_softmax_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(norm_compute_arm SRCS norm_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(interpolate_compute_arm SRCS interpolate_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(logical_compute_arm SRCS logical_compute.cc DEPS ${lite_kernel_deps} math_arm) 
-lite_cc_library(less_than_arm SRCS compare_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(while_compute_arm SRCS while_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(compare_compute_arm SRCS compare_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(topk_compute_arm SRCS topk_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(increment_compute_arm SRCS increment_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(write_to_array_compute_arm SRCS write_to_array_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(read_from_array_compute_arm SRCS read_from_array_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(beam_search_compute_arm SRCS beam_search_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(fill_constant_compute_arm SRCS fill_constant_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(lod_reset_compute_arm SRCS lod_reset_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(box_coder_compute_arm SRCS box_coder_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(sequence_pool_compute_arm SRCS sequence_pool_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(sequence_expand_compute_arm SRCS sequence_expand_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(reduce_max_compute_arm SRCS reduce_max_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(is_empty_compute_arm SRCS is_empty_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(shape_compute_arm SRCS shape_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(slice_compute_arm SRCS slice_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(cast_compute_arm SRCS cast_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(squeeze_compute_arm SRCS squeeze_compute.cc DEPS ${lite_kernel_deps} math_arm) -lite_cc_library(expand_compute_arm SRCS expand_compute.cc DEPS ${lite_kernel_deps} math_arm) 
+add_kernel(fc_compute_arm ARM basic SRCS fc_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(activation_compute_arm ARM basic SRCS activation_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(mul_compute_arm ARM basic SRCS mul_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(matmul_compute_arm ARM basic SRCS matmul_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(scale_compute_arm ARM basic SRCS scale_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(softmax_compute_arm ARM basic SRCS softmax_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(conv_compute_arm ARM basic SRCS conv_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(batch_norm_compute_arm ARM basic SRCS batch_norm_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(elementwise_compute_arm ARM basic SRCS elementwise_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(lrn_compute_arm ARM basic SRCS lrn_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(decode_bboxes_compute_arm ARM basic SRCS decode_bboxes_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(multiclass_nms_compute_arm ARM basic SRCS multiclass_nms_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(pool_compute_arm ARM basic SRCS pool_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(split_compute_arm ARM basic SRCS split_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(concat_compute_arm ARM basic SRCS concat_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(pad2d_compute_arm ARM basic SRCS pad2d_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(prior_box_compute_arm ARM basic SRCS prior_box_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(density_prior_box_compute_arm ARM basic SRCS density_prior_box_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(negative_compute_arm ARM basic SRCS negative_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(crop_compute_arm ARM basic SRCS crop_compute.cc 
DEPS ${lite_kernel_deps} math_arm) +add_kernel(dropout_compute_arm ARM basic SRCS dropout_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(calib_compute_arm ARM basic SRCS calib_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(transpose_compute_arm ARM basic SRCS transpose_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(power_compute_arm ARM basic SRCS power_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(yolo_box_compute_arm ARM basic SRCS yolo_box_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(shuffle_channel_compute_arm ARM basic SRCS shuffle_channel_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(argmax_compute_arm ARM basic SRCS argmax_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(axpy_compute_arm ARM basic SRCS axpy_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(conv_transpose_compute_arm ARM basic SRCS conv_transpose_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(norm_compute_arm ARM basic SRCS norm_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(interpolate_compute_arm ARM basic SRCS interpolate_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(box_coder_compute_arm ARM basic SRCS box_coder_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(shape_compute_arm ARM basic SRCS shape_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(slice_compute_arm ARM basic SRCS slice_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(cast_compute_arm ARM basic SRCS cast_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(squeeze_compute_arm ARM basic SRCS squeeze_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(expand_compute_arm ARM basic SRCS expand_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(reduce_max_compute_arm ARM basic SRCS reduce_max_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(sequence_expand_compute_arm ARM basic SRCS sequence_expand_compute.cc DEPS ${lite_kernel_deps} math_arm) + +# for OCR 
specific +add_kernel(im2sequence_compute_arm ARM extra SRCS im2sequence_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(sequence_pool_compute_arm ARM extra SRCS sequence_pool_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(gru_unit_compute_arm ARM extra SRCS gru_unit_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(gru_compute_arm ARM extra SRCS gru_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(beam_search_decode_compute_arm ARM extra SRCS beam_search_decode_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(lookup_table_compute_arm ARM extra SRCS lookup_table_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(logical_compute_arm ARM extra SRCS logical_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(sequence_softmax_compute_arm ARM extra SRCS sequence_softmax_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(less_than_arm ARM extra SRCS compare_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(while_compute_arm ARM extra SRCS while_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(compare_compute_arm ARM extra SRCS compare_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(topk_compute_arm ARM extra SRCS topk_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(increment_compute_arm ARM extra SRCS increment_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(write_to_array_compute_arm ARM extra SRCS write_to_array_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(read_from_array_compute_arm ARM extra SRCS read_from_array_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(beam_search_compute_arm ARM extra SRCS beam_search_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(fill_constant_compute_arm ARM extra SRCS fill_constant_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(lod_reset_compute_arm ARM extra SRCS lod_reset_compute.cc DEPS ${lite_kernel_deps} math_arm) +add_kernel(is_empty_compute_arm ARM extra SRCS 
is_empty_compute.cc DEPS ${lite_kernel_deps} math_arm) lite_cc_test(test_fc_compute_arm SRCS fc_compute_test.cc DEPS fc_compute_arm math_arm) lite_cc_test(test_scale_compute_arm SRCS scale_compute_test.cc DEPS scale_compute_arm) @@ -77,71 +79,7 @@ lite_cc_test(test_mul_compute_arm SRCS mul_compute_test.cc DEPS mul_compute_arm) lite_cc_test(test_split_compute_arm SRCS split_compute_test.cc DEPS split_compute_arm) lite_cc_test(test_concat_compute_arm SRCS concat_compute_test.cc DEPS concat_compute_arm) lite_cc_test(test_dropout_compute_arm SRCS dropout_compute_test.cc DEPS dropout_compute_arm) -lite_cc_test(test_transpose_compute_arm SRCS transpose_compute_test.cc DEPS transpose_compute_arm) +lite_cc_test(test_transpose_compute_arm SRCS transpose_compute_test.cc DEPS transpose_compute_arm COMPILE_LEVEL extra) lite_cc_test(test_argmax_compute_arm SRCS argmax_compute_test.cc DEPS argmax_compute_arm) lite_cc_test(test_axpy_compute_arm SRCS axpy_compute_test.cc DEPS axpy_compute_arm) lite_cc_test(test_conv_transpose_compute_arm SRCS conv_transpose_compute_test.cc DEPS conv_transpose_compute_arm) - - -set(arm_kernels - fc_compute_arm - activation_compute_arm - mul_compute_arm - matmul_compute_arm - scale_compute_arm - softmax_compute_arm - conv_compute_arm - batch_norm_compute_arm - elementwise_compute_arm - lrn_compute_arm - decode_bboxes_compute_arm - multiclass_nms_compute_arm - pool_compute_arm - split_compute_arm - concat_compute_arm - pad2d_compute_arm - prior_box_compute_arm - density_prior_box_compute_arm - negative_compute_arm - crop_compute_arm - dropout_compute_arm - transpose_compute_arm - calib_compute_arm - argmax_compute_arm - axpy_compute_arm - conv_transpose_compute_arm - gru_unit_compute_arm - gru_compute_arm - beam_search_decode_compute_arm - lookup_table_compute_arm - im2sequence_compute_arm - sequence_softmax_compute_arm - norm_compute_arm - power_compute_arm - shuffle_channel_compute_arm - yolo_box_compute_arm - interpolate_compute_arm - 
logical_compute_arm - less_than_arm - while_compute_arm - compare_compute_arm - topk_compute_arm - increment_compute_arm - write_to_array_compute_arm - read_from_array_compute_arm - beam_search_compute_arm - fill_constant_compute_arm - lod_reset_compute_arm - box_coder_compute_arm - reduce_max_compute_arm - sequence_expand_compute_arm - sequence_pool_compute_arm - is_empty_compute_arm - shape_compute_arm - slice_compute_arm - cast_compute_arm - squeeze_compute_arm - expand_compute_arm - ) - -set(arm_kernels "${arm_kernels}" CACHE INTERNAL "arm kernels") diff --git a/lite/kernels/fpga/CMakeLists.txt b/lite/kernels/fpga/CMakeLists.txt index 36d6ccc255..dc88601880 100644 --- a/lite/kernels/fpga/CMakeLists.txt +++ b/lite/kernels/fpga/CMakeLists.txt @@ -1,50 +1,32 @@ if (NOT LITE_WITH_FPGA) return() endif() -message("fpga : ${lite_kernel_deps}") set(fpga_deps fpga_target_wrapper kernel_fpga) -lite_cc_library(activation_compute_fpga SRCS activation_compute.cc DEPS ${fpga_deps}) +add_kernel(activation_compute_fpga FPGA basic SRCS activation_compute.cc DEPS ${fpga_deps}) lite_cc_test(test_acivation_fpga SRCS activation_compute_test.cc DEPS ${lite_kernel_deps} activation_compute_fpga ${fpga_deps}) -lite_cc_library(conv_compute_fpga SRCS conv_compute.cc DEPS ${fpga_deps}) +add_kernel(conv_compute_fpga FPGA basic SRCS conv_compute.cc DEPS ${fpga_deps}) lite_cc_test(test_conv_fpga SRCS conv_compute_test.cc DEPS ${lite_kernel_deps} conv_compute_fpga ${fpga_deps}) -lite_cc_library(elementwise_compute_fpga SRCS elementwise_compute.cc DEPS ${fpga_deps}) +add_kernel(elementwise_compute_fpga FPGA basic SRCS elementwise_compute.cc DEPS ${fpga_deps}) lite_cc_test(test_elementwise_fpga SRCS elementwise_compute_test.cc DEPS ${lite_kernel_deps} elementwise_compute_fpga ${fpga_deps}) -lite_cc_library(pooling_compute_fpga SRCS pooling_compute.cc DEPS ${fpga_deps}) +add_kernel(pooling_compute_fpga FPGA basic SRCS pooling_compute.cc DEPS ${fpga_deps}) lite_cc_test(test_pooling_compute_fpga 
SRCS pooling_compute_test.cc DEPS ${lite_kernel_deps} pooling_compute_fpga ${fpga_deps}) -lite_cc_library(scale_compute_fpga SRCS scale_compute.cc DEPS ${fpga_deps}) +add_kernel(scale_compute_fpga FPGA basic SRCS scale_compute.cc DEPS ${fpga_deps}) -lite_cc_library(softmax_compute_fpga SRCS softmax_compute.cc DEPS ${fpga_deps}) +add_kernel(softmax_compute_fpga FPGA basic SRCS softmax_compute.cc DEPS ${fpga_deps}) lite_cc_test(test_softmax_compute_fpga SRCS softmax_compute_test.cc DEPS ${lite_kernel_deps} softmax_compute_fpga ${fpga_deps}) -lite_cc_library(fc_compute_fpga SRCS fc_compute.cc DEPS ${fpga_deps}) +add_kernel(fc_compute_fpga FPGA basic SRCS fc_compute.cc DEPS ${fpga_deps}) lite_cc_test(test_fc_compute_fpga SRCS fc_compute_test.cc DEPS ${lite_kernel_deps} fc_compute_fpga ${fpga_deps}) -lite_cc_library(io_copy_compute_fpga SRCS io_copy_compute.cc DEPS ${fpga_deps}) -lite_cc_library(calib_compute_fpga SRCS calib_compute.cc DEPS ${fpga_deps}) -lite_cc_library(layout_compute_fpga SRCS layout_compute.cc DEPS ${fpga_deps}) -lite_cc_library(feed_compute_fpga SRCS feed_compute.cc DEPS ${fpga_deps}) -lite_cc_library(fetch_compute_fpga SRCS fetch_compute.cc DEPS ${fpga_deps}) - -set (fpga_kernels - activation_compute_fpga - conv_compute_fpga - elementwise_compute_fpga - pooling_compute_fpga - scale_compute_fpga - softmax_compute_fpga - fc_compute_fpga - io_copy_compute_fpga - calib_compute_fpga - layout_compute_fpga - feed_compute_fpga - fetch_compute_fpga -) - -set(fpga_kernels "${fpga_kernels}" CACHE INTERNAL "fpga kernels") +add_kernel(io_copy_compute_fpga FPGA basic SRCS io_copy_compute.cc DEPS ${fpga_deps}) +add_kernel(calib_compute_fpga FPGA basic SRCS calib_compute.cc DEPS ${fpga_deps}) +add_kernel(layout_compute_fpga FPGA basic SRCS layout_compute.cc DEPS ${fpga_deps}) +add_kernel(feed_compute_fpga FPGA basic SRCS feed_compute.cc DEPS ${fpga_deps}) +add_kernel(fetch_compute_fpga FPGA basic SRCS fetch_compute.cc DEPS ${fpga_deps}) diff --git 
a/lite/kernels/host/CMakeLists.txt b/lite/kernels/host/CMakeLists.txt index 5f93051f2b..abd96317cc 100644 --- a/lite/kernels/host/CMakeLists.txt +++ b/lite/kernels/host/CMakeLists.txt @@ -1,17 +1,7 @@ message(STATUS "compile with lite host kernels") -lite_cc_library(feed_compute_host SRCS feed_compute.cc DEPS ${lite_kernel_deps}) -lite_cc_library(fetch_compute_host SRCS fetch_compute.cc DEPS ${lite_kernel_deps}) -lite_cc_library(reshape_compute_host SRCS reshape_compute.cc DEPS ${lite_kernel_deps} reshape_op) +add_kernel(feed_compute_host Host basic SRCS feed_compute.cc DEPS ${lite_kernel_deps}) +add_kernel(fetch_compute_host Host basic SRCS fetch_compute.cc DEPS ${lite_kernel_deps}) +add_kernel(reshape_compute_host Host basic SRCS reshape_compute.cc DEPS ${lite_kernel_deps} reshape_op) -lite_cc_test(test_reshape_compute_host SRCS reshape_compute_test.cc DEPS reshape_compute_host) - -set(host_kernels - feed_compute_host - fetch_compute_host - reshape_compute_host - ) - -set(host_kernels "${host_kernels}" CACHE GLOBAL "host kernels") - - +lite_cc_test(test_reshape_compute_host SRCS reshape_compute_test.cc DEPS reshape_compute_host any) diff --git a/lite/kernels/npu/CMakeLists.txt b/lite/kernels/npu/CMakeLists.txt index 2ef9bf03b2..960dbff8db 100644 --- a/lite/kernels/npu/CMakeLists.txt +++ b/lite/kernels/npu/CMakeLists.txt @@ -2,12 +2,8 @@ if(NOT LITE_WITH_NPU) return () endif() - + message(STATUS "compile with lite NPU kernels") -lite_cc_library(graph_compute_npu SRCS graph_compute.cc DEPS ${lite_kernel_deps} ${npu_ddk_libs}) +add_kernel(graph_compute_npu NPU basic SRCS graph_compute.cc DEPS ${lite_kernel_deps} ${npu_ddk_libs}) # lite_cc_test(test_graph_compute_npu SRCS graph_compute_test.cc DEPS graph_compute_npu) - -set(npu_kernels graph_compute_npu) -set(npu_kernels "${npu_kernels}" CACHE INTERNAL "npu kernels") - diff --git a/lite/kernels/opencl/CMakeLists.txt b/lite/kernels/opencl/CMakeLists.txt index 68662bf17e..dc1ff6b97f 100644 --- 
a/lite/kernels/opencl/CMakeLists.txt +++ b/lite/kernels/opencl/CMakeLists.txt @@ -4,17 +4,17 @@ endif() set(cl_kernel_deps op_params cl_runtime cl_context cl_wrapper cl_target_wrapper) -lite_cc_library(fc_opencl SRCS fc_compute.cc DEPS ${cl_kernel_deps}) -lite_cc_library(mul_opencl SRCS mul_compute.cc DEPS ${cl_kernel_deps}) -lite_cc_library(elementwise_add_opencl SRCS elementwise_add_compute.cc DEPS ${cl_kernel_deps}) -lite_cc_library(fusion_elementwise_add_activation_opencl - SRCS fusion_elementwise_add_activation_compute.cc +add_kernel(fc_opencl OPENCL basic SRCS fc_compute.cc DEPS ${cl_kernel_deps}) +add_kernel(mul_opencl OPENCL basic SRCS mul_compute.cc DEPS ${cl_kernel_deps}) +add_kernel(elementwise_add_opencl OPENCL basic SRCS elementwise_add_compute.cc DEPS ${cl_kernel_deps}) +add_kernel(fusion_elementwise_add_activation_opencl + OPENCL basic SRCS fusion_elementwise_add_activation_compute.cc DEPS elementwise_add_opencl ${cl_kernel_deps}) -lite_cc_library(pool_opencl SRCS pool_compute.cc DEPS ${cl_kernel_deps}) -lite_cc_library(io_copy_compute_opencl SRCS io_copy_compute.cc DEPS ${tensor_lite} ${cl_kernel_deps}) -lite_cc_library(relu_opencl SRCS relu_compute.cc DEPS ${cl_kernel_deps}) -lite_cc_library(depthwise_conv2d_opencl SRCS depthwise_conv2d_compute.cc DEPS ${cl_kernel_deps}) -lite_cc_library(conv_opencl SRCS conv_compute.cc DEPS ${cl_kernel_deps}) +add_kernel(pool_opencl OPENCL basic SRCS pool_compute.cc DEPS ${cl_kernel_deps}) +add_kernel(io_copy_compute_opencl OPENCL basic SRCS io_copy_compute.cc DEPS ${tensor_lite} ${cl_kernel_deps}) +add_kernel(relu_opencl OPENCL basic SRCS relu_compute.cc DEPS ${cl_kernel_deps}) +add_kernel(depthwise_conv2d_opencl OPENCL basic SRCS depthwise_conv2d_compute.cc DEPS ${cl_kernel_deps}) +add_kernel(conv_opencl OPENCL basic SRCS conv_compute.cc DEPS ${cl_kernel_deps}) lite_cc_test(test_elementwise_add_opencl SRCS elementwise_add_compute_test.cc DEPS elementwise_add_opencl fusion_elementwise_add_activation_opencl 
op_registry program context @@ -47,15 +47,3 @@ lite_cc_test(test_depthwise_conv2d_opencl SRCS depthwise_conv2d_compute_test.cc lite_cc_test(test_conv_opencl SRCS conv_compute_test.cc DEPS conv_opencl op_registry program context ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/opencl) - -set(opencl_kernels - io_copy_compute_opencl - elementwise_add_opencl - fusion_elementwise_add_activation_opencl - pool_opencl - relu_opencl - mul_opencl - fc_opencl - depthwise_conv2d_opencl - conv_opencl - CACHE INTERNAL "opencl_kernels") diff --git a/lite/kernels/x86/CMakeLists.txt b/lite/kernels/x86/CMakeLists.txt index 3a6d72f5fb..7080cc8c55 100644 --- a/lite/kernels/x86/CMakeLists.txt +++ b/lite/kernels/x86/CMakeLists.txt @@ -10,7 +10,7 @@ endif() # lite_cc_library(fc_compute_x86 SRCS fc_compute.cc DEPS ${lite_kernel_deps}) # lite_cc_library(mul_compute_x86 SRCS mul_compute.cc DEPS ${lite_kernel_deps}) # lite_cc_library(relu_compute_x86 SRCS relu_compute.cc DEPS ${lite_kernel_deps}) -lite_cc_library(scale_compute_x86 SRCS scale_compute.cc DEPS ${lite_kernel_deps}) +add_kernel(scale_compute_x86 X86 basic SRCS scale_compute.cc DEPS ${lite_kernel_deps}) # lite_cc_library(elementwise_compute_x86 SRCS elementwise_compute.cc DEPS ${lite_kernel_deps} elementwise_sub_op elementwise_add_op) # lite_cc_library(softmax_compute_x86 SRCS softmax_compute.cc DEPS ${lite_kernel_deps} softmax) # lite_cc_library(dropout_compute_x86 SRCS dropout_compute.cc DEPS ${lite_kernel_deps} ) @@ -31,23 +31,3 @@ lite_cc_library(scale_compute_x86 SRCS scale_compute.cc DEPS ${lite_kernel_deps} # lite_cc_test(test_scale_compute_x86 SRCS scale_compute_test.cc DEPS scale_compute_x86) # lite_cc_test(test_dropout_compute_x86 SRCS dropout_compute_test.cc DEPS dropout_compute_x86) # lite_cc_test(test_batch_norm_compute_x86 SRCS batch_norm_compute_test.cc DEPS batch_norm_compute_x86) - - -set(x86_kernels -# activation_compute_x86 -# elementwise_compute_x86 -# mean_compute_x86 -# fill_constant_compute_x86 -# mul_compute_x86 -# 
relu_compute_x86 -# fc_compute_x86 - scale_compute_x86 -# softmax_compute_x86 -# dropout_compute_x86 -# concat_compute_x86 -# conv_compute_x86 -# pool_compute_x86 -# batch_norm_compute_x86 -# uniform_random_compute_x86 -# sgd_compute_x86 - CACHE INTERNAL "x86 kernels") diff --git a/lite/operators/CMakeLists.txt b/lite/operators/CMakeLists.txt index 4e7f7436e3..f46c0f02d6 100644 --- a/lite/operators/CMakeLists.txt +++ b/lite/operators/CMakeLists.txt @@ -1,180 +1,92 @@ -set(op_DEPS tensor op op_params) - -lite_cc_library(conv_op SRCS conv_op.cc DEPS ${op_DEPS}) -lite_cc_library(pool_op SRCS pool_op.cc DEPS ${op_DEPS}) -lite_cc_library(fc_op SRCS fc_op.cc DEPS ${op_DEPS}) -lite_cc_library(relu_op SRCS relu_op.cc DEPS ${op_DEPS}) -lite_cc_library(mul_op SRCS mul_op.cc DEPS ${op_DEPS}) -lite_cc_library(matmul_op SRCS matmul_op.cc DEPS ${op_DEPS}) -lite_cc_library(scale_op SRCS scale_op.cc DEPS ${op_DEPS}) -lite_cc_library(softmax_op SRCS softmax_op.cc DEPS ${op_DEPS}) -lite_cc_library(reshape_op SRCS reshape_op.cc DEPS ${op_DEPS} ) -lite_cc_library(flatten_op SRCS flatten_op.cc DEPS ${op_DEPS} ) -lite_cc_library(batch_norm_op SRCS batch_norm_op.cc DEPS ${op_DEPS}) -lite_cc_library(feed_op SRCS feed_op.cc DEPS ${op_DEPS}) -lite_cc_library(fetch_op SRCS fetch_op.cc DEPS ${op_DEPS}) -lite_cc_library(io_copy_op SRCS io_copy_op.cc DEPS ${op_DEPS}) -lite_cc_library(io_copy_once_op SRCS io_copy_once_op.cc DEPS io_copy_op ${op_DEPS}) -lite_cc_library(activation_ops SRCS activation_ops.cc DEPS ${op_DEPS}) -lite_cc_library(elementwise_ops SRCS elementwise_ops.cc DEPS ${op_DEPS}) -lite_cc_library(lrn_op_lite SRCS lrn_op.cc DEPS ${op_DEPS}) -lite_cc_library(decode_bboxes_op_lite SRCS decode_bboxes_op.cc DEPS ${op_DEPS}) -lite_cc_library(box_coder_op_lite SRCS box_coder_op.cc DEPS ${op_DEPS}) -lite_cc_library(multiclass_nms_op_lite SRCS multiclass_nms_op.cc DEPS ${op_DEPS}) -lite_cc_library(fusion_elementwise_activation_ops SRCS fusion_elementwise_activation_ops.cc DEPS 
elementwise_ops ${op_DEPS}) -lite_cc_library(mean_op SRCS mean_op.cc DEPS ${op_DEPS}) -lite_cc_library(fill_constant_op SRCS fill_constant_op.cc DEPS ${op_DEPS}) -lite_cc_library(sgd_op SRCS sgd_op.cc DEPS ${op_DEPS}) -lite_cc_library(uniform_random_op SRCS uniform_random_op.cc DEPS ${op_DEPS}) -lite_cc_library(power_op SRCS power_op.cc DEPS ${op_DEPS}) -lite_cc_library(shuffle_channel_op SRCS shuffle_channel_op.cc DEPS ${op_DEPS}) -lite_cc_library(yolo_box_op SRCS yolo_box_op.cc DEPS ${op_DEPS}) -lite_cc_library(interpolate_op SRCS interpolate_op.cc DEPS ${op_DEPS}) -lite_cc_library(argmax_op SRCS argmax_op.cc DEPS ${op_DEPS}) -lite_cc_library(axpy_op SRCS axpy_op.cc DEPS ${op_DEPS}) -lite_cc_library(gru_unit_op SRCS gru_unit_op.cc DEPS ${op_DEPS}) -lite_cc_library(gru_op SRCS gru_op.cc DEPS ${op_DEPS}) -lite_cc_library(layout_op SRCS layout_op.cc DEPS ${op_DEPS}) -lite_cc_library(layout_once_op SRCS layout_once_op.cc DEPS ${op_DEPS}) -lite_cc_library(while_op SRCS while_op.cc DEPS ${op_DEPS}) -lite_cc_library(lookup_table_op SRCS lookup_table_op.cc DEPS ${op_DEPS}) -lite_cc_library(beam_search_decode_op SRCS beam_search_decode_op.cc DEPS ${op_DEPS}) -lite_cc_library(prior_box_op SRCS prior_box_op.cc DEPS ${op_DEPS}) -lite_cc_library(density_prior_box_op SRCS density_prior_box_op.cc DEPS ${op_DEPS}) +set(op_DEPS tensor op op_params scope memory) lite_cc_library(op_params SRCS op_params.cc DEPS tensor any) -lite_cc_library(dropout_op SRCS dropout_op.cc DEPS ${op_DEPS}) -lite_cc_library(concat_op SRCS concat_op.cc DEPS ${op_DEPS}) -lite_cc_library(pad2d_op SRCS pad2d_op.cc DEPS ${op_DEPS}) -lite_cc_library(negative_op SRCS negative_op.cc DEPS ${op_DEPS}) -lite_cc_library(crop_op SRCS crop_op.cc DEPS ${op_DEPS}) -lite_cc_library(calib_op SRCS calib_op.cc DEPS ${op_DEPS}) -lite_cc_library(calib_once_op SRCS calib_once_op.cc DEPS ${op_DEPS}) -lite_cc_library(split_op SRCS split_op.cc DEPS ${op_DEPS}) -lite_cc_library(transpose_op SRCS transpose_op.cc DEPS ${op_DEPS}) 
-lite_cc_library(fake_quant SRCS fake_quantize_moving_avg_max_abs.cc DEPS ${op_DEPS}) -lite_cc_library(fake_quant_range SRCS fake_quantize_range_abs_max.cc DEPS ${op_DEPS}) -lite_cc_library(fake_dequant SRCS fake_dequantize_max_abs.cc DEPS ${op_DEPS}) -lite_cc_library(conv_transpose_op SRCS conv_transpose_op.cc DEPS ${op_DEPS}) -lite_cc_library(im2sequence_op SRCS im2sequence_op.cc DEPS ${op_DEPS}) -lite_cc_library(sequence_softmax_op SRCS sequence_softmax_op.cc DEPS ${op_DEPS}) -lite_cc_library(norm_op SRCS norm_op.cc DEPS ${op_DEPS}) -lite_cc_library(graph_op SRCS graph_op.cc DEPS ${op_DEPS}) -lite_cc_library(topk_op SRCS topk_op.cc DEPS ${op_DEPS}) -lite_cc_library(increment_op SRCS increment_op.cc DEPS ${op_DEPS}) -lite_cc_library(write_to_array_op SRCS write_to_array_op.cc DEPS ${op_DEPS}) -lite_cc_library(graph_op_lite SRCS graph_op.cc DEPS ${op_DEPS}) -lite_cc_library(logical_xor SRCS logical_op.cc DEPS ${op_DEPS}) -lite_cc_library(logical_and SRCS logical_op.cc DEPS ${op_DEPS}) -lite_cc_library(logical_or SRCS logical_op.cc DEPS ${op_DEPS}) -lite_cc_library(logical_not SRCS logical_op.cc DEPS ${op_DEPS}) -lite_cc_library(less_than SRCS compare_op.cc DEPS ${op_DEPS}) -lite_cc_library(equal SRCS compare_op.cc DEPS ${op_DEPS}) -lite_cc_library(not_equal SRCS compare_op.cc DEPS ${op_DEPS}) -lite_cc_library(less_equal SRCS compare_op.cc DEPS ${op_DEPS}) -lite_cc_library(greater_than SRCS compare_op.cc DEPS ${op_DEPS}) -lite_cc_library(greater_equal SRCS compare_op.cc DEPS ${op_DEPS}) -lite_cc_library(read_from_array_op SRCS read_from_array_op.cc DEPS ${op_DEPS}) -lite_cc_library(beam_search_op SRCS beam_search_op.cc DEPS ${op_DEPS}) -lite_cc_library(sequence_pool_op_lite SRCS sequence_pool_op.cc DEPS ${op_DEPS}) -lite_cc_library(sequence_expand_op_lite SRCS sequence_expand_op.cc DEPS ${op_DEPS}) -lite_cc_library(reduce_max_op_lite SRCS reduce_max_op.cc DEPS ${op_DEPS}) -lite_cc_library(lod_reset_op SRCS lod_reset_op.cc DEPS ${op_DEPS}) -lite_cc_library(is_empty 
SRCS is_empty_op.cc DEPS ${op_DEPS}) -lite_cc_library(shape_op_lite SRCS shape_op.cc DEPS ${op_DEPS}) -lite_cc_library(cast_op_lite SRCS cast_op.cc DEPS ${op_DEPS}) -lite_cc_library(slice_op_lite SRCS slice_op.cc DEPS ${op_DEPS}) -lite_cc_library(squeeze_op_lite SRCS squeeze_op.cc DEPS ${op_DEPS}) -lite_cc_library(expand_op_lite SRCS expand_op.cc DEPS ${op_DEPS}) +add_operator(conv_op basic SRCS conv_op.cc DEPS ${op_DEPS}) +add_operator(pool_op basic SRCS pool_op.cc DEPS ${op_DEPS}) +add_operator(fc_op basic SRCS fc_op.cc DEPS ${op_DEPS}) +add_operator(relu_op basic SRCS relu_op.cc DEPS ${op_DEPS}) +add_operator(mul_op basic SRCS mul_op.cc DEPS ${op_DEPS}) +add_operator(matmul_op basic SRCS matmul_op.cc DEPS ${op_DEPS}) +add_operator(scale_op basic SRCS scale_op.cc DEPS ${op_DEPS}) +add_operator(softmax_op basic SRCS softmax_op.cc DEPS ${op_DEPS}) +add_operator(reshape_op basic SRCS reshape_op.cc DEPS ${op_DEPS} ) +add_operator(batch_norm_op basic SRCS batch_norm_op.cc DEPS ${op_DEPS}) +add_operator(feed_op basic SRCS feed_op.cc DEPS ${op_DEPS}) +add_operator(fetch_op basic SRCS fetch_op.cc DEPS ${op_DEPS}) +add_operator(io_copy_op basic SRCS io_copy_op.cc DEPS ${op_DEPS}) +add_operator(io_copy_once_op basic SRCS io_copy_once_op.cc DEPS io_copy_op ${op_DEPS}) +add_operator(activation_ops basic SRCS activation_ops.cc DEPS ${op_DEPS}) +add_operator(elementwise_ops basic SRCS elementwise_ops.cc DEPS ${op_DEPS}) +add_operator(lrn_op_lite basic SRCS lrn_op.cc DEPS ${op_DEPS}) +add_operator(decode_bboxes_op_lite basic SRCS decode_bboxes_op.cc DEPS ${op_DEPS}) +add_operator(box_coder_op_lite basic SRCS box_coder_op.cc DEPS ${op_DEPS}) +add_operator(multiclass_nms_op_lite basic SRCS multiclass_nms_op.cc DEPS ${op_DEPS}) +add_operator(fusion_elementwise_activation_ops basic SRCS fusion_elementwise_activation_ops.cc DEPS elementwise_ops ${op_DEPS}) +add_operator(mean_op basic SRCS mean_op.cc DEPS ${op_DEPS}) +add_operator(fill_constant_op basic SRCS fill_constant_op.cc DEPS 
${op_DEPS}) +#add_operator(sgd_op basic SRCS sgd_op.cc DEPS ${op_DEPS}) +add_operator(uniform_random_op basic SRCS uniform_random_op.cc DEPS ${op_DEPS}) +add_operator(power_op basic SRCS power_op.cc DEPS ${op_DEPS}) +add_operator(shuffle_channel_op basic SRCS shuffle_channel_op.cc DEPS ${op_DEPS}) +add_operator(yolo_box_op basic SRCS yolo_box_op.cc DEPS ${op_DEPS}) +add_operator(interpolate_op basic SRCS interpolate_op.cc DEPS ${op_DEPS}) +add_operator(argmax_op basic SRCS argmax_op.cc DEPS ${op_DEPS}) +add_operator(axpy_op basic SRCS axpy_op.cc DEPS ${op_DEPS}) +add_operator(gru_unit_op basic SRCS gru_unit_op.cc DEPS ${op_DEPS}) +add_operator(gru_op basic SRCS gru_op.cc DEPS ${op_DEPS}) +add_operator(layout_op basic SRCS layout_op.cc DEPS ${op_DEPS}) +add_operator(layout_once_op basic SRCS layout_once_op.cc DEPS ${op_DEPS}) +add_operator(prior_box_op basic SRCS prior_box_op.cc DEPS ${op_DEPS}) +add_operator(density_prior_box_op basic SRCS density_prior_box_op.cc DEPS ${op_DEPS}) +add_operator(dropout_op basic SRCS dropout_op.cc DEPS ${op_DEPS}) +add_operator(concat_op basic SRCS concat_op.cc DEPS ${op_DEPS}) +add_operator(pad2d_op basic SRCS pad2d_op.cc DEPS ${op_DEPS}) +add_operator(negative_op basic SRCS negative_op.cc DEPS ${op_DEPS}) +add_operator(crop_op basic SRCS crop_op.cc DEPS ${op_DEPS}) +add_operator(calib_op basic SRCS calib_op.cc DEPS ${op_DEPS}) +add_operator(calib_once_op basic SRCS calib_once_op.cc DEPS ${op_DEPS}) +add_operator(split_op basic SRCS split_op.cc DEPS ${op_DEPS}) +add_operator(transpose_op basic SRCS transpose_op.cc DEPS ${op_DEPS}) +add_operator(fake_quant basic SRCS fake_quantize_moving_avg_max_abs.cc DEPS ${op_DEPS}) +add_operator(fake_dequant basic SRCS fake_dequantize_max_abs.cc DEPS ${op_DEPS}) +add_operator(conv_transpose_op basic SRCS conv_transpose_op.cc DEPS ${op_DEPS}) +add_operator(graph_op basic SRCS graph_op.cc DEPS ${op_DEPS}) +add_operator(expand_op_lite basic SRCS expand_op.cc DEPS ${op_DEPS}) 
+add_operator(reduce_max_op_lite basic SRCS reduce_max_op.cc DEPS ${op_DEPS}) +add_operator(norm_op basic SRCS norm_op.cc DEPS ${op_DEPS}) +add_operator(shape_op_lite basic SRCS shape_op.cc DEPS ${op_DEPS}) +add_operator(sequence_expand_op_lite basic SRCS sequence_expand_op.cc DEPS ${op_DEPS}) +add_operator(squeeze_op_lite basic SRCS squeeze_op.cc DEPS ${op_DEPS}) + +# for OCR specific +add_operator(im2sequence_op extra SRCS im2sequence_op.cc DEPS ${op_DEPS}) +add_operator(while_op extra SRCS while_op.cc DEPS ${op_DEPS}) +add_operator(lookup_table_op extra SRCS lookup_table_op.cc DEPS ${op_DEPS}) +add_operator(beam_search_decode_op extra SRCS beam_search_decode_op.cc DEPS ${op_DEPS}) +add_operator(graph_op_lite extra SRCS graph_op.cc DEPS ${op_DEPS}) +add_operator(logical_xor extra SRCS logical_op.cc DEPS ${op_DEPS}) +add_operator(logical_and extra SRCS logical_op.cc DEPS ${op_DEPS}) +add_operator(logical_or extra SRCS logical_op.cc DEPS ${op_DEPS}) +add_operator(logical_not extra SRCS logical_op.cc DEPS ${op_DEPS}) +add_operator(less_than extra SRCS compare_op.cc DEPS ${op_DEPS}) +add_operator(equal extra SRCS compare_op.cc DEPS ${op_DEPS}) +add_operator(not_equal extra SRCS compare_op.cc DEPS ${op_DEPS}) +add_operator(less_equal extra SRCS compare_op.cc DEPS ${op_DEPS}) +add_operator(greater_than extra SRCS compare_op.cc DEPS ${op_DEPS}) +add_operator(greater_equal extra SRCS compare_op.cc DEPS ${op_DEPS}) +add_operator(read_from_array_op extra SRCS read_from_array_op.cc DEPS ${op_DEPS}) +add_operator(beam_search_op extra SRCS beam_search_op.cc DEPS ${op_DEPS}) +add_operator(sequence_pool_op_lite extra SRCS sequence_pool_op.cc DEPS ${op_DEPS}) +add_operator(lod_reset_op extra SRCS lod_reset_op.cc DEPS ${op_DEPS}) +add_operator(is_empty extra SRCS is_empty_op.cc DEPS ${op_DEPS}) +add_operator(cast_op_lite extra SRCS cast_op.cc DEPS ${op_DEPS}) +add_operator(slice_op_lite extra SRCS slice_op.cc DEPS ${op_DEPS}) +add_operator(write_to_array_op extra SRCS 
write_to_array_op.cc DEPS ${op_DEPS}) +add_operator(topk_op extra SRCS topk_op.cc DEPS ${op_DEPS}) +add_operator(increment_op extra SRCS increment_op.cc DEPS ${op_DEPS}) +add_operator(sequence_softmax_op extra SRCS sequence_softmax_op.cc DEPS ${op_DEPS}) -set(ops - conv_op - pool_op - fc_op - relu_op - mul_op - matmul_op - scale_op - softmax_op - reshape_op - flatten_op - batch_norm_op - feed_op - fetch_op - gru_unit_op - gru_op - beam_search_decode_op - lookup_table_op - io_copy_op - io_copy_once_op - elementwise_ops - fusion_elementwise_activation_ops - lrn_op_lite - decode_bboxes_op_lite - multiclass_nms_op_lite - decode_bboxes_op_lite - box_coder_op_lite - multiclass_nms_op_lite - mean_op - fill_constant_op - activation_ops - dropout_op - concat_op - pad2d_op - crop_op - prior_box_op - density_prior_box_op - negative_op - calib_op - calib_once_op - split_op - transpose_op - fake_quant - fake_quant_range - fake_dequant - sgd_op - uniform_random_op - power_op - yolo_box_op - shuffle_channel_op - argmax_op - axpy_op - conv_transpose_op - im2sequence_op - sequence_softmax_op - norm_op - layout_op - layout_once_op - interpolate_op - logical_xor - logical_and - logical_or - logical_not - equal - not_equal - less_than - while_op - less_equal - greater_than - greater_equal - graph_op - topk_op - increment_op - write_to_array_op - read_from_array_op - beam_search_op - sequence_pool_op_lite - sequence_expand_op_lite - reduce_max_op_lite - lod_reset_op - is_empty - shape_op_lite - cast_op_lite - slice_op_lite - squeeze_op_lite - expand_op_lite - CACHE INTERNAL "ops lite") if (NOT LITE_WITH_X86) lite_cc_test(test_fc_op SRCS fc_op_test.cc @@ -188,7 +100,7 @@ if (NOT LITE_WITH_X86) lite_cc_test(test_softmax_op SRCS softmax_op_test.cc DEPS softmax_op memory) #lite_cc_test(test_reshape_op SRCS reshape_op_test.cc DEPS reshape_op memory) lite_cc_test(test_batch_norm_op SRCS batch_norm_op_test.cc DEPS batch_norm_op memory) - lite_cc_test(test_concat_op SRCS concat_op_test.cc DEPS 
concat_op memory) + lite_cc_test(test_concat_op SRCS concat_op_test.cc DEPS concat_op memory scope) lite_cc_test(test_calib_op SRCS calib_op_test.cc DEPS calib_op memory ARM_DEPS calib_compute_arm) lite_cc_test(test_fusion_elementwise_activation_ops SRCS fusion_elementwise_activation_ops_test.cc diff --git a/lite/tests/kernels/CMakeLists.txt b/lite/tests/kernels/CMakeLists.txt index 777e4408c0..8ef2532ac0 100644 --- a/lite/tests/kernels/CMakeLists.txt +++ b/lite/tests/kernels/CMakeLists.txt @@ -21,7 +21,13 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND (LITE_WITH_X86 OR LITE_WITH #lite_cc_test(test_kernel_increment_compute SRCS increment_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) #lite_cc_test(test_kernel_write_to_array_compute SRCS write_to_array_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) #lite_cc_test(test_kernel_read_from_array_compute SRCS read_from_array_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + +if(LITE_BUILD_EXTRA) lite_cc_test(test_gru_unit SRCS gru_unit_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_sequence_pool_compute SRCS sequence_pool_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) + lite_cc_test(test_kernel_reduce_max_compute SRCS reduce_max_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) +endif() + lite_cc_test(test_sgemm SRCS test_sgemm.cc DEPS ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_pad2d_compute SRCS pad2d_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_prior_box_compute SRCS prior_box_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) @@ -31,9 +37,7 @@ 
if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND (LITE_WITH_X86 OR LITE_WITH lite_cc_test(test_kernel_nearest_interp_compute SRCS nearest_interp_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_shape_compute SRCS shape_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_crop_compute SRCS crop_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_sequence_pool_compute SRCS sequence_pool_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_sequence_expand_compute SRCS sequence_expand_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) - lite_cc_test(test_kernel_reduce_max_compute SRCS reduce_max_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_squeeze_compute SRCS squeeze_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_expand_compute SRCS expand_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(test_kernel_matmul_compute SRCS matmul_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels}) diff --git a/lite/tools/build.sh b/lite/tools/build.sh index 1fe8fd7dc4..f31319998d 100755 --- a/lite/tools/build.sh +++ b/lite/tools/build.sh @@ -1,4 +1,5 @@ #!/bin/bash +set -ex readonly CMAKE_COMMON_OPTIONS="-DWITH_GPU=OFF \ -DWITH_MKL=OFF \ diff --git a/lite/tools/cmake_tools/parse_kernel_registry.py b/lite/tools/cmake_tools/parse_kernel_registry.py new file mode 100644 index 0000000000..99804748f3 --- /dev/null +++ b/lite/tools/cmake_tools/parse_kernel_registry.py @@ -0,0 +1,59 @@ +# Copyright (c) 2019 PaddlePaddle 
Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+''' Collect kernel registry information. '''
+
+import sys
+import logging
+
+ops_list_path = sys.argv[1]
+dest_path = sys.argv[2]
+
+out_lines = [
+    '#pragma once',
+    '#include "paddle_lite_factory_helper.h"',
+    '',
+]
+
+key = 'REGISTER_LITE_KERNEL('
+with open(ops_list_path) as f:
+    for path in (v.strip() for v in f):
+        if not path: continue  # tolerate blank lines in the source list
+
+        with open(path) as g:
+            lines = [v.strip() for v in g]
+        for i in range(len(lines)):
+            if not lines[i].startswith(key): continue
+            # A registration may wrap: join it with up to six following
+            # lines, then cut at the first '.' (presumably where chained
+            # calls on the registration begin -- confirm against usage).
+            remaining = lines[i][len(key):] + ' '.join(lines[i + 1:i + 7])
+            x = remaining.find('.')
+            if x > 0:
+                remaining = remaining[:x]
+
+            # Six fields are unpacked below; the fifth is not emitted.
+            fs = [v.strip() for v in remaining.split(',')]
+            if len(fs) < 6:
+                raise ValueError("malformed kernel registry in %s:%d: %r" %
+                                 (path, i + 1, lines[i]))
+            op, target, precision, layout, __, alias = fs[:6]
+            alias = alias.replace(')', '')
+            out = "USE_LITE_KERNEL(%s, %s, %s, %s, %s);" % (
+                op, target, precision, layout, alias)
+            # Emit each kernel once, even if its source is listed repeatedly.
+            if out not in out_lines: out_lines.append(out)
+
+with open(dest_path, 'w') as f:
+    logging.info("write kernel list to %s" % dest_path)
+    f.write('\n'.join(out_lines))
diff --git a/lite/tools/cmake_tools/parse_op_registry.py b/lite/tools/cmake_tools/parse_op_registry.py
new file mode 100644
index 0000000000..423036f6e8
--- /dev/null
+++ b/lite/tools/cmake_tools/parse_op_registry.py
@@ -0,0 +1,45 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+''' Collect op registry information. '''
+
+import sys
+import logging
+
+ops_list_path = sys.argv[1]
+dest_path = sys.argv[2]
+
+out_lines = [
+    '#pragma once',
+    '#include "paddle_lite_factory_helper.h"',
+    '',
+]
+
+key = 'REGISTER_LITE_OP('
+with open(ops_list_path) as f:
+    for path in (v.strip() for v in f):
+        if not path: continue  # tolerate blank lines in the source list
+        with open(path) as g:
+            for line in (v.strip() for v in g):
+                end = line.find(',')
+                # No comma means find() == -1, which would slice a bogus name.
+                if end < 0 or not line.startswith(key): continue
+                op = line[len(key):end].strip()
+                out = "USE_LITE_OP(%s);" % op
+                # Drop *_grad ops; emit each op once even if listed repeatedly.
+                if op and "_grad" not in op and out not in out_lines:
+                    out_lines.append(out)
+
+with open(dest_path, 'w') as f:
+    logging.info("write op list to %s" % dest_path)
+    f.write('\n'.join(out_lines))
-- 
GitLab