未验证 提交 36419766 编写于 作者: Y Yan Chunwei 提交者: GitHub

Refactor op kernel compile system (#1831)

上级 2f3f75f5
......@@ -80,6 +80,8 @@ option(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK "Enable light-weight framework" OFF)
option(LITE_WITH_PROFILE "Enable profile mode in lite framework" OFF)
option(LITE_SHUTDOWN_LOG "Shutdown log system or not." OFF)
option(LITE_ON_TINY_PUBLISH "Publish tiny predictor lib." OFF)
# publish options
option(LITE_BUILD_EXTRA "Enable extra algorithm support in Lite, both kernels and operators" OFF)
set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
"A path setting third party libraries download & build directories.")
......
......@@ -57,6 +57,8 @@ function (lite_deps TARGET)
endforeach(var)
endif()
if (NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
foreach(var ${lite_deps_HVY_DEPS})
set(deps ${deps} ${var})
......@@ -182,9 +184,16 @@ function(lite_cc_test TARGET)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS PROFILE_DEPS
LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS
ARGS)
ARGS
COMPILE_LEVEL # (basic|extra)
)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if (args_COMPILE_LEVEL STREQUAL "extra" AND (NOT LITE_BUILD_EXTRA))
MESSAGE(STATUS "Ignore test ${TARGET} due to compile level ${args_COMPILE_LEVEL}")
return()
endif()
set(deps "")
lite_deps(deps
DEPS ${args_DEPS}
......@@ -207,6 +216,117 @@ function(lite_cc_test TARGET)
endif()
endfunction()
# Per-device registries of kernel library targets; add_kernel() appends to them.
# They are INTERNAL cache entries so that they are visible from every directory.
# Each set() below passes no value, so every list is reset to empty whenever this
# file is processed and does not keep entries from a previous configure run.
set(arm_kernels CACHE INTERNAL "arm kernels")
set(x86_kernels CACHE INTERNAL "x86 kernels")
set(fpga_kernels CACHE INTERNAL "fpga kernels")
set(npu_kernels CACHE INTERNAL "npu kernels")
set(opencl_kernels CACHE INTERNAL "opencl kernels")
set(host_kernels CACHE INTERNAL "host kernels")
# Text file in the build tree that receives one kernel source path per line
# (written by add_kernel()).
set(kernels_src_list "${CMAKE_BINARY_DIR}/kernels_src_list.txt")
file(WRITE ${kernels_src_list} "") # truncate: start from an empty list on every configure
# Add a kernel library for some specific device.
#
# Positional arguments:
#   TARGET  name of the library target to create
#   device  one of (Host, ARM, X86, NPU, FPGA, OPENCL, CUDA)
#   level   one of (basic, extra)
# Keyword arguments (multi-value):
#   SRCS, DEPS, X86_DEPS, CUDA_DEPS, CL_DEPS, ARM_DEPS, FPGA_DEPS, PROFILE_DEPS,
#   LIGHT_DEPS, HVY_DEPS are forwarded to lite_cc_library().
#   EXCLUDE_COMPILE_DEPS and ARGS are accepted by the parser but not forwarded.
#
# Behaviour:
#   * An "extra" kernel is skipped entirely unless LITE_BUILD_EXTRA is on.
#   * An ARM/X86/NPU/FPGA/OPENCL kernel is skipped entirely unless the matching
#     LITE_WITH_<device> option is on; otherwise TARGET is appended to the
#     <device>_kernels cache list (arm_kernels, x86_kernels, ...).
#   * A Host kernel is always appended to host_kernels.
#   * Any other device value (including CUDA) is still built, but TARGET is not
#     recorded in any per-device list.
#   * Every source file is appended to ${kernels_src_list}, one absolute path per
#     line, and the library itself is created with lite_cc_library().
function(add_kernel TARGET device level)
  set(options "")
  set(oneValueArgs "")
  set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS PROFILE_DEPS
      LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS
      ARGS)
  cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

  if ("${level}" STREQUAL "extra" AND (NOT LITE_BUILD_EXTRA))
    return()
  endif()

  # Name of the cache list that should receive TARGET; empty means "no list".
  set(kernel_list_var "")
  if ("${device}" STREQUAL "Host")
    # Host kernels are not gated by any LITE_WITH_* option.
    set(kernel_list_var host_kernels)
  else()
    foreach(gated_device ARM X86 NPU FPGA OPENCL)
      if ("${device}" STREQUAL "${gated_device}")
        if (NOT LITE_WITH_${gated_device})
          # Device support is disabled: neither register nor build the kernel.
          return()
        endif()
        string(TOLOWER "${gated_device}" list_prefix)
        set(kernel_list_var "${list_prefix}_kernels")
      endif()
    endforeach()
  endif()

  if (kernel_list_var)
    # list(APPEND) avoids the leading empty element that plain
    # "${list};${TARGET}" concatenation produces when the list is still empty.
    set(registered_kernels "${${kernel_list_var}}")
    list(APPEND registered_kernels ${TARGET})
    set(${kernel_list_var} "${registered_kernels}" CACHE INTERNAL "")
  endif()

  # Record the sources so the kernel-registry header can be generated from them.
  foreach(src ${args_SRCS})
    file(APPEND ${kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
  endforeach()

  lite_cc_library(${TARGET} SRCS ${args_SRCS}
    DEPS ${args_DEPS}
    X86_DEPS ${args_X86_DEPS}
    CUDA_DEPS ${args_CUDA_DEPS}
    CL_DEPS ${args_CL_DEPS}
    ARM_DEPS ${args_ARM_DEPS}
    FPGA_DEPS ${args_FPGA_DEPS}
    PROFILE_DEPS ${args_PROFILE_DEPS}
    LIGHT_DEPS ${args_LIGHT_DEPS}
    HVY_DEPS ${args_HVY_DEPS}
  )
endfunction()
# Registry of operator library targets; add_operator() appends to it.
# The set() passes no value, so the list is reset to empty whenever this file is
# processed and does not keep entries from a previous configure run.
set(ops CACHE INTERNAL "ops")
# Text file in the build tree that receives one operator source path per line
# (written by add_operator()).
set(ops_src_list "${CMAKE_BINARY_DIR}/ops_src_list.txt")
file(WRITE ${ops_src_list} "") # truncate: start from an empty list on every configure
# Add an operator library.
#
# Positional arguments:
#   TARGET  name of the library target to create
#   level   one of (basic, extra)
# Keyword arguments (multi-value):
#   SRCS, DEPS, X86_DEPS, CUDA_DEPS, CL_DEPS, ARM_DEPS, FPGA_DEPS, PROFILE_DEPS,
#   LIGHT_DEPS, HVY_DEPS are forwarded to lite_cc_library().
#   EXCLUDE_COMPILE_DEPS and ARGS are accepted by the parser but not forwarded.
#
# Behaviour:
#   * An "extra" operator is skipped entirely unless LITE_BUILD_EXTRA is on.
#   * Otherwise TARGET is appended to the `ops` cache list, every source file is
#     appended to ${ops_src_list} (one absolute path per line), and the library
#     is created with lite_cc_library().
function(add_operator TARGET level)
  set(options "")
  set(oneValueArgs "")
  set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS PROFILE_DEPS
      LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS
      ARGS)
  cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

  if ("${level}" STREQUAL "extra" AND (NOT LITE_BUILD_EXTRA))
    return()
  endif()

  # list(APPEND) avoids the leading empty element that plain
  # "${ops};${TARGET}" concatenation produces when the list is still empty.
  set(registered_ops "${ops}")
  list(APPEND registered_ops ${TARGET})
  set(ops "${registered_ops}" CACHE INTERNAL "source")

  # Record the sources so the op-registry header can be generated from them.
  foreach(src ${args_SRCS})
    file(APPEND ${ops_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
  endforeach()

  lite_cc_library(${TARGET} SRCS ${args_SRCS}
    DEPS ${args_DEPS}
    X86_DEPS ${args_X86_DEPS}
    CUDA_DEPS ${args_CUDA_DEPS}
    CL_DEPS ${args_CL_DEPS}
    ARM_DEPS ${args_ARM_DEPS}
    FPGA_DEPS ${args_FPGA_DEPS}
    PROFILE_DEPS ${args_PROFILE_DEPS}
    LIGHT_DEPS ${args_LIGHT_DEPS}
    HVY_DEPS ${args_HVY_DEPS}
  )
endfunction()
# Bundle several static libraries into one.
function(bundle_static_library tgt_name bundled_tgt_name fake_target)
......
......@@ -13,7 +13,6 @@ set(LITE_MODEL_DIR "${THIRD_PARTY_PATH}/install")
set(LITE_ON_MOBILE ${LITE_WITH_LIGHT_WEIGHT_FRAMEWORK})
add_subdirectory(utils)
add_subdirectory(operators)
add_subdirectory(kernels)
......
......@@ -17,6 +17,7 @@ if(LITE_WITH_FPGA)
endif()
message(STATUS "get ops ${ops}")
message(STATUS "get X86 kernels ${x86_kernels}")
message(STATUS "get Host kernels ${host_kernels}")
message(STATUS "get ARM kernels ${arm_kernels}")
message(STATUS "get NPU kernels ${npu_kernels}")
......
......@@ -37,9 +37,36 @@ lite_cc_library(context SRCS context.cc DEPS tensor any cpu_info CL_DEPS cl_cont
else()
lite_cc_library(context SRCS context.cc DEPS tensor any cpu_info eigen3 CL_DEPS cl_context gflags)
endif()
lite_cc_library(kernel SRCS kernel.cc DEPS context type_system target_wrapper any op_params tensor)
#----------------------------------------------- NOT CHANGE -----------------------------------------------
# A trick to generate the paddle_use_kernels.h
# parse_kernel_registry.py is run with the collected kernel source list as input
# and the header path as output. DEPENDS makes the header regenerate whenever
# the source list (rewritten on every configure) or the parser script changes;
# without it an existing header would never be refreshed.
add_custom_command(
  COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/parse_kernel_registry.py
  ${kernels_src_list}
  ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_kernels.h
  OUTPUT ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_kernels.h
  DEPENDS ${kernels_src_list}
          ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/parse_kernel_registry.py
  VERBATIM
  )
# A trick to generate the paddle_use_ops.h
# Same scheme as above, driven by the collected operator source list.
add_custom_command(
  COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/parse_op_registry.py
  ${ops_src_list}
  ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_ops.h
  OUTPUT ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_ops.h
  DEPENDS ${ops_src_list}
          ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/parse_op_registry.py
  VERBATIM
  )
# Named targets that other targets can depend on to make sure the generated
# headers exist before they are compiled.
add_custom_target(op_list_h DEPENDS ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_ops.h)
add_custom_target(kernel_list_h DEPENDS ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_kernels.h)
#----------------------------------------------- NOT CHANGE -----------------------------------------------
lite_cc_library(kernel SRCS kernel.cc DEPS context type_system target_wrapper any op_params tensor
)
lite_cc_library(op SRCS op_lite.cc DEPS scope op_registry target_wrapper kernel
cpp_op_desc tensor)
cpp_op_desc tensor
)
add_dependencies(kernel kernel_list_h)
add_dependencies(op op_list_h)
lite_cc_library(type_system SRCS type_system.cc DEPS tensor target_wrapper)
lite_cc_library(program SRCS program.cc
......@@ -73,3 +100,17 @@ lite_cc_test(test_type_system SRCS type_system_test.cc DEPS type_system utils)
lite_cc_test(test_types SRCS types_test.cc DEPS types)
lite_cc_test(test_memory SRCS memory_test.cc DEPS memory)
lite_cc_test(test_context SRCS context_test.cc DEPS context)
# # A trick to generate the paddle_use_kernels.h
# execute_process(
# COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/parse_kernel_registry.py
# ${kernels_src_list}
# ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_kernels.h
# )
# # A trick to generate the paddle_use_ops.h
# execute_process(
# COMMAND python ${CMAKE_SOURCE_DIR}/lite/tools/cmake_tools/parse_op_registry.py
# ${ops_src_list}
# ${CMAKE_SOURCE_DIR}/lite/api/paddle_use_ops.h
# )
message(STATUS "add lite kernels")
set(lite_kernel_deps type_system kernel op op_registry context tensor CACHE INTERNAL "" FORCE)
set(lite_kernel_deps type_system kernel op op_registry context tensor any CACHE INTERNAL "" FORCE)
add_subdirectory(host)
add_subdirectory(arm)
......
......@@ -4,64 +4,66 @@ endif()
message(STATUS "compile with lite ARM kernels")
lite_cc_library(fc_compute_arm SRCS fc_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(activation_compute_arm SRCS activation_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(mul_compute_arm SRCS mul_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(matmul_compute_arm SRCS matmul_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(scale_compute_arm SRCS scale_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(softmax_compute_arm SRCS softmax_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(conv_compute_arm SRCS conv_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(batch_norm_compute_arm SRCS batch_norm_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(elementwise_compute_arm SRCS elementwise_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(lrn_compute_arm SRCS lrn_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(decode_bboxes_compute_arm SRCS decode_bboxes_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(multiclass_nms_compute_arm SRCS multiclass_nms_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(pool_compute_arm SRCS pool_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(split_compute_arm SRCS split_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(concat_compute_arm SRCS concat_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(pad2d_compute_arm SRCS pad2d_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(prior_box_compute_arm SRCS prior_box_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(density_prior_box_compute_arm SRCS density_prior_box_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(negative_compute_arm SRCS negative_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(crop_compute_arm SRCS crop_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(dropout_compute_arm SRCS dropout_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(calib_compute_arm SRCS calib_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(transpose_compute_arm SRCS transpose_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(power_compute_arm SRCS power_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(yolo_box_compute_arm SRCS yolo_box_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(shuffle_channel_compute_arm SRCS shuffle_channel_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(argmax_compute_arm SRCS argmax_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(axpy_compute_arm SRCS axpy_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(conv_transpose_compute_arm SRCS conv_transpose_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(gru_unit_compute_arm SRCS gru_unit_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(gru_compute_arm SRCS gru_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(beam_search_decode_compute_arm SRCS beam_search_decode_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(lookup_table_compute_arm SRCS lookup_table_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(im2sequence_compute_arm SRCS im2sequence_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(sequence_softmax_compute_arm SRCS sequence_softmax_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(norm_compute_arm SRCS norm_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(interpolate_compute_arm SRCS interpolate_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(logical_compute_arm SRCS logical_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(less_than_arm SRCS compare_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(while_compute_arm SRCS while_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(compare_compute_arm SRCS compare_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(topk_compute_arm SRCS topk_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(increment_compute_arm SRCS increment_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(write_to_array_compute_arm SRCS write_to_array_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(read_from_array_compute_arm SRCS read_from_array_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(beam_search_compute_arm SRCS beam_search_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(fill_constant_compute_arm SRCS fill_constant_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(lod_reset_compute_arm SRCS lod_reset_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(box_coder_compute_arm SRCS box_coder_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(sequence_pool_compute_arm SRCS sequence_pool_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(sequence_expand_compute_arm SRCS sequence_expand_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(reduce_max_compute_arm SRCS reduce_max_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(is_empty_compute_arm SRCS is_empty_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(shape_compute_arm SRCS shape_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(slice_compute_arm SRCS slice_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(cast_compute_arm SRCS cast_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(squeeze_compute_arm SRCS squeeze_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_library(expand_compute_arm SRCS expand_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(fc_compute_arm ARM basic SRCS fc_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(activation_compute_arm ARM basic SRCS activation_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(mul_compute_arm ARM basic SRCS mul_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(matmul_compute_arm ARM basic SRCS matmul_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(scale_compute_arm ARM basic SRCS scale_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(softmax_compute_arm ARM basic SRCS softmax_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(conv_compute_arm ARM basic SRCS conv_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(batch_norm_compute_arm ARM basic SRCS batch_norm_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(elementwise_compute_arm ARM basic SRCS elementwise_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(lrn_compute_arm ARM basic SRCS lrn_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(decode_bboxes_compute_arm ARM basic SRCS decode_bboxes_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(multiclass_nms_compute_arm ARM basic SRCS multiclass_nms_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(pool_compute_arm ARM basic SRCS pool_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(split_compute_arm ARM basic SRCS split_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(concat_compute_arm ARM basic SRCS concat_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(pad2d_compute_arm ARM basic SRCS pad2d_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(prior_box_compute_arm ARM basic SRCS prior_box_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(density_prior_box_compute_arm ARM basic SRCS density_prior_box_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(negative_compute_arm ARM basic SRCS negative_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(crop_compute_arm ARM basic SRCS crop_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(dropout_compute_arm ARM basic SRCS dropout_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(calib_compute_arm ARM basic SRCS calib_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(transpose_compute_arm ARM basic SRCS transpose_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(power_compute_arm ARM basic SRCS power_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(yolo_box_compute_arm ARM basic SRCS yolo_box_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(shuffle_channel_compute_arm ARM basic SRCS shuffle_channel_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(argmax_compute_arm ARM basic SRCS argmax_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(axpy_compute_arm ARM basic SRCS axpy_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(conv_transpose_compute_arm ARM basic SRCS conv_transpose_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(norm_compute_arm ARM basic SRCS norm_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(interpolate_compute_arm ARM basic SRCS interpolate_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(box_coder_compute_arm ARM basic SRCS box_coder_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(shape_compute_arm ARM basic SRCS shape_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(slice_compute_arm ARM basic SRCS slice_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(cast_compute_arm ARM basic SRCS cast_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(squeeze_compute_arm ARM basic SRCS squeeze_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(expand_compute_arm ARM basic SRCS expand_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(reduce_max_compute_arm ARM basic SRCS reduce_max_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(sequence_expand_compute_arm ARM basic SRCS sequence_expand_compute.cc DEPS ${lite_kernel_deps} math_arm)
# for OCR specific
add_kernel(im2sequence_compute_arm ARM extra SRCS im2sequence_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(sequence_pool_compute_arm ARM extra SRCS sequence_pool_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(gru_unit_compute_arm ARM extra SRCS gru_unit_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(gru_compute_arm ARM extra SRCS gru_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(beam_search_decode_compute_arm ARM extra SRCS beam_search_decode_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(lookup_table_compute_arm ARM extra SRCS lookup_table_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(logical_compute_arm ARM extra SRCS logical_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(sequence_softmax_compute_arm ARM extra SRCS sequence_softmax_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(less_than_arm ARM extra SRCS compare_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(while_compute_arm ARM extra SRCS while_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(compare_compute_arm ARM extra SRCS compare_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(topk_compute_arm ARM extra SRCS topk_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(increment_compute_arm ARM extra SRCS increment_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(write_to_array_compute_arm ARM extra SRCS write_to_array_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(read_from_array_compute_arm ARM extra SRCS read_from_array_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(beam_search_compute_arm ARM extra SRCS beam_search_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(fill_constant_compute_arm ARM extra SRCS fill_constant_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(lod_reset_compute_arm ARM extra SRCS lod_reset_compute.cc DEPS ${lite_kernel_deps} math_arm)
add_kernel(is_empty_compute_arm ARM extra SRCS is_empty_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite_cc_test(test_fc_compute_arm SRCS fc_compute_test.cc DEPS fc_compute_arm math_arm)
lite_cc_test(test_scale_compute_arm SRCS scale_compute_test.cc DEPS scale_compute_arm)
......@@ -77,71 +79,7 @@ lite_cc_test(test_mul_compute_arm SRCS mul_compute_test.cc DEPS mul_compute_arm)
lite_cc_test(test_split_compute_arm SRCS split_compute_test.cc DEPS split_compute_arm)
lite_cc_test(test_concat_compute_arm SRCS concat_compute_test.cc DEPS concat_compute_arm)
lite_cc_test(test_dropout_compute_arm SRCS dropout_compute_test.cc DEPS dropout_compute_arm)
lite_cc_test(test_transpose_compute_arm SRCS transpose_compute_test.cc DEPS transpose_compute_arm)
lite_cc_test(test_transpose_compute_arm SRCS transpose_compute_test.cc DEPS transpose_compute_arm COMPILE_LEVEL extra)
lite_cc_test(test_argmax_compute_arm SRCS argmax_compute_test.cc DEPS argmax_compute_arm)
lite_cc_test(test_axpy_compute_arm SRCS axpy_compute_test.cc DEPS axpy_compute_arm)
lite_cc_test(test_conv_transpose_compute_arm SRCS conv_transpose_compute_test.cc DEPS conv_transpose_compute_arm)
set(arm_kernels
fc_compute_arm
activation_compute_arm
mul_compute_arm
matmul_compute_arm
scale_compute_arm
softmax_compute_arm
conv_compute_arm
batch_norm_compute_arm
elementwise_compute_arm
lrn_compute_arm
decode_bboxes_compute_arm
multiclass_nms_compute_arm
pool_compute_arm
split_compute_arm
concat_compute_arm
pad2d_compute_arm
prior_box_compute_arm
density_prior_box_compute_arm
negative_compute_arm
crop_compute_arm
dropout_compute_arm
transpose_compute_arm
calib_compute_arm
argmax_compute_arm
axpy_compute_arm
conv_transpose_compute_arm
gru_unit_compute_arm
gru_compute_arm
beam_search_decode_compute_arm
lookup_table_compute_arm
im2sequence_compute_arm
sequence_softmax_compute_arm
norm_compute_arm
power_compute_arm
shuffle_channel_compute_arm
yolo_box_compute_arm
interpolate_compute_arm
logical_compute_arm
less_than_arm
while_compute_arm
compare_compute_arm
topk_compute_arm
increment_compute_arm
write_to_array_compute_arm
read_from_array_compute_arm
beam_search_compute_arm
fill_constant_compute_arm
lod_reset_compute_arm
box_coder_compute_arm
reduce_max_compute_arm
sequence_expand_compute_arm
sequence_pool_compute_arm
is_empty_compute_arm
shape_compute_arm
slice_compute_arm
cast_compute_arm
squeeze_compute_arm
expand_compute_arm
)
set(arm_kernels "${arm_kernels}" CACHE INTERNAL "arm kernels")
if (NOT LITE_WITH_FPGA)
return()
endif()
message("fpga : ${lite_kernel_deps}")
set(fpga_deps fpga_target_wrapper kernel_fpga)
lite_cc_library(activation_compute_fpga SRCS activation_compute.cc DEPS ${fpga_deps})
add_kernel(activation_compute_fpga FPGA basic SRCS activation_compute.cc DEPS ${fpga_deps})
lite_cc_test(test_acivation_fpga SRCS activation_compute_test.cc DEPS ${lite_kernel_deps} activation_compute_fpga ${fpga_deps})
lite_cc_library(conv_compute_fpga SRCS conv_compute.cc DEPS ${fpga_deps})
add_kernel(conv_compute_fpga FPGA basic SRCS conv_compute.cc DEPS ${fpga_deps})
lite_cc_test(test_conv_fpga SRCS conv_compute_test.cc DEPS ${lite_kernel_deps} conv_compute_fpga ${fpga_deps})
lite_cc_library(elementwise_compute_fpga SRCS elementwise_compute.cc DEPS ${fpga_deps})
add_kernel(elementwise_compute_fpga FPGA basic SRCS elementwise_compute.cc DEPS ${fpga_deps})
lite_cc_test(test_elementwise_fpga SRCS elementwise_compute_test.cc DEPS ${lite_kernel_deps} elementwise_compute_fpga ${fpga_deps})
lite_cc_library(pooling_compute_fpga SRCS pooling_compute.cc DEPS ${fpga_deps})
add_kernel(pooling_compute_fpga FPGA basic SRCS pooling_compute.cc DEPS ${fpga_deps})
lite_cc_test(test_pooling_compute_fpga SRCS pooling_compute_test.cc DEPS ${lite_kernel_deps} pooling_compute_fpga ${fpga_deps})
lite_cc_library(scale_compute_fpga SRCS scale_compute.cc DEPS ${fpga_deps})
add_kernel(scale_compute_fpga FPGA basic SRCS scale_compute.cc DEPS ${fpga_deps})
lite_cc_library(softmax_compute_fpga SRCS softmax_compute.cc DEPS ${fpga_deps})
add_kernel(softmax_compute_fpga FPGA basic SRCS softmax_compute.cc DEPS ${fpga_deps})
lite_cc_test(test_softmax_compute_fpga SRCS softmax_compute_test.cc DEPS ${lite_kernel_deps} softmax_compute_fpga ${fpga_deps})
lite_cc_library(fc_compute_fpga SRCS fc_compute.cc DEPS ${fpga_deps})
add_kernel(fc_compute_fpga FPGA basic SRCS fc_compute.cc DEPS ${fpga_deps})
lite_cc_test(test_fc_compute_fpga SRCS fc_compute_test.cc DEPS ${lite_kernel_deps} fc_compute_fpga ${fpga_deps})
lite_cc_library(io_copy_compute_fpga SRCS io_copy_compute.cc DEPS ${fpga_deps})
lite_cc_library(calib_compute_fpga SRCS calib_compute.cc DEPS ${fpga_deps})
lite_cc_library(layout_compute_fpga SRCS layout_compute.cc DEPS ${fpga_deps})
lite_cc_library(feed_compute_fpga SRCS feed_compute.cc DEPS ${fpga_deps})
lite_cc_library(fetch_compute_fpga SRCS fetch_compute.cc DEPS ${fpga_deps})
set (fpga_kernels
activation_compute_fpga
conv_compute_fpga
elementwise_compute_fpga
pooling_compute_fpga
scale_compute_fpga
softmax_compute_fpga
fc_compute_fpga
io_copy_compute_fpga
calib_compute_fpga
layout_compute_fpga
feed_compute_fpga
fetch_compute_fpga
)
set(fpga_kernels "${fpga_kernels}" CACHE INTERNAL "fpga kernels")
add_kernel(io_copy_compute_fpga FPGA basic SRCS io_copy_compute.cc DEPS ${fpga_deps})
add_kernel(calib_compute_fpga FPGA basic SRCS calib_compute.cc DEPS ${fpga_deps})
add_kernel(layout_compute_fpga FPGA basic SRCS layout_compute.cc DEPS ${fpga_deps})
add_kernel(feed_compute_fpga FPGA basic SRCS feed_compute.cc DEPS ${fpga_deps})
add_kernel(fetch_compute_fpga FPGA basic SRCS fetch_compute.cc DEPS ${fpga_deps})
message(STATUS "compile with lite host kernels")
lite_cc_library(feed_compute_host SRCS feed_compute.cc DEPS ${lite_kernel_deps})
lite_cc_library(fetch_compute_host SRCS fetch_compute.cc DEPS ${lite_kernel_deps})
lite_cc_library(reshape_compute_host SRCS reshape_compute.cc DEPS ${lite_kernel_deps} reshape_op)
lite_cc_test(test_reshape_compute_host SRCS reshape_compute_test.cc DEPS reshape_compute_host)
set(host_kernels
feed_compute_host
fetch_compute_host
reshape_compute_host
)
set(host_kernels "${host_kernels}" CACHE GLOBAL "host kernels")
add_kernel(feed_compute_host Host basic SRCS feed_compute.cc DEPS ${lite_kernel_deps})
add_kernel(fetch_compute_host Host basic SRCS fetch_compute.cc DEPS ${lite_kernel_deps})
add_kernel(reshape_compute_host Host basic SRCS reshape_compute.cc DEPS ${lite_kernel_deps} reshape_op)
lite_cc_test(test_reshape_compute_host SRCS reshape_compute_test.cc DEPS reshape_compute_host any)
......@@ -5,9 +5,5 @@ endif()
message(STATUS "compile with lite NPU kernels")
lite_cc_library(graph_compute_npu SRCS graph_compute.cc DEPS ${lite_kernel_deps} ${npu_ddk_libs})
add_kernel(graph_compute_npu NPU basic SRCS graph_compute.cc DEPS ${lite_kernel_deps} ${npu_ddk_libs})
# lite_cc_test(test_graph_compute_npu SRCS graph_compute_test.cc DEPS graph_compute_npu)
set(npu_kernels graph_compute_npu)
set(npu_kernels "${npu_kernels}" CACHE INTERNAL "npu kernels")
......@@ -4,17 +4,17 @@ endif()
set(cl_kernel_deps op_params cl_runtime cl_context cl_wrapper cl_target_wrapper)
lite_cc_library(fc_opencl SRCS fc_compute.cc DEPS ${cl_kernel_deps})
lite_cc_library(mul_opencl SRCS mul_compute.cc DEPS ${cl_kernel_deps})
lite_cc_library(elementwise_add_opencl SRCS elementwise_add_compute.cc DEPS ${cl_kernel_deps})
lite_cc_library(fusion_elementwise_add_activation_opencl
SRCS fusion_elementwise_add_activation_compute.cc
add_kernel(fc_opencl OPENCL basic SRCS fc_compute.cc DEPS ${cl_kernel_deps})
add_kernel(mul_opencl OPENCL basic SRCS mul_compute.cc DEPS ${cl_kernel_deps})
add_kernel(elementwise_add_opencl OPENCL basic SRCS elementwise_add_compute.cc DEPS ${cl_kernel_deps})
add_kernel(fusion_elementwise_add_activation_opencl
OPENCL basic SRCS fusion_elementwise_add_activation_compute.cc
DEPS elementwise_add_opencl ${cl_kernel_deps})
lite_cc_library(pool_opencl SRCS pool_compute.cc DEPS ${cl_kernel_deps})
lite_cc_library(io_copy_compute_opencl SRCS io_copy_compute.cc DEPS ${tensor_lite} ${cl_kernel_deps})
lite_cc_library(relu_opencl SRCS relu_compute.cc DEPS ${cl_kernel_deps})
lite_cc_library(depthwise_conv2d_opencl SRCS depthwise_conv2d_compute.cc DEPS ${cl_kernel_deps})
lite_cc_library(conv_opencl SRCS conv_compute.cc DEPS ${cl_kernel_deps})
add_kernel(pool_opencl OPENCL basic SRCS pool_compute.cc DEPS ${cl_kernel_deps})
add_kernel(io_copy_compute_opencl OPENCL basic SRCS io_copy_compute.cc DEPS ${tensor_lite} ${cl_kernel_deps})
add_kernel(relu_opencl OPENCL basic SRCS relu_compute.cc DEPS ${cl_kernel_deps})
add_kernel(depthwise_conv2d_opencl OPENCL basic SRCS depthwise_conv2d_compute.cc DEPS ${cl_kernel_deps})
add_kernel(conv_opencl OPENCL basic SRCS conv_compute.cc DEPS ${cl_kernel_deps})
lite_cc_test(test_elementwise_add_opencl SRCS elementwise_add_compute_test.cc
DEPS elementwise_add_opencl fusion_elementwise_add_activation_opencl op_registry program context
......@@ -47,15 +47,3 @@ lite_cc_test(test_depthwise_conv2d_opencl SRCS depthwise_conv2d_compute_test.cc
lite_cc_test(test_conv_opencl SRCS conv_compute_test.cc
DEPS conv_opencl op_registry program context
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/opencl)
set(opencl_kernels
io_copy_compute_opencl
elementwise_add_opencl
fusion_elementwise_add_activation_opencl
pool_opencl
relu_opencl
mul_opencl
fc_opencl
depthwise_conv2d_opencl
conv_opencl
CACHE INTERNAL "opencl_kernels")
......@@ -10,7 +10,7 @@ endif()
# lite_cc_library(fc_compute_x86 SRCS fc_compute.cc DEPS ${lite_kernel_deps})
# lite_cc_library(mul_compute_x86 SRCS mul_compute.cc DEPS ${lite_kernel_deps})
# lite_cc_library(relu_compute_x86 SRCS relu_compute.cc DEPS ${lite_kernel_deps})
lite_cc_library(scale_compute_x86 SRCS scale_compute.cc DEPS ${lite_kernel_deps})
add_kernel(scale_compute_x86 X86 basic SRCS scale_compute.cc DEPS ${lite_kernel_deps})
# lite_cc_library(elementwise_compute_x86 SRCS elementwise_compute.cc DEPS ${lite_kernel_deps} elementwise_sub_op elementwise_add_op)
# lite_cc_library(softmax_compute_x86 SRCS softmax_compute.cc DEPS ${lite_kernel_deps} softmax)
# lite_cc_library(dropout_compute_x86 SRCS dropout_compute.cc DEPS ${lite_kernel_deps} )
......@@ -31,23 +31,3 @@ lite_cc_library(scale_compute_x86 SRCS scale_compute.cc DEPS ${lite_kernel_deps}
# lite_cc_test(test_scale_compute_x86 SRCS scale_compute_test.cc DEPS scale_compute_x86)
# lite_cc_test(test_dropout_compute_x86 SRCS dropout_compute_test.cc DEPS dropout_compute_x86)
# lite_cc_test(test_batch_norm_compute_x86 SRCS batch_norm_compute_test.cc DEPS batch_norm_compute_x86)
set(x86_kernels
# activation_compute_x86
# elementwise_compute_x86
# mean_compute_x86
# fill_constant_compute_x86
# mul_compute_x86
# relu_compute_x86
# fc_compute_x86
scale_compute_x86
# softmax_compute_x86
# dropout_compute_x86
# concat_compute_x86
# conv_compute_x86
# pool_compute_x86
# batch_norm_compute_x86
# uniform_random_compute_x86
# sgd_compute_x86
CACHE INTERNAL "x86 kernels")
# Dependency list shared by the operator libraries declared below.
set(op_DEPS tensor op op_params)
# One library target per operator; each is built from a single source file
# and depends on ${op_DEPS}.
lite_cc_library(conv_op SRCS conv_op.cc DEPS ${op_DEPS})
lite_cc_library(pool_op SRCS pool_op.cc DEPS ${op_DEPS})
lite_cc_library(fc_op SRCS fc_op.cc DEPS ${op_DEPS})
lite_cc_library(relu_op SRCS relu_op.cc DEPS ${op_DEPS})
lite_cc_library(mul_op SRCS mul_op.cc DEPS ${op_DEPS})
lite_cc_library(matmul_op SRCS matmul_op.cc DEPS ${op_DEPS})
lite_cc_library(scale_op SRCS scale_op.cc DEPS ${op_DEPS})
lite_cc_library(softmax_op SRCS softmax_op.cc DEPS ${op_DEPS})
lite_cc_library(reshape_op SRCS reshape_op.cc DEPS ${op_DEPS} )
lite_cc_library(flatten_op SRCS flatten_op.cc DEPS ${op_DEPS} )
lite_cc_library(batch_norm_op SRCS batch_norm_op.cc DEPS ${op_DEPS})
lite_cc_library(feed_op SRCS feed_op.cc DEPS ${op_DEPS})
lite_cc_library(fetch_op SRCS fetch_op.cc DEPS ${op_DEPS})
lite_cc_library(io_copy_op SRCS io_copy_op.cc DEPS ${op_DEPS})
# io_copy_once_op additionally depends on io_copy_op.
lite_cc_library(io_copy_once_op SRCS io_copy_once_op.cc DEPS io_copy_op ${op_DEPS})
lite_cc_library(activation_ops SRCS activation_ops.cc DEPS ${op_DEPS})
lite_cc_library(elementwise_ops SRCS elementwise_ops.cc DEPS ${op_DEPS})
lite_cc_library(lrn_op_lite SRCS lrn_op.cc DEPS ${op_DEPS})
lite_cc_library(decode_bboxes_op_lite SRCS decode_bboxes_op.cc DEPS ${op_DEPS})
lite_cc_library(box_coder_op_lite SRCS box_coder_op.cc DEPS ${op_DEPS})
lite_cc_library(multiclass_nms_op_lite SRCS multiclass_nms_op.cc DEPS ${op_DEPS})
# The fused elementwise/activation ops additionally depend on elementwise_ops.
lite_cc_library(fusion_elementwise_activation_ops SRCS fusion_elementwise_activation_ops.cc DEPS elementwise_ops ${op_DEPS})
lite_cc_library(mean_op SRCS mean_op.cc DEPS ${op_DEPS})
lite_cc_library(fill_constant_op SRCS fill_constant_op.cc DEPS ${op_DEPS})
lite_cc_library(sgd_op SRCS sgd_op.cc DEPS ${op_DEPS})
lite_cc_library(uniform_random_op SRCS uniform_random_op.cc DEPS ${op_DEPS})
lite_cc_library(power_op SRCS power_op.cc DEPS ${op_DEPS})
lite_cc_library(shuffle_channel_op SRCS shuffle_channel_op.cc DEPS ${op_DEPS})
lite_cc_library(yolo_box_op SRCS yolo_box_op.cc DEPS ${op_DEPS})
lite_cc_library(interpolate_op SRCS interpolate_op.cc DEPS ${op_DEPS})
lite_cc_library(argmax_op SRCS argmax_op.cc DEPS ${op_DEPS})
lite_cc_library(axpy_op SRCS axpy_op.cc DEPS ${op_DEPS})
lite_cc_library(gru_unit_op SRCS gru_unit_op.cc DEPS ${op_DEPS})
lite_cc_library(gru_op SRCS gru_op.cc DEPS ${op_DEPS})
lite_cc_library(layout_op SRCS layout_op.cc DEPS ${op_DEPS})
lite_cc_library(layout_once_op SRCS layout_once_op.cc DEPS ${op_DEPS})
lite_cc_library(while_op SRCS while_op.cc DEPS ${op_DEPS})
lite_cc_library(lookup_table_op SRCS lookup_table_op.cc DEPS ${op_DEPS})
lite_cc_library(beam_search_decode_op SRCS beam_search_decode_op.cc DEPS ${op_DEPS})
lite_cc_library(prior_box_op SRCS prior_box_op.cc DEPS ${op_DEPS})
lite_cc_library(density_prior_box_op SRCS density_prior_box_op.cc DEPS ${op_DEPS})
# From this point on, op_DEPS also carries `scope` and `memory`.
set(op_DEPS tensor op op_params scope memory)
# op_params is declared with its own dependency list (tensor, any) rather than ${op_DEPS}.
lite_cc_library(op_params SRCS op_params.cc DEPS tensor any)
lite_cc_library(dropout_op SRCS dropout_op.cc DEPS ${op_DEPS})
lite_cc_library(concat_op SRCS concat_op.cc DEPS ${op_DEPS})
lite_cc_library(pad2d_op SRCS pad2d_op.cc DEPS ${op_DEPS})
lite_cc_library(negative_op SRCS negative_op.cc DEPS ${op_DEPS})
lite_cc_library(crop_op SRCS crop_op.cc DEPS ${op_DEPS})
lite_cc_library(calib_op SRCS calib_op.cc DEPS ${op_DEPS})
lite_cc_library(calib_once_op SRCS calib_once_op.cc DEPS ${op_DEPS})
lite_cc_library(split_op SRCS split_op.cc DEPS ${op_DEPS})
lite_cc_library(transpose_op SRCS transpose_op.cc DEPS ${op_DEPS})
lite_cc_library(fake_quant SRCS fake_quantize_moving_avg_max_abs.cc DEPS ${op_DEPS})
lite_cc_library(fake_quant_range SRCS fake_quantize_range_abs_max.cc DEPS ${op_DEPS})
lite_cc_library(fake_dequant SRCS fake_dequantize_max_abs.cc DEPS ${op_DEPS})
lite_cc_library(conv_transpose_op SRCS conv_transpose_op.cc DEPS ${op_DEPS})
lite_cc_library(im2sequence_op SRCS im2sequence_op.cc DEPS ${op_DEPS})
lite_cc_library(sequence_softmax_op SRCS sequence_softmax_op.cc DEPS ${op_DEPS})
lite_cc_library(norm_op SRCS norm_op.cc DEPS ${op_DEPS})
lite_cc_library(graph_op SRCS graph_op.cc DEPS ${op_DEPS})
lite_cc_library(topk_op SRCS topk_op.cc DEPS ${op_DEPS})
lite_cc_library(increment_op SRCS increment_op.cc DEPS ${op_DEPS})
lite_cc_library(write_to_array_op SRCS write_to_array_op.cc DEPS ${op_DEPS})
# NOTE(review): graph_op_lite is built from the same graph_op.cc as graph_op
# above -- confirm that both targets are really needed.
lite_cc_library(graph_op_lite SRCS graph_op.cc DEPS ${op_DEPS})
# The four logical_* targets are all built from logical_op.cc.
lite_cc_library(logical_xor SRCS logical_op.cc DEPS ${op_DEPS})
lite_cc_library(logical_and SRCS logical_op.cc DEPS ${op_DEPS})
lite_cc_library(logical_or SRCS logical_op.cc DEPS ${op_DEPS})
lite_cc_library(logical_not SRCS logical_op.cc DEPS ${op_DEPS})
# The six comparison targets are all built from compare_op.cc.
lite_cc_library(less_than SRCS compare_op.cc DEPS ${op_DEPS})
lite_cc_library(equal SRCS compare_op.cc DEPS ${op_DEPS})
lite_cc_library(not_equal SRCS compare_op.cc DEPS ${op_DEPS})
lite_cc_library(less_equal SRCS compare_op.cc DEPS ${op_DEPS})
lite_cc_library(greater_than SRCS compare_op.cc DEPS ${op_DEPS})
lite_cc_library(greater_equal SRCS compare_op.cc DEPS ${op_DEPS})
lite_cc_library(read_from_array_op SRCS read_from_array_op.cc DEPS ${op_DEPS})
lite_cc_library(beam_search_op SRCS beam_search_op.cc DEPS ${op_DEPS})
lite_cc_library(sequence_pool_op_lite SRCS sequence_pool_op.cc DEPS ${op_DEPS})
lite_cc_library(sequence_expand_op_lite SRCS sequence_expand_op.cc DEPS ${op_DEPS})
lite_cc_library(reduce_max_op_lite SRCS reduce_max_op.cc DEPS ${op_DEPS})
lite_cc_library(lod_reset_op SRCS lod_reset_op.cc DEPS ${op_DEPS})
lite_cc_library(is_empty SRCS is_empty_op.cc DEPS ${op_DEPS})
lite_cc_library(shape_op_lite SRCS shape_op.cc DEPS ${op_DEPS})
lite_cc_library(cast_op_lite SRCS cast_op.cc DEPS ${op_DEPS})
lite_cc_library(slice_op_lite SRCS slice_op.cc DEPS ${op_DEPS})
lite_cc_library(squeeze_op_lite SRCS squeeze_op.cc DEPS ${op_DEPS})
lite_cc_library(expand_op_lite SRCS expand_op.cc DEPS ${op_DEPS})
# Operators registered through add_operator(<target> <level> SRCS ... DEPS ...).
# Every call in this run passes `basic` as the level argument.
add_operator(conv_op basic SRCS conv_op.cc DEPS ${op_DEPS})
add_operator(pool_op basic SRCS pool_op.cc DEPS ${op_DEPS})
add_operator(fc_op basic SRCS fc_op.cc DEPS ${op_DEPS})
add_operator(relu_op basic SRCS relu_op.cc DEPS ${op_DEPS})
add_operator(mul_op basic SRCS mul_op.cc DEPS ${op_DEPS})
add_operator(matmul_op basic SRCS matmul_op.cc DEPS ${op_DEPS})
add_operator(scale_op basic SRCS scale_op.cc DEPS ${op_DEPS})
add_operator(softmax_op basic SRCS softmax_op.cc DEPS ${op_DEPS})
add_operator(reshape_op basic SRCS reshape_op.cc DEPS ${op_DEPS} )
add_operator(batch_norm_op basic SRCS batch_norm_op.cc DEPS ${op_DEPS})
add_operator(feed_op basic SRCS feed_op.cc DEPS ${op_DEPS})
add_operator(fetch_op basic SRCS fetch_op.cc DEPS ${op_DEPS})
add_operator(io_copy_op basic SRCS io_copy_op.cc DEPS ${op_DEPS})
# io_copy_once_op additionally depends on io_copy_op.
add_operator(io_copy_once_op basic SRCS io_copy_once_op.cc DEPS io_copy_op ${op_DEPS})
add_operator(activation_ops basic SRCS activation_ops.cc DEPS ${op_DEPS})
add_operator(elementwise_ops basic SRCS elementwise_ops.cc DEPS ${op_DEPS})
add_operator(lrn_op_lite basic SRCS lrn_op.cc DEPS ${op_DEPS})
add_operator(decode_bboxes_op_lite basic SRCS decode_bboxes_op.cc DEPS ${op_DEPS})
add_operator(box_coder_op_lite basic SRCS box_coder_op.cc DEPS ${op_DEPS})
add_operator(multiclass_nms_op_lite basic SRCS multiclass_nms_op.cc DEPS ${op_DEPS})
# The fused elementwise/activation ops additionally depend on elementwise_ops.
add_operator(fusion_elementwise_activation_ops basic SRCS fusion_elementwise_activation_ops.cc DEPS elementwise_ops ${op_DEPS})
add_operator(mean_op basic SRCS mean_op.cc DEPS ${op_DEPS})
add_operator(fill_constant_op basic SRCS fill_constant_op.cc DEPS ${op_DEPS})
# The sgd_op registration is disabled:
#add_operator(sgd_op basic SRCS sgd_op.cc DEPS ${op_DEPS})
add_operator(uniform_random_op basic SRCS uniform_random_op.cc DEPS ${op_DEPS})
add_operator(power_op basic SRCS power_op.cc DEPS ${op_DEPS})
add_operator(shuffle_channel_op basic SRCS shuffle_channel_op.cc DEPS ${op_DEPS})
add_operator(yolo_box_op basic SRCS yolo_box_op.cc DEPS ${op_DEPS})
add_operator(interpolate_op basic SRCS interpolate_op.cc DEPS ${op_DEPS})
add_operator(argmax_op basic SRCS argmax_op.cc DEPS ${op_DEPS})
add_operator(axpy_op basic SRCS axpy_op.cc DEPS ${op_DEPS})
add_operator(gru_unit_op basic SRCS gru_unit_op.cc DEPS ${op_DEPS})
add_operator(gru_op basic SRCS gru_op.cc DEPS ${op_DEPS})
add_operator(layout_op basic SRCS layout_op.cc DEPS ${op_DEPS})
add_operator(layout_once_op basic SRCS layout_once_op.cc DEPS ${op_DEPS})
add_operator(prior_box_op basic SRCS prior_box_op.cc DEPS ${op_DEPS})
add_operator(density_prior_box_op basic SRCS density_prior_box_op.cc DEPS ${op_DEPS})
add_operator(dropout_op basic SRCS dropout_op.cc DEPS ${op_DEPS})
add_operator(concat_op basic SRCS concat_op.cc DEPS ${op_DEPS})
add_operator(pad2d_op basic SRCS pad2d_op.cc DEPS ${op_DEPS})
add_operator(negative_op basic SRCS negative_op.cc DEPS ${op_DEPS})
add_operator(crop_op basic SRCS crop_op.cc DEPS ${op_DEPS})
add_operator(calib_op basic SRCS calib_op.cc DEPS ${op_DEPS})
add_operator(calib_once_op basic SRCS calib_once_op.cc DEPS ${op_DEPS})
add_operator(split_op basic SRCS split_op.cc DEPS ${op_DEPS})
add_operator(transpose_op basic SRCS transpose_op.cc DEPS ${op_DEPS})
add_operator(fake_quant basic SRCS fake_quantize_moving_avg_max_abs.cc DEPS ${op_DEPS})
add_operator(fake_dequant basic SRCS fake_dequantize_max_abs.cc DEPS ${op_DEPS})
add_operator(conv_transpose_op basic SRCS conv_transpose_op.cc DEPS ${op_DEPS})
add_operator(graph_op basic SRCS graph_op.cc DEPS ${op_DEPS})
add_operator(expand_op_lite basic SRCS expand_op.cc DEPS ${op_DEPS})
add_operator(reduce_max_op_lite basic SRCS reduce_max_op.cc DEPS ${op_DEPS})
add_operator(norm_op basic SRCS norm_op.cc DEPS ${op_DEPS})
add_operator(shape_op_lite basic SRCS shape_op.cc DEPS ${op_DEPS})
add_operator(sequence_expand_op_lite basic SRCS sequence_expand_op.cc DEPS ${op_DEPS})
add_operator(squeeze_op_lite basic SRCS squeeze_op.cc DEPS ${op_DEPS})
# for OCR specific
# Every call in this run passes `extra` as the level argument.
# NOTE(review): presumably these are only compiled when LITE_BUILD_EXTRA is ON;
# add_operator's definition is not visible here -- confirm.
add_operator(im2sequence_op extra SRCS im2sequence_op.cc DEPS ${op_DEPS})
add_operator(while_op extra SRCS while_op.cc DEPS ${op_DEPS})
add_operator(lookup_table_op extra SRCS lookup_table_op.cc DEPS ${op_DEPS})
add_operator(beam_search_decode_op extra SRCS beam_search_decode_op.cc DEPS ${op_DEPS})
# NOTE(review): graph_op.cc is also registered as `graph_op` at the basic
# level -- confirm that this second target is needed.
add_operator(graph_op_lite extra SRCS graph_op.cc DEPS ${op_DEPS})
# The four logical_* operators are all built from logical_op.cc.
add_operator(logical_xor extra SRCS logical_op.cc DEPS ${op_DEPS})
add_operator(logical_and extra SRCS logical_op.cc DEPS ${op_DEPS})
add_operator(logical_or extra SRCS logical_op.cc DEPS ${op_DEPS})
add_operator(logical_not extra SRCS logical_op.cc DEPS ${op_DEPS})
# The six comparison operators are all built from compare_op.cc.
add_operator(less_than extra SRCS compare_op.cc DEPS ${op_DEPS})
add_operator(equal extra SRCS compare_op.cc DEPS ${op_DEPS})
add_operator(not_equal extra SRCS compare_op.cc DEPS ${op_DEPS})
add_operator(less_equal extra SRCS compare_op.cc DEPS ${op_DEPS})
add_operator(greater_than extra SRCS compare_op.cc DEPS ${op_DEPS})
add_operator(greater_equal extra SRCS compare_op.cc DEPS ${op_DEPS})
add_operator(read_from_array_op extra SRCS read_from_array_op.cc DEPS ${op_DEPS})
add_operator(beam_search_op extra SRCS beam_search_op.cc DEPS ${op_DEPS})
add_operator(sequence_pool_op_lite extra SRCS sequence_pool_op.cc DEPS ${op_DEPS})
add_operator(lod_reset_op extra SRCS lod_reset_op.cc DEPS ${op_DEPS})
add_operator(is_empty extra SRCS is_empty_op.cc DEPS ${op_DEPS})
add_operator(cast_op_lite extra SRCS cast_op.cc DEPS ${op_DEPS})
add_operator(slice_op_lite extra SRCS slice_op.cc DEPS ${op_DEPS})
add_operator(write_to_array_op extra SRCS write_to_array_op.cc DEPS ${op_DEPS})
add_operator(topk_op extra SRCS topk_op.cc DEPS ${op_DEPS})
add_operator(increment_op extra SRCS increment_op.cc DEPS ${op_DEPS})
add_operator(sequence_softmax_op extra SRCS sequence_softmax_op.cc DEPS ${op_DEPS})
# Aggregate list of operator targets, stored as an INTERNAL cache entry.
# Each target is listed exactly once: the earlier revision repeated
# decode_bboxes_op_lite and multiclass_nms_op_lite, which added nothing to the
# list but noise.
set(ops
  conv_op
  pool_op
  fc_op
  relu_op
  mul_op
  matmul_op
  scale_op
  softmax_op
  reshape_op
  flatten_op
  batch_norm_op
  feed_op
  fetch_op
  gru_unit_op
  gru_op
  beam_search_decode_op
  lookup_table_op
  io_copy_op
  io_copy_once_op
  elementwise_ops
  fusion_elementwise_activation_ops
  lrn_op_lite
  decode_bboxes_op_lite
  multiclass_nms_op_lite
  box_coder_op_lite
  mean_op
  fill_constant_op
  activation_ops
  dropout_op
  concat_op
  pad2d_op
  crop_op
  prior_box_op
  density_prior_box_op
  negative_op
  calib_op
  calib_once_op
  split_op
  transpose_op
  fake_quant
  fake_quant_range
  fake_dequant
  sgd_op
  uniform_random_op
  power_op
  yolo_box_op
  shuffle_channel_op
  argmax_op
  axpy_op
  conv_transpose_op
  im2sequence_op
  sequence_softmax_op
  norm_op
  layout_op
  layout_once_op
  interpolate_op
  logical_xor
  logical_and
  logical_or
  logical_not
  equal
  not_equal
  less_than
  while_op
  less_equal
  greater_than
  greater_equal
  graph_op
  topk_op
  increment_op
  write_to_array_op
  read_from_array_op
  beam_search_op
  sequence_pool_op_lite
  sequence_expand_op_lite
  reduce_max_op_lite
  lod_reset_op
  is_empty
  shape_op_lite
  cast_op_lite
  slice_op_lite
  squeeze_op_lite
  expand_op_lite
  CACHE INTERNAL "ops lite")
if (NOT LITE_WITH_X86)
lite_cc_test(test_fc_op SRCS fc_op_test.cc
......@@ -188,7 +100,7 @@ if (NOT LITE_WITH_X86)
# Operator unit tests; each links the operator under test plus `memory`.
lite_cc_test(test_softmax_op SRCS softmax_op_test.cc DEPS softmax_op memory)
#lite_cc_test(test_reshape_op SRCS reshape_op_test.cc DEPS reshape_op memory)
lite_cc_test(test_batch_norm_op SRCS batch_norm_op_test.cc DEPS batch_norm_op memory)
# NOTE(review): test_concat_op is declared twice below; the second form adds
# the `scope` dependency. Only one declaration should remain -- confirm which.
lite_cc_test(test_concat_op SRCS concat_op_test.cc DEPS concat_op memory)
lite_cc_test(test_concat_op SRCS concat_op_test.cc DEPS concat_op memory scope)
lite_cc_test(test_calib_op SRCS calib_op_test.cc DEPS calib_op memory ARM_DEPS calib_compute_arm)
lite_cc_test(test_fusion_elementwise_activation_ops
SRCS fusion_elementwise_activation_ops_test.cc
......
......@@ -21,7 +21,13 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND (LITE_WITH_X86 OR LITE_WITH
# Disabled kernel tests (increment / write_to_array / read_from_array).
#lite_cc_test(test_kernel_increment_compute SRCS increment_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_write_to_array_compute SRCS write_to_array_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
#lite_cc_test(test_kernel_read_from_array_compute SRCS read_from_array_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
# These tests are only declared when the LITE_BUILD_EXTRA option is ON.
if(LITE_BUILD_EXTRA)
lite_cc_test(test_gru_unit SRCS gru_unit_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_sequence_pool_compute SRCS sequence_pool_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_reduce_max_compute SRCS reduce_max_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
endif()
# Tests declared regardless of LITE_BUILD_EXTRA. test_sgemm is the only one
# here that does not list arena_framework among its dependencies.
lite_cc_test(test_sgemm SRCS test_sgemm.cc DEPS ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_pad2d_compute SRCS pad2d_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_prior_box_compute SRCS prior_box_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
......@@ -31,9 +37,7 @@ if((NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND (LITE_WITH_X86 OR LITE_WITH
# Kernel tests that all share the same dependency set
# (arena_framework + x86/arm/host kernels + lite ops).
lite_cc_test(test_kernel_nearest_interp_compute SRCS nearest_interp_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_shape_compute SRCS shape_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_crop_compute SRCS crop_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
# NOTE(review): test_kernel_sequence_pool_compute is also declared inside the
# if(LITE_BUILD_EXTRA) block earlier in this file -- confirm which declaration
# should remain.
lite_cc_test(test_kernel_sequence_pool_compute SRCS sequence_pool_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_sequence_expand_compute SRCS sequence_expand_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
# NOTE(review): test_kernel_reduce_max_compute is likewise also declared inside
# the if(LITE_BUILD_EXTRA) block -- confirm which declaration should remain.
lite_cc_test(test_kernel_reduce_max_compute SRCS reduce_max_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_squeeze_compute SRCS squeeze_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_expand_compute SRCS expand_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
lite_cc_test(test_kernel_matmul_compute SRCS matmul_compute_test.cc DEPS arena_framework ${x86_kernels} ${arm_kernels} ${lite_ops} ${host_kernels})
......
#!/bin/bash
set -ex
readonly CMAKE_COMMON_OPTIONS="-DWITH_GPU=OFF \
-DWITH_MKL=OFF \
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import logging
def collect_kernel_uses(source_lines, source_name=''):
    """Turn the REGISTER_LITE_KERNEL calls of one source file into USE lines.

    Args:
        source_lines: the lines of one kernel source file, in order.
        source_name: optional file name, used only in error messages.

    Returns:
        A list with one ``USE_LITE_KERNEL(op, target, precision, layout,
        alias);`` string per registration found, in file order.

    Raises:
        ValueError: if a registration does not provide the six
            comma-separated macro arguments this parser expects.
    """
    key = 'REGISTER_LITE_KERNEL'
    uses = []
    for i, raw in enumerate(source_lines):
        line = raw.strip()
        if not line.startswith(key):
            continue

        # A registration may be wrapped over several lines, so look at the
        # matching line plus up to six lines that follow it.
        tail = ' '.join(v.strip() for v in source_lines[i + 1:i + 7])
        remaining = line[len(key) + 1:] + tail

        # Everything from the first '.' on is dropped, so only the macro
        # arguments ahead of it are parsed.
        x = remaining.find('.')
        if x > 0:
            remaining = remaining[:x]

        fs = [v.strip() for v in remaining.split(',')]
        # Six fields are unpacked below; check for exactly that instead of
        # failing later with an opaque unpacking error.
        if len(fs) < 6:
            raise ValueError(
                "malformed %s at %s line %d: expected 6 arguments, got %d" %
                (key, source_name or '<input>', i + 1, len(fs)))
        op, target, precision, layout, __, alias = fs[:6]
        alias = alias.replace(')', '')

        uses.append("USE_LITE_KERNEL(%s, %s, %s, %s, %s);" %
                    (op, target, precision, layout, alias))
    return uses


def write_kernel_list(ops_list_path, dest_path):
    """Generate the kernel USE header.

    Args:
        ops_list_path: text file holding one kernel source path per line.
        dest_path: header file to write.

    Blank lines in the list are ignored. A path that appears more than once
    is parsed only once, and identical USE lines are emitted only once.
    """
    out_lines = [
        '#pragma once',
        '#include "paddle_lite_factory_helper.h"',
        '',
    ]
    seen_paths = set()
    seen_uses = set()

    with open(ops_list_path) as f:
        for entry in f:
            path = entry.strip()
            if not path or path in seen_paths:
                continue
            seen_paths.add(path)

            with open(path) as g:
                source_lines = g.readlines()
            for use in collect_kernel_uses(source_lines, path):
                if use not in seen_uses:
                    seen_uses.add(use)
                    out_lines.append(use)

    with open(dest_path, 'w') as f:
        logging.info("write kernel list to %s" % dest_path)
        f.write('\n'.join(out_lines))


if __name__ == '__main__':
    write_kernel_list(sys.argv[1], sys.argv[2])
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
''' Collect op registry information. '''
import sys
import logging
def collect_op_uses(source_lines):
    """Turn the REGISTER_LITE_OP calls of one source file into USE lines.

    Args:
        source_lines: the lines of one operator source file, in order.

    Returns:
        A list with one ``USE_LITE_OP(op);`` string per registration found,
        in file order. Registrations whose op name contains ``_grad`` are
        left out.

    Only lines that begin with the macro name (no leading whitespace) are
    considered, and the op name must be followed by a comma on that same line.
    """
    key = 'REGISTER_LITE_OP'
    uses = []
    for line in source_lines:
        if not line.startswith(key):
            continue
        end = line.find(',')
        if end < 0:
            # No comma on this line, so the op name cannot be delimited.
            # Slicing with -1 here would yield a bogus name such as "foo)".
            continue
        op = line[len(key) + 1:end].strip()
        if not op or "_grad" in op:
            continue
        uses.append("USE_LITE_OP(%s);" % op)
    return uses


def write_op_list(ops_list_path, dest_path):
    """Generate the operator USE header.

    Args:
        ops_list_path: text file holding one operator source path per line.
        dest_path: header file to write.

    Blank lines in the list are ignored. A path that appears more than once
    (for example when several build targets share one source file) is parsed
    only once, and identical USE lines are emitted only once.
    """
    out_lines = [
        '#pragma once',
        '#include "paddle_lite_factory_helper.h"',
        '',
    ]
    seen_paths = set()
    seen_uses = set()

    with open(ops_list_path) as f:
        for entry in f:
            path = entry.strip()
            if not path or path in seen_paths:
                continue
            seen_paths.add(path)

            with open(path) as g:
                for use in collect_op_uses(g):
                    if use not in seen_uses:
                        seen_uses.add(use)
                        out_lines.append(use)

    with open(dest_path, 'w') as f:
        logging.info("write op list to %s" % dest_path)
        f.write('\n'.join(out_lines))


if __name__ == '__main__':
    write_op_list(sys.argv[1], sys.argv[2])
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册