Commit 04a36e78 authored by baolei.an

* adjust follow npu/xpu

* fix code_style test=develop
Parent d6709eb9
@@ -74,7 +74,7 @@ lite_option(LITE_ON_MODEL_OPTIMIZE_TOOL "Build the model optimize tool" OFF)
lite_option(LITE_BUILD_EXTRA "Enable extra algorithm support in Lite, both kernels and operators" OFF)
lite_option(LITE_BUILD_TAILOR "Enable tailoring library according to model" OFF)
# cv build options
-lite_option(LITE_WITH_CV "Enable build cv image in lite" OFF IF NOT LITE_WITH_ARM)
+lite_option(LITE_WITH_CV "Enable build cv image in lite" OFF)
# TODO(Superjomn) Remove WITH_ANAKIN option if not needed latter.
@@ -170,6 +170,10 @@ endif()
########################################################################################
+if(LITE_WITH_XPU)
+include(xpu)
+endif()
include(external/mklml) # download mklml package
include(external/xbyak) # download xbyak package
include(external/libxsmm) # download, build, install libxsmm
@@ -189,14 +193,9 @@ if(LITE_WITH_CUDA)
include(cuda)
endif()
-if(LITE_WITH_XPU)
-include(xpu)
-endif()
if(LITE_WITH_BM)
include(bm)
endif()
include(generic) # simplify cmake module
include(ccache) # set ccache for compilation
include(util) # set unittest and link libs
......
@@ -34,7 +34,7 @@ Paddle Lite is an upgraded version of Paddle-Mobile, positioned to support platforms including mobile
PaddleLite's architecture is designed around support for multiple hardware backends and platforms. It strengthens the ability to run a single model across several hardware backends, applies performance optimizations at multiple levels, and keeps on-device deployment lightweight.
-![](https://github.com/Superjomn/_tmp_images/raw/master/images/paddle-lite-architecture.png)
+![](https://user-images.githubusercontent.com/45189361/70908123-6ce4fd00-2045-11ea-97e1-ad08446c5c86.png)
The Analysis Phase contains the MIR (Machine IR) modules, which apply optimizations such as operator fusion and computation pruning to the original model's computation graph for a given list of target hardware. The Execution Phase only involves kernel execution; it can be deployed on its own to support extremely lightweight deployment.
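In terms of the public C++ API, the Analysis Phase is driven through CxxConfig (optimize and save a model) and the Execution Phase through MobileConfig (run the optimized model on device). The sketch below is illustrative only; it mirrors the CxxConfig/MobileConfig calls used elsewhere in this commit, and the model paths are hypothetical.

```cpp
#include "lite/api/paddle_api.h"  // CxxConfig, MobileConfig, CreatePaddlePredictor

// Analysis Phase: run the MIR passes for the chosen hardware list and persist
// an optimized naive-buffer model (done offline, on the host).
void BuildOptimizedModel() {
  paddle::lite_api::CxxConfig config;
  config.set_model_dir("mobilenet_v1");  // hypothetical model directory
  config.set_valid_places({paddle::lite_api::Place{TARGET(kARM), PRECISION(kFloat)}});
  auto predictor = paddle::lite_api::CreatePaddlePredictor(config);
  predictor->SaveOptimizedModel("mobilenet_v1_opt",
                                paddle::lite_api::LiteModelType::kNaiveBuffer);
}

// Execution Phase: the light-weight predictor only executes kernels, so it can
// be deployed without any of the analysis modules.
void RunOptimizedModel() {
  paddle::lite_api::MobileConfig config;
  config.set_model_dir("mobilenet_v1_opt");
  auto predictor = paddle::lite_api::CreatePaddlePredictor(config);
  auto input = predictor->GetInput(0);
  input->Resize({1, 3, 224, 224});
  float* data = input->mutable_data<float>();
  for (int i = 0; i < 1 * 3 * 224 * 224; ++i) data[i] = 1.f;
  predictor->Run();
  const float* out = predictor->GetOutput(0)->data<float>();
  (void)out;
}
```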
......
@@ -63,7 +63,7 @@ if (LITE_ON_TINY_PUBLISH)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions")
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math -Ofast -Os -fomit-frame-pointer -fno-asynchronous-unwind-tables -fno-unwind-tables")
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -flto -fvisibility=hidden -fvisibility-inlines-hidden -fdata-sections -ffunction-sections")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden -fvisibility-inlines-hidden -ffunction-sections")
check_linker_flag(-Wl,--gc-sections)
endif()
......
@@ -32,10 +32,9 @@ list(APPEND CUDNN_CHECK_LIBRARY_DIRS
$ENV{CUDNN_ROOT}/lib64
$ENV{CUDNN_ROOT}/lib
/usr/lib
${CUDA_TOOLKIT_ROOT_DIR}
${CUDA_TOOLKIT_ROOT_DIR}/lib/x64
-${CUDA_TOOLKIT_ROOT_DIR}/lib64
-)
+${CUDA_TOOLKIT_ROOT_DIR}/lib64)
if((${CUDA_VERSION} GREATER 10.0) OR (${CUDA_VERSION} EQUAL 10.0))
find_library(CUBLAS_LIBRARY NAMES libcublas.so PATHS ${CUDNN_CHECK_LIBRARY_DIRS} NO_DEFAULT_PATH)
......
@@ -22,7 +22,7 @@ endfunction()
function (lite_deps TARGET)
set(options "")
set(oneValueArgs "")
-set(multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS LIGHT_DEPS HVY_DEPS CL_DEPS FPGA_DEPS BM_DEPS NPU_DEPS XPU_DEPS ARGS)
+set(multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS LIGHT_DEPS HVY_DEPS CL_DEPS FPGA_DEPS BM_DEPS NPU_DEPS XPU_DEPS CV_DEPS ARGS)
cmake_parse_arguments(lite_deps "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(deps ${lite_deps_DEPS})
@@ -44,7 +44,7 @@ function (lite_deps TARGET)
set(deps ${deps} ${var})
endforeach(var)
if(LITE_WITH_CV)
-foreach(var ${lite_cv_deps})
+foreach(var ${lite_deps_CV_DEPS})
set(deps ${deps} ${var})
endforeach(var)
endif()
@@ -121,10 +121,11 @@ file(WRITE ${offline_lib_registry_file} "") # clean
# LIGHT_DEPS: LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
# HVY_DEPS: NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
# EXCLUDE_COMPILE_DEPS: TARGET will not be included in lite_compile_deps if this is not None
+# CV_DEPS: LITE_WITH_CV
function(lite_cc_library TARGET)
set(options SHARED shared STATIC static MODULE module)
set(oneValueArgs "")
-set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS NPU_DEPS XPU_DEPS ARM_DEPS FPGA_DEPS BM_DEPS PROFILE_DEPS LIGHT_DEPS
+set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS NPU_DEPS XPU_DEPS CV_DEPS PROFILE_DEPS LIGHT_DEPS
HVY_DEPS EXCLUDE_COMPILE_DEPS ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
@@ -134,11 +135,12 @@ function(lite_cc_library TARGET)
X86_DEPS ${args_X86_DEPS}
CUDA_DEPS ${args_CUDA_DEPS}
CL_DEPS ${args_CL_DEPS}
-NPU_DEPS ${args_NPU_DEPS}
-XPU_DEPS ${args_XPU_DEPS}
+BM_DEPS ${args_BM_DEPS}
ARM_DEPS ${args_ARM_DEPS}
+CV_DEPS ${args_CV_DEPS}
FPGA_DEPS ${args_FPGA_DEPS}
-BM_DEPS ${args_BM_DEPS}
+NPU_DEPS ${args_NPU_DEPS}
+XPU_DEPS ${args_XPU_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS}
@@ -168,8 +170,8 @@ function(lite_cc_binary TARGET)
set(options " -g ")
endif()
set(oneValueArgs "")
-set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS PROFILE_DEPS
-LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS ARGS)
+set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS NPU_DEPS XPU_DEPS PROFILE_DEPS
+LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS CV_DEPS ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(deps "")
@@ -180,10 +182,13 @@ function(lite_cc_binary TARGET)
CL_DEPS ${args_CL_DEPS}
ARM_DEPS ${args_ARM_DEPS}
FPGA_DEPS ${args_FPGA_DEPS}
-BM_DEPS ${args_BM_DEPS}
+NPU_DEPS ${args_NPU_DEPS}
+XPU_DEPS ${args_XPU_DEPS}
+BM_DEPS ${args_BM_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS}
+CV_DEPS ${CV_DEPS}
)
cc_binary(${TARGET} SRCS ${args_SRCS} DEPS ${deps})
target_compile_options(${TARGET} BEFORE PRIVATE -Wno-ignored-qualifiers)
@@ -213,8 +218,8 @@ function(lite_cc_test TARGET)
endif()
set(options "")
set(oneValueArgs "")
-set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS PROFILE_DEPS
-LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS
+set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS NPU_DEPS XPU_DEPS PROFILE_DEPS
+LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS CV_DEPS
ARGS
COMPILE_LEVEL # (basic|extra)
)
@@ -233,10 +238,13 @@ function(lite_cc_test TARGET)
CL_DEPS ${args_CL_DEPS}
ARM_DEPS ${args_ARM_DEPS}
FPGA_DEPS ${args_FPGA_DEPS}
-BM_DEPS ${args_BM_DEPS}
+NPU_DEPS ${args_NPU_DEPS}
+XPU_DEPS ${args_XPU_DEPS}
+BM_DEPS ${args_BM_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS}
+CV_DEPS ${args_CV_DEPS}
)
_lite_cc_test(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ARGS ${args_ARGS})
# strip binary target to reduce size
@@ -277,7 +285,7 @@ endif()
function(add_kernel TARGET device level)
set(options "")
set(oneValueArgs "")
-set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS PROFILE_DEPS
+set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS NPU_DEPS XPU_DEPS PROFILE_DEPS
LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS
ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
@@ -376,12 +384,13 @@ function(add_kernel TARGET device level)
lite_cc_library(${TARGET} SRCS ${args_SRCS}
DEPS ${args_DEPS}
X86_DEPS ${args_X86_DEPS}
-XPU_DEPS ${args_XPU_DEPS}
CUDA_DEPS ${args_CUDA_DEPS}
CL_DEPS ${args_CL_DEPS}
ARM_DEPS ${args_ARM_DEPS}
FPGA_DEPS ${args_FPGA_DEPS}
-BM_DEPS ${args_BM_DEPS}
+NPU_DEPS ${args_NPU_DEPS}
+XPU_DEPS ${args_XPU_DEPS}
+BM_DEPS ${args_BM_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS}
@@ -400,7 +409,7 @@ endif()
function(add_operator TARGET level)
set(options "")
set(oneValueArgs "")
-set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS BM_DEPS FPGA_DEPS PROFILE_DEPS
+set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS ARM_DEPS FPGA_DEPS BM_DEPS NPU_DEPS XPU_DEPS PROFILE_DEPS
LIGHT_DEPS HVY_DEPS EXCLUDE_COMPILE_DEPS
ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
@@ -426,12 +435,13 @@ function(add_operator TARGET level)
lite_cc_library(${TARGET} SRCS ${args_SRCS}
DEPS ${args_DEPS}
X86_DEPS ${args_X86_DEPS}
-XPU_DEPS ${args_XPU_DEPS}
CUDA_DEPS ${args_CUDA_DEPS}
CL_DEPS ${args_CL_DEPS}
ARM_DEPS ${args_ARM_DEPS}
FPGA_DEPS ${args_FPGA_DEPS}
-BM_DEPS ${args_BM_DEPS}
+NPU_DEPS ${args_NPU_DEPS}
+XPU_DEPS ${args_XPU_DEPS}
+BM_DEPS ${args_BM_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS}
LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS}
......
@@ -99,7 +99,7 @@ else()
set_property(TARGET xpu_sdk_llvm PROPERTY IMPORTED_LOCATION ${XPU_SDK_LLVM_FILE})
endif()
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDMLC_USE_GLOG=1")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDMLC_USE_GLOG=1 -D_GLIBCXX_USE_CXX11_ABI=0")
set(xpu_runtime_libs xpu_sdk_xtcl xpu_sdk_tvm xpu_sdk_xpu_api xpu_sdk_xpu_rt xpu_sdk_xpu_jitc xpu_sdk_llvm CACHE INTERNAL "xpu runtime libs")
set(xpu_builder_libs xpu_sdk_xtcl xpu_sdk_tvm xpu_sdk_xpu_api xpu_sdk_xpu_rt xpu_sdk_xpu_jitc xpu_sdk_llvm CACHE INTERNAL "xpu builder libs")
@@ -176,13 +176,17 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)
COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/cxx/include"
COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/cxx/lib"
COMMAND cp "${CMAKE_SOURCE_DIR}/lite/api/paddle_*.h" "${INFER_LITE_PUBLISH_ROOT}/cxx/include"
+COMMAND cp "${CMAKE_BINARY_DIR}/libpaddle_api_light_bundled.a" "${INFER_LITE_PUBLISH_ROOT}/cxx/lib"
COMMAND cp "${CMAKE_BINARY_DIR}/lite/api/libpaddle_light_api_shared.so" "${INFER_LITE_PUBLISH_ROOT}/cxx/lib"
COMMAND cp "${CMAKE_SOURCE_DIR}/lite/utils/cv/paddle_*.h" "${INFER_LITE_PUBLISH_ROOT}/cxx/include"
)
add_dependencies(tiny_publish_cxx_lib paddle_light_api_shared)
+add_dependencies(tiny_publish_cxx_lib bundle_light_api)
add_dependencies(publish_inference tiny_publish_cxx_lib)
-add_custom_command(TARGET tiny_publish_cxx_lib POST_BUILD
-COMMAND ${CMAKE_STRIP} "-s" ${INFER_LITE_PUBLISH_ROOT}/cxx/lib/libpaddle_light_api_shared.so)
+if(NOT "${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
+add_custom_command(TARGET tiny_publish_cxx_lib POST_BUILD
+COMMAND ${CMAKE_STRIP} "-s" ${INFER_LITE_PUBLISH_ROOT}/cxx/lib/libpaddle_light_api_shared.so)
+endif()
endif()
endif()
endif()
@@ -222,7 +226,8 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)
COMMAND cp "${CMAKE_SOURCE_DIR}/lite/demo/cxx/makefiles/mobile_light/Makefile.${ARM_TARGET_OS}.${ARM_TARGET_ARCH_ABI}" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx/mobile_light/Makefile"
COMMAND cp -r "${CMAKE_SOURCE_DIR}/lite/demo/cxx/mobile_detection" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx"
COMMAND cp "${CMAKE_SOURCE_DIR}/lite/demo/cxx/makefiles/mobile_detection/Makefile.${ARM_TARGET_OS}.${ARM_TARGET_ARCH_ABI}" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx/mobile_detection/Makefile"
-COMMAND cp "${CMAKE_SOURCE_DIR}/lite/api/paddle_*.h" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx/include"
+COMMAND cp -r "${CMAKE_SOURCE_DIR}/lite/demo/cxx/mobile_classify" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx"
+COMMAND cp "${CMAKE_SOURCE_DIR}/lite/demo/cxx/makefiles/mobile_classify/Makefile.${ARM_TARGET_OS}.${ARM_TARGET_ARCH_ABI}" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx/mobile_classify/Makefile"
)
add_dependencies(publish_inference_android_cxx_demos logging gflags)
add_dependencies(publish_inference_cxx_lib publish_inference_android_cxx_demos)
@@ -236,7 +241,8 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)
COMMAND cp "${CMAKE_SOURCE_DIR}/lite/demo/cxx/makefiles/mobile_light/Makefile.${ARM_TARGET_OS}.${ARM_TARGET_ARCH_ABI}" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx/mobile_light/Makefile"
COMMAND cp -r "${CMAKE_SOURCE_DIR}/lite/demo/cxx/mobile_detection" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx"
COMMAND cp "${CMAKE_SOURCE_DIR}/lite/demo/cxx/makefiles/mobile_detection/Makefile.${ARM_TARGET_OS}.${ARM_TARGET_ARCH_ABI}" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx/mobile_detection/Makefile"
+COMMAND cp -r "${CMAKE_SOURCE_DIR}/lite/demo/cxx/mobile_classify" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx"
+COMMAND cp "${CMAKE_SOURCE_DIR}/lite/demo/cxx/makefiles/mobile_classify/Makefile.${ARM_TARGET_OS}.${ARM_TARGET_ARCH_ABI}" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx/mobile_classify/Makefile"
)
add_dependencies(tiny_publish_cxx_lib publish_inference_android_cxx_demos)
endif()
......
@@ -16,17 +16,24 @@ if ((NOT LITE_ON_TINY_PUBLISH) AND (LITE_WITH_CUDA OR LITE_WITH_X86 OR ARM_TARGE
add_dependencies(paddle_full_api_shared op_list_h kernel_list_h framework_proto)
target_link_libraries(paddle_full_api_shared framework_proto)
if(LITE_WITH_X86)
add_dependencies(paddle_full_api_shared xxhash)
target_link_libraries(paddle_full_api_shared xxhash)
+if (NOT LITE_ON_MODEL_OPTIMIZE_TOOL)
+add_dependencies(paddle_full_api_shared dynload_mklml)
+endif()
endif()
if(LITE_WITH_CUDA)
target_link_libraries(paddle_full_api_shared ${math_cuda} "-Wl,--whole-archive" ${cuda_kernels} "-Wl,--no-whole-archive")
endif(LITE_WITH_CUDA)
#light api dynamic library
lite_cc_library(paddle_light_api_shared MODULE
SRCS light_api_shared.cc
DEPS ${light_lib_DEPS}
-ARM_DEPS ${arm_kernels} NPU_DEPS ${npu_kernels})
+ARM_DEPS ${arm_kernels}
+CV_DEPS paddle_cv_arm
+NPU_DEPS ${npu_kernels})
target_link_libraries(paddle_light_api_shared ${light_lib_DEPS} ${arm_kernels} ${npu_kernels})
if (LITE_WITH_NPU)
# Strips the symbols of our protobuf functions to fix the conflicts during
@@ -38,10 +45,11 @@ else()
if ((ARM_TARGET_OS STREQUAL "android") OR (ARM_TARGET_OS STREQUAL "armlinux"))
add_library(paddle_light_api_shared SHARED "")
target_sources(paddle_light_api_shared PUBLIC ${__lite_cc_files} paddle_api.cc light_api.cc light_api_impl.cc)
+set_target_properties(paddle_light_api_shared PROPERTIES COMPILE_FLAGS "-flto -fdata-sections")
add_dependencies(paddle_light_api_shared op_list_h kernel_list_h)
if (LITE_WITH_NPU)
# Need to add HIAI runtime libs (libhiai.so) dependency
-target_link_libraries(paddle_light_api_shared ${npu_runtime_libs})
+target_link_libraries(paddle_light_api_shared ${npu_builder_libs} ${npu_runtime_libs})
endif()
endif()
endif()
@@ -73,22 +81,22 @@ message(STATUS "get ARM kernels ${arm_kernels}")
message(STATUS "get NPU kernels ${npu_kernels}")
message(STATUS "get XPU kernels ${xpu_kernels}")
message(STATUS "get FPGA kernels ${fpga_kernels}")
+message(STATUS "get BM kernels ${bm_kernels}")
# for full api
if (NOT LITE_ON_TINY_PUBLISH)
set(cxx_api_deps
scope optimizer target_wrapper_host model_parser program)
lite_cc_library(cxx_api
SRCS cxx_api.cc
DEPS ${cxx_api_deps} ${ops} ${host_kernels} program
X86_DEPS ${x86_kernels}
ARM_DEPS ${arm_kernels}
-NPU_DEPS ${npu_kernels} ${npu_bridges} npu_pass
-XPU_DEPS ${xpu_kernels} ${xpu_bridges} xpu_pass
-BM_DEPS ${bm_kernels} ${bm_bridges} bm_pass
+CV_DEPS paddle_cv_arm
+NPU_DEPS ${npu_kernels}
+XPU_DEPS ${xpu_kernels}
+BM_DEPS ${bm_kernels}
CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels})
endif()
# for light api
@@ -104,6 +112,7 @@ lite_cc_library(light_api SRCS light_api.cc
CUDA_DEPS ${cuda_kernels}
X86_DEPS ${x86_kernels}
ARM_DEPS ${arm_kernels}
+CV_DEPS paddle_cv_arm
NPU_DEPS ${npu_kernels}
XPU_DEPS ${xpu_kernels}
CL_DEPS ${opencl_kernels}
@@ -234,11 +243,12 @@ else()
endif()
if (NOT LITE_ON_TINY_PUBLISH)
lite_cc_library(paddle_api_full SRCS cxx_api_impl.cc DEPS cxx_api paddle_api_light
${ops}
ARM_DEPS ${arm_kernels}
+CV_DEPS paddle_cv_arm
NPU_DEPS ${npu_kernels}
CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels})
# The final inference library for just MobileConfig.
bundle_static_library(paddle_api_full paddle_api_full_bundled bundle_full_api)
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
@@ -270,7 +280,7 @@ if (LITE_WITH_JAVA AND LITE_WITH_ARM)
add_subdirectory(android)
endif()
if (LITE_WITH_PYTHON)
add_subdirectory(python)
endif()
@@ -301,27 +311,38 @@ endif()
# Some bins
if(NOT IOS)
lite_cc_binary(test_model_bin SRCS model_test.cc DEPS paddle_api_full paddle_api_light gflags utils
${ops} ${host_kernels}
ARM_DEPS ${arm_kernels}
+CV_DEPS paddle_cv_arm
NPU_DEPS ${npu_kernels}
XPU_DEPS ${xpu_kernels}
CL_DEPS ${opencl_kernels}
BM_DEPS ${bm_kernels}
FPGA_DEPS ${fpga_kernels}
X86_DEPS ${x86_kernels}
CUDA_DEPS ${cuda_kernels})
lite_cc_binary(benchmark_bin SRCS benchmark.cc DEPS paddle_api_full paddle_api_light gflags utils
${ops} ${host_kernels}
ARM_DEPS ${arm_kernels}
NPU_DEPS ${npu_kernels}
XPU_DEPS ${xpu_kernels}
CL_DEPS ${opencl_kernels}
-BM_DEPS ${bm_kernels}
FPGA_DEPS ${fpga_kernels}
X86_DEPS ${x86_kernels}
CUDA_DEPS ${cuda_kernels})
+lite_cc_binary(multithread_test SRCS lite_multithread_test.cc DEPS paddle_api_full paddle_api_light gflags utils
+${ops} ${host_kernels}
+ARM_DEPS ${arm_kernels}
+CV_DEPS paddle_cv_arm
+NPU_DEPS ${npu_kernels}
+XPU_DEPS ${xpu_kernels}
+CL_DEPS ${opencl_kernels}
+BM_DEPS ${bm_kernels}
+FPGA_DEPS ${fpga_kernels}
+X86_DEPS ${x86_kernels}
+CUDA_DEPS ${cuda_kernels})
endif()
#lite_cc_binary(cxx_api_bin SRCS cxx_api_bin.cc
......
@@ -108,7 +108,7 @@ USE_LITE_OP(while)
USE_LITE_OP(lod_reset)
USE_LITE_OP(lookup_table)
USE_LITE_OP(multiclass_nms)
-USE_LITE_OP(graph_op)
+USE_LITE_OP(subgraph)
USE_LITE_OP(sequence_expand)
USE_LITE_OP(sequence_pool)
USE_LITE_OP(reduce_max)
......
@@ -25,11 +25,12 @@ if (NOT LITE_ON_TINY_PUBLISH)
endif()
else()
add_library(paddle_lite_jni SHARED "")
+set_target_properties(paddle_lite_jni PROPERTIES COMPILE_FLAGS "-flto -fdata-sections")
target_sources(paddle_lite_jni PUBLIC ${__lite_cc_files} paddle_lite_jni.cc tensor_jni.cc)
add_dependencies(paddle_lite_jni op_list_h kernel_list_h)
if (LITE_WITH_NPU)
# Need to add HIAI runtime libs (libhiai.so) dependency
-target_link_libraries(paddle_lite_jni ${npu_runtime_libs})
+target_link_libraries(paddle_lite_jni ${npu_builder_libs} ${npu_runtime_libs})
endif()
endif()
......
@@ -120,6 +120,22 @@ JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_lite_Tensor_nativeSetData___3B(
return JNI_TRUE;
}
JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_lite_Tensor_nativeSetData___3I(
JNIEnv *env, jobject jtensor, jintArray buf) {
std::unique_ptr<Tensor> *tensor = get_writable_tensor_pointer(env, jtensor);
if (tensor == nullptr || (*tensor == nullptr)) {
return JNI_FALSE;
}
int64_t buf_size = (int64_t)env->GetArrayLength(buf);
if (buf_size != product((*tensor)->shape())) {
return JNI_FALSE;
}
int32_t *input = (*tensor)->mutable_data<int32_t>();
env->GetIntArrayRegion(buf, 0, buf_size, input);
return JNI_TRUE;
}
JNIEXPORT jfloatArray JNICALL
Java_com_baidu_paddle_lite_Tensor_getFloatData(JNIEnv *env, jobject jtensor) {
if (is_const_tensor(env, jtensor)) {
@@ -148,6 +164,20 @@ Java_com_baidu_paddle_lite_Tensor_getByteData(JNIEnv *env, jobject jtensor) {
}
}
JNIEXPORT jintArray JNICALL
Java_com_baidu_paddle_lite_Tensor_getIntData(JNIEnv *env, jobject jtensor) {
if (is_const_tensor(env, jtensor)) {
std::unique_ptr<const Tensor> *tensor =
get_read_only_tensor_pointer(env, jtensor);
return cpp_array_to_jintarray(
env, (*tensor)->data<int32_t>(), product((*tensor)->shape()));
} else {
std::unique_ptr<Tensor> *tensor = get_writable_tensor_pointer(env, jtensor);
return cpp_array_to_jintarray(
env, (*tensor)->data<int32_t>(), product((*tensor)->shape()));
}
}
JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_lite_Tensor_deleteCppTensor(
JNIEnv *env, jobject jtensor, jlong java_pointer) {
if (java_pointer == 0) {
......
@@ -16,8 +16,8 @@
#include <jni.h>
/* Header for class com_baidu_paddle_lite_Tensor */
-#ifndef PADDLE_FLUID_LITE_API_ANDROID_JNI_NATIVE_TENSOR_JNI_H_
-#define PADDLE_FLUID_LITE_API_ANDROID_JNI_NATIVE_TENSOR_JNI_H_
+#ifndef LITE_API_ANDROID_JNI_NATIVE_TENSOR_JNI_H_
+#define LITE_API_ANDROID_JNI_NATIVE_TENSOR_JNI_H_
#ifdef __cplusplus
extern "C" {
#endif
@@ -49,6 +49,14 @@ Java_com_baidu_paddle_lite_Tensor_getFloatData(JNIEnv *, jobject);
JNIEXPORT jbyteArray JNICALL
Java_com_baidu_paddle_lite_Tensor_getByteData(JNIEnv *, jobject);
/*
* Class: com_baidu_paddle_lite_Tensor
* Method: getIntData
* Signature: ()[I
*/
JNIEXPORT jintArray JNICALL
Java_com_baidu_paddle_lite_Tensor_getIntData(JNIEnv *, jobject);
/*
* Class: com_baidu_paddle_lite_Tensor
* Method: nativeResize
@@ -73,6 +81,14 @@ JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_lite_Tensor_nativeSetData___3F(
JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_lite_Tensor_nativeSetData___3B(
JNIEnv *, jobject, jbyteArray);
/*
* Class: com_baidu_paddle_lite_Tensor
* Method: nativeSetData
* Signature: ([I)Z
*/
JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_lite_Tensor_nativeSetData___3I(
JNIEnv *, jobject, jintArray);
/*
* Class: com_baidu_paddle_lite_Tensor
* Method: deleteCppTensor
@@ -87,4 +103,4 @@ Java_com_baidu_paddle_lite_Tensor_deleteCppTensor(JNIEnv *, jobject, jlong);
#ifdef __cplusplus
}
#endif
-#endif // PADDLE_FLUID_LITE_API_ANDROID_JNI_NATIVE_TENSOR_JNI_H_
+#endif // LITE_API_ANDROID_JNI_NATIVE_TENSOR_JNI_H_
@@ -108,6 +108,19 @@ public class Tensor {
return nativeSetData(buf);
}
/**
* Set the tensor int data.
*
* @param buf the int array buffer which will be copied into tensor.
* @return true if set data successfully.
*/
public boolean setData(int[] buf) {
if (readOnly) {
return false;
}
return nativeSetData(buf);
}
/**
* @return shape of the tensor as long array.
*/
@@ -123,12 +136,19 @@ public class Tensor {
*/
public native byte[] getByteData();
/**
* @return the tensor data as int array.
*/
public native int[] getIntData();
private native boolean nativeResize(long[] dims);
private native boolean nativeSetData(float[] buf);
private native boolean nativeSetData(byte[] buf);
private native boolean nativeSetData(int[] buf);
/**
* Delete C++ Tenor object pointed by the input pointer, which is presented by a
* long value.
......
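For reference, the new Java entry points bind to the same templated Tensor accessors that the C++ API already exposes. A rough C++ equivalent of setData(int[]) / getIntData() is sketched below (illustrative only; the input index and values are made up):

```cpp
#include <algorithm>
#include <vector>
#include "lite/api/paddle_api.h"

// Writes int32 data into an input tensor, mirroring what the nativeSetData([I)
// and getIntData() JNI bindings above do for a Java int[].
void RoundTripIntTensor(paddle::lite_api::PaddlePredictor* predictor) {
  auto input = predictor->GetInput(0);  // hypothetical input index
  input->Resize({1, 4});
  std::vector<int32_t> src = {1, 2, 3, 4};
  int32_t* dst = input->mutable_data<int32_t>();  // same call used by the JNI setter
  std::copy(src.begin(), src.end(), dst);
  // after predictor->Run(), an output can be read back with data<int32_t>()
}
```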
@@ -46,7 +46,6 @@ void OutputOptModel(const std::string& load_model_dir,
config.set_model_dir(load_model_dir);
std::vector<Place> vaild_places = {
Place{TARGET(kARM), PRECISION(kFloat)},
-Place{TARGET(kX86), PRECISION(kFloat)},
};
if (FLAGS_is_quantized_model) {
vaild_places.insert(vaild_places.begin(),
......
@@ -139,22 +139,15 @@ std::vector<std::string> Predictor::GetOutputNames() { return output_names_; }
// append the names of inputs and outputs into input_names_ and output_names_
void Predictor::PrepareFeedFetch() {
-std::vector<const cpp::OpDesc *> feeds;
-std::vector<const cpp::OpDesc *> fetchs;
-#if defined(LITE_WITH_NPU) || defined(LITE_WITH_XPU) || defined(LITE_WITH_BM)
-// The shape of input tensors must be determined before generating NPU and XPU
-// program.
-auto current_block = program_desc_.GetBlock<cpp::BlockDesc>(0);
-for (size_t i = 0; i < current_block->OpsSize(); i++) {
-auto op = current_block->GetOp<cpp::OpDesc>(i);
-#else
if (!program_) {
GenRuntimeProgram();
}
+std::vector<const cpp::OpDesc *> feeds;
+std::vector<const cpp::OpDesc *> fetchs;
const auto &insts = program_->instructions();
for (size_t i = 0; i < program_->num_instructions(); i++) {
const auto &op = insts[i].op()->op_info();
-#endif
if (op->Type() == "feed") {
feeds.push_back(op);
} else if (op->Type() == "fetch") {
......
@@ -20,6 +20,12 @@
#include "lite/core/device_info.h"
#include "lite/core/version.h"
#if (defined LITE_WITH_X86) && (defined PADDLE_WITH_MKLML) && \
!(defined LITE_ON_MODEL_OPTIMIZE_TOOL)
#include <omp.h>
#include "lite/backends/x86/mklml.h"
#endif
namespace paddle {
namespace lite {
@@ -33,6 +39,17 @@ void CxxPaddleApiImpl::Init(const lite_api::CxxConfig &config) {
mode_ = config.power_mode();
threads_ = config.threads();
#if (defined LITE_WITH_X86) && (defined PADDLE_WITH_MKLML) && \
!(defined LITE_ON_MODEL_OPTIMIZE_TOOL)
int num_threads = config.cpu_math_library_num_threads();
int real_num_threads = num_threads > 1 ? num_threads : 1;
paddle::lite::x86::MKL_Set_Num_Threads(real_num_threads);
omp_set_num_threads(real_num_threads);
VLOG(3) << "set_cpu_math_library_math_threads() is set successfully and the "
"number of threads is:"
<< num_threads;
#endif
}
std::unique_ptr<lite_api::Tensor> CxxPaddleApiImpl::GetInput(int i) {
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <string>
#include <vector>
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/test_helper.h"
#include "lite/core/device_info.h"
#include "lite/core/profile/timer.h"
#include "lite/utils/cp_logging.h"
#include "lite/utils/string.h"
#ifdef LITE_WITH_PROFILE
#include "lite/core/profile/basic_profiler.h"
#endif // LITE_WITH_PROFILE
#include <thread> // NOLINT
using paddle::lite::profile::Timer;
DEFINE_string(input_shape,
"1,3,224,224",
"input shapes, separated by colon and comma");
DEFINE_string(model_dir_0, "", "model_dir_0");
DEFINE_string(input_shape_0,
"1,3,224,224",
"input shapes another, separated by colon and comma");
DEFINE_bool(use_optimize_nb,
false,
"optimized & naive buffer model for mobile devices");
DEFINE_int32(test_type, 0, "multithread test type");
namespace paddle {
namespace lite_api {
void OutputOptModel(const std::string& load_model_dir,
const std::string& save_optimized_model_dir,
const std::vector<std::vector<int64_t>>& input_shapes) {
lite_api::CxxConfig config;
config.set_model_dir(load_model_dir);
config.set_valid_places({
Place{TARGET(kARM), PRECISION(kFloat)},
});
auto predictor = lite_api::CreatePaddlePredictor(config);
// delete old optimized model
int ret = system(
paddle::lite::string_format("rm -rf %s", save_optimized_model_dir.c_str())
.c_str());
if (ret == 0) {
LOG(INFO) << "delete old optimized model " << save_optimized_model_dir;
}
predictor->SaveOptimizedModel(save_optimized_model_dir,
LiteModelType::kNaiveBuffer);
LOG(INFO) << "Load model from " << load_model_dir;
LOG(INFO) << "Save optimized model to " << save_optimized_model_dir;
}
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
void Run(const std::vector<std::vector<int64_t>>& input_shapes,
const std::string& model_dir,
const PowerMode power_mode,
const int thread_num,
const int repeat,
int tid,
const int warmup_times = 5) {
lite_api::MobileConfig config;
config.set_model_dir(model_dir);
config.set_power_mode(power_mode);
config.set_threads(thread_num);
auto predictor = lite_api::CreatePaddlePredictor(config);
for (int j = 0; j < input_shapes.size(); ++j) {
auto input_tensor = predictor->GetInput(j);
input_tensor->Resize(input_shapes[j]);
auto input_data = input_tensor->mutable_data<float>();
int input_num = 1;
for (int i = 0; i < input_shapes[j].size(); ++i) {
input_num *= input_shapes[j][i];
}
for (int i = 0; i < input_num; ++i) {
input_data[i] = 1.f;
}
}
for (int i = 0; i < warmup_times; ++i) {
predictor->Run();
}
Timer ti;
for (int j = 0; j < repeat; ++j) {
ti.Start();
predictor->Run();
float t = ti.Stop();
auto output = predictor->GetOutput(0);
auto out = output->data<float>();
LOG(INFO) << "[thread " << tid << "] Model: " << model_dir
<< " output[0]:" << out[0] << "; output[1]:" << out[1];
}
LOG(INFO) << "[thread " << tid << "] Model: " << model_dir
<< ", power_mode: " << static_cast<int>(power_mode)
<< ", threads num " << thread_num
<< ", avg time: " << ti.LapTimes().Avg() << "ms"
<< ", min time: " << ti.LapTimes().Min() << " ms"
<< ", max time: " << ti.LapTimes().Max() << " ms.";
}
void RunTestType_00(const std::vector<std::vector<int64_t>>& input_shapes,
const std::string& model_dir,
const PowerMode power_mode,
const int thread_num,
const int repeat,
const int warmup_times = 5) {
std::thread run_th0(Run,
input_shapes,
model_dir,
power_mode,
thread_num,
repeat,
0,
warmup_times);
Run(input_shapes, model_dir, power_mode, thread_num, repeat, 1, warmup_times);
run_th0.join();
}
void RunTestType_01(const std::vector<std::vector<int64_t>>& input_shapes,
const std::string& model_dir,
const std::vector<std::vector<int64_t>>& input_shapes_0,
const std::string& model_dir_0,
const PowerMode power_mode,
const int thread_num,
const int repeat,
const int warmup_times = 5) {
std::thread run_th0(Run,
input_shapes,
model_dir,
power_mode,
thread_num,
repeat,
0,
warmup_times);
Run(input_shapes_0,
model_dir_0,
power_mode,
thread_num,
repeat,
1,
warmup_times);
run_th0.join();
}
void run_with_predictor(std::shared_ptr<PaddlePredictor> predictor,
const std::vector<std::vector<int64_t>>& input_shapes,
int index,
const std::string& name) {
for (int j = 0; j < input_shapes.size(); ++j) {
auto input_tensor = predictor->GetInput(j);
input_tensor->Resize(input_shapes[j]);
auto input_data = input_tensor->mutable_data<float>();
int input_num = 1;
for (int i = 0; i < input_shapes[j].size(); ++i) {
input_num *= input_shapes[j][i];
}
for (int i = 0; i < input_num; ++i) {
input_data[i] = 1.f;
}
}
Timer ti;
ti.Start();
predictor->Run();
float t = ti.Stop();
auto output = predictor->GetOutput(0);
auto out = output->data<float>();
LOG(INFO) << "[thread " << index << "] name: " << name
<< ",run time: " << ti.LapTimes().Avg() << "ms"
<< " output[0]:" << out[0] << "; output[1]:" << out[1];
}
void RunTestType_10(const std::vector<std::vector<int64_t>>& input_shapes,
const std::string& model_dir,
const PowerMode power_mode,
const int thread_num,
const int repeat,
int warmup = 5) {
lite_api::MobileConfig config;
config.set_model_dir(model_dir);
config.set_power_mode(power_mode);
config.set_threads(thread_num);
auto predictor = lite_api::CreatePaddlePredictor(config);
for (int i = 0; i < repeat; ++i) {
std::thread pre_th0(
run_with_predictor, predictor, input_shapes, i, model_dir);
pre_th0.join();
}
}
void RunTestType_11(const std::vector<std::vector<int64_t>>& input_shapes,
const std::string& model_dir,
const std::vector<std::vector<int64_t>>& input_shapes_0,
const std::string& model_dir_0,
const PowerMode power_mode,
const int thread_num,
const int repeat,
int warmup = 5) {
lite_api::MobileConfig config;
config.set_model_dir(model_dir);
config.set_power_mode(power_mode);
config.set_threads(thread_num);
auto predictor = lite_api::CreatePaddlePredictor(config);
config.set_model_dir(model_dir_0);
auto predictor_0 = lite_api::CreatePaddlePredictor(config);
for (int i = 0; i < 2 * repeat; i += 2) {
std::thread pre_th0(
run_with_predictor, predictor, input_shapes, i, model_dir);
std::thread pre_th1(
run_with_predictor, predictor_0, input_shapes_0, i + 1, model_dir_0);
pre_th0.join();
pre_th1.join();
}
}
#endif
} // namespace lite_api
} // namespace paddle
int main(int argc, char** argv) {
gflags::ParseCommandLineFlags(&argc, &argv, true);
if (FLAGS_model_dir == "") {
LOG(INFO) << "usage: "
<< "--model_dir /path/to/your/model";
exit(0);
}
std::string save_optimized_model_dir = "";
std::string save_optimized_model_dir_0 = "";
if (FLAGS_use_optimize_nb) {
save_optimized_model_dir = FLAGS_model_dir;
save_optimized_model_dir_0 = FLAGS_model_dir_0;
} else {
save_optimized_model_dir = FLAGS_model_dir + "opt2";
save_optimized_model_dir_0 = FLAGS_model_dir_0 + "opt2";
}
auto split_string =
[](const std::string& str_in) -> std::vector<std::string> {
std::vector<std::string> str_out;
std::string tmp_str = str_in;
while (!tmp_str.empty()) {
size_t next_offset = tmp_str.find(":");
str_out.push_back(tmp_str.substr(0, next_offset));
if (next_offset == std::string::npos) {
break;
} else {
tmp_str = tmp_str.substr(next_offset + 1);
}
}
return str_out;
};
auto get_shape = [](const std::string& str_shape) -> std::vector<int64_t> {
std::vector<int64_t> shape;
std::string tmp_str = str_shape;
while (!tmp_str.empty()) {
int dim = atoi(tmp_str.data());
shape.push_back(dim);
size_t next_offset = tmp_str.find(",");
if (next_offset == std::string::npos) {
break;
} else {
tmp_str = tmp_str.substr(next_offset + 1);
}
}
return shape;
};
std::vector<std::string> str_input_shapes = split_string(FLAGS_input_shape);
std::vector<std::vector<int64_t>> input_shapes;
for (int i = 0; i < str_input_shapes.size(); ++i) {
input_shapes.push_back(get_shape(str_input_shapes[i]));
}
std::vector<std::string> str_input_shapes_0 =
split_string(FLAGS_input_shape_0);
std::vector<std::vector<int64_t>> input_shapes_0;
for (int i = 0; i < str_input_shapes_0.size(); ++i) {
input_shapes_0.push_back(get_shape(str_input_shapes_0[i]));
}
if (!FLAGS_use_optimize_nb) {
// Output optimized model
paddle::lite_api::OutputOptModel(
FLAGS_model_dir, save_optimized_model_dir, input_shapes);
paddle::lite_api::OutputOptModel(
FLAGS_model_dir_0, save_optimized_model_dir_0, input_shapes_0);
}
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
// Run inference using optimized model
if (FLAGS_test_type == 0) {
paddle::lite_api::RunTestType_00(
input_shapes,
save_optimized_model_dir,
static_cast<paddle::lite_api::PowerMode>(0),
FLAGS_threads,
FLAGS_repeats,
5);
LOG(INFO) << "=========above is case 0, below is case "
"1============================";
paddle::lite_api::RunTestType_10(
input_shapes,
save_optimized_model_dir,
static_cast<paddle::lite_api::PowerMode>(0),
FLAGS_threads,
FLAGS_repeats);
}
if (FLAGS_test_type == 1) {
paddle::lite_api::RunTestType_01(
input_shapes,
save_optimized_model_dir,
input_shapes_0,
save_optimized_model_dir_0,
static_cast<paddle::lite_api::PowerMode>(0),
FLAGS_threads,
FLAGS_repeats,
5);
LOG(INFO) << "=========above is case 0, below is case "
"1============================";
paddle::lite_api::RunTestType_11(
input_shapes,
save_optimized_model_dir,
input_shapes_0,
save_optimized_model_dir_0,
static_cast<paddle::lite_api::PowerMode>(0),
FLAGS_threads,
FLAGS_repeats);
}
#endif
return 0;
}
@@ -90,6 +90,10 @@ std::vector<Place> ParserValidPlaces() {
TARGET(kARM)); // enable kARM CPU kernel when no opencl kernel
} else if (target_repr == "x86") {
valid_places.emplace_back(TARGET(kX86));
} else if (target_repr == "npu") {
valid_places.emplace_back(TARGET(kNPU));
} else if (target_repr == "xpu") {
valid_places.emplace_back(TARGET(kXPU));
} else {
LOG(FATAL) << lite::string_format(
"Wrong target '%s' found, please check the command flag "
......
@@ -47,7 +47,6 @@ void OutputOptModel(const std::string& load_model_dir,
lite_api::CxxConfig config;
config.set_model_dir(load_model_dir);
config.set_valid_places({
-Place{TARGET(kX86), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
});
auto predictor = lite_api::CreatePaddlePredictor(config);
@@ -72,10 +71,6 @@ void Run(const std::vector<std::vector<int64_t>>& input_shapes,
const int thread_num,
const int repeat,
const int warmup_times = 0) {
-#ifdef LITE_WITH_PROFILE
-lite::profile::BasicProfiler<lite::profile::BasicTimer>::Global().SetWarmup(
-warmup_times);
-#endif
lite_api::MobileConfig config;
config.set_model_dir(model_dir);
config.set_power_mode(power_mode);
......
@@ -133,6 +133,7 @@ class LITE_API CxxConfig : public ConfigBase {
std::string model_file_;
std::string param_file_;
bool model_from_memory_{false};
int cpu_math_library_math_threads_ = 1;
public:
void set_valid_places(const std::vector<Place>& x) { valid_places_ = x; }
@@ -151,6 +152,13 @@ class LITE_API CxxConfig : public ConfigBase {
std::string model_file() const { return model_file_; }
std::string param_file() const { return param_file_; }
bool model_from_memory() const { return model_from_memory_; }
void set_cpu_math_library_num_threads(int threads) {
cpu_math_library_math_threads_ = threads;
}
int cpu_math_library_num_threads() const {
return cpu_math_library_math_threads_;
}
};
/// MobileConfig is the config for the light weight predictor, it will skip
......
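A short usage sketch for the new setting (illustrative only; the model directory is hypothetical). As the cxx_api_impl.cc hunk above shows, the stored value is forwarded to MKLML and OpenMP only when the build enables LITE_WITH_X86 and PADDLE_WITH_MKLML and is not the model-optimize tool:

```cpp
#include "lite/api/paddle_api.h"

void BuildX86Predictor() {
  paddle::lite_api::CxxConfig config;
  config.set_model_dir("step_rnn");            // hypothetical model directory
  config.set_cpu_math_library_num_threads(4);  // consumed in CxxPaddleApiImpl::Init()
  config.set_valid_places({
      paddle::lite_api::Place{TARGET(kX86), PRECISION(kFloat)},
      paddle::lite_api::Place{TARGET(kHost), PRECISION(kFloat)},
  });
  auto predictor = paddle::lite_api::CreatePaddlePredictor(config);
  (void)predictor;
}
```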
@@ -78,7 +78,8 @@ const std::string& PrecisionToStr(PrecisionType precision) {
}
const std::string& DataLayoutToStr(DataLayoutType layout) {
-static const std::string datalayout2string[] = {"unk", "NCHW", "any", "NHWC"};
+static const std::string datalayout2string[] = {
+"unk", "NCHW", "any", "NHWC", "ImageDefault", "ImageFolder", "ImageNW"};
auto x = static_cast<int>(layout);
CHECK_LT(x, static_cast<int>(DATALAYOUT(NUM)));
return datalayout2string[x];
@@ -117,8 +118,13 @@ const std::string& PrecisionRepr(PrecisionType precision) {
}
const std::string& DataLayoutRepr(DataLayoutType layout) {
-static const std::string datalayout2string[] = {
-"kUnk", "kNCHW", "kAny", "kNHWC"};
+static const std::string datalayout2string[] = {"kUnk",
+"kNCHW",
+"kAny",
+"kNHWC",
+"kImageDefault",
+"kImageFolder",
+"kImageNW"};
auto x = static_cast<int>(layout);
CHECK_LT(x, static_cast<int>(DATALAYOUT(NUM)));
return datalayout2string[x];
@@ -149,8 +155,12 @@ std::set<PrecisionType> ExpandValidPrecisions(PrecisionType precision) {
}
std::set<DataLayoutType> ExpandValidLayouts(DataLayoutType layout) {
-static const std::set<DataLayoutType> valid_set(
-{DATALAYOUT(kNCHW), DATALAYOUT(kAny), DATALAYOUT(kNHWC)});
+static const std::set<DataLayoutType> valid_set({DATALAYOUT(kNCHW),
+DATALAYOUT(kAny),
+DATALAYOUT(kNHWC),
+DATALAYOUT(kImageDefault),
+DATALAYOUT(kImageFolder),
+DATALAYOUT(kImageNW)});
if (layout == DATALAYOUT(kAny)) {
return valid_set;
}
......
@@ -72,8 +72,11 @@ enum class DataLayoutType : int {
kUnk = 0,
kNCHW = 1,
kNHWC = 3,
-kAny = 2, // any data layout
-NUM = 4, // number of fields.
+kImageDefault = 4, // for opencl image2d
+kImageFolder = 5, // for opencl image2d
+kImageNW = 6, // for opencl image2d
+kAny = 2, // any data layout
+NUM = 7, // number of fields.
};
typedef enum {
......
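The three new enumerators describe OpenCL image2d layouts alongside the existing buffer layouts, and NUM is bumped so the string tables and valid-layout sets above stay in range. A small sketch of how such a layout can appear in a Place (not part of the patch; the OpenCL target/precision combination is only an assumption):

```cpp
#include "lite/api/paddle_place.h"

// A kernel or valid place that works on OpenCL image2d memory carries one of
// the new layouts, e.g. kImageDefault, instead of the buffer layouts kNCHW/kNHWC.
paddle::lite_api::Place MakeOpenCLImagePlace() {
  return paddle::lite_api::Place{
      TARGET(kOpenCL), PRECISION(kFloat), DATALAYOUT(kImageDefault)};
}
```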
@@ -20,15 +20,6 @@ USE_MIR_PASS(static_kernel_pick_pass);
USE_MIR_PASS(variable_place_inference_pass);
USE_MIR_PASS(type_target_cast_pass);
USE_MIR_PASS(generate_program_pass);
-#ifdef LITE_WITH_NPU
-USE_MIR_PASS(generate_npu_program_pass);
-#endif
-#ifdef LITE_WITH_XPU
-USE_MIR_PASS(generate_xpu_program_pass);
-#endif
-#ifdef LITE_WITH_BM
-USE_MIR_PASS(generate_bm_program_pass);
-#endif
USE_MIR_PASS(io_copy_kernel_pick_pass);
USE_MIR_PASS(argument_type_display_pass);
@@ -40,11 +31,16 @@ USE_MIR_PASS(lite_fc_fuse_pass);
USE_MIR_PASS(lite_shuffle_channel_fuse_pass);
USE_MIR_PASS(lite_transpose_softmax_transpose_fuse_pass);
USE_MIR_PASS(lite_interpolate_fuse_pass);
+USE_MIR_PASS(lite_sequence_pool_concat_fuse_pass);
USE_MIR_PASS(identity_scale_eliminate_pass);
USE_MIR_PASS(lite_conv_elementwise_fuse_pass);
USE_MIR_PASS(lite_conv_activation_fuse_pass);
+USE_MIR_PASS(lite_var_conv_2d_activation_fuse_pass);
USE_MIR_PASS(lite_elementwise_add_activation_fuse_pass);
USE_MIR_PASS(lite_quant_dequant_fuse_pass);
USE_MIR_PASS(type_precision_cast_pass);
USE_MIR_PASS(type_layout_cast_pass);
USE_MIR_PASS(memory_optimize_pass);
+USE_MIR_PASS(elementwise_mul_constant_eliminate_pass)
+USE_MIR_PASS(npu_subgraph_pass);
+USE_MIR_PASS(xpu_subgraph_pass);
@@ -4,3 +4,6 @@ if (NOT LITE_ON_TINY_PUBLISH)
endif()
lite_cc_library(lite_pybind SHARED SRCS pybind.cc DEPS ${PYBIND_DEPS})
if (LITE_ON_TINY_PUBLISH)
set_target_properties(lite_pybind PROPERTIES COMPILE_FLAGS "-flto -fdata-sections")
endif()
@@ -165,6 +165,9 @@ void BindLitePlace(py::module *m) {
py::enum_<DataLayoutType>(*m, "DataLayoutType")
.value("NCHW", DataLayoutType::kNCHW)
.value("NHWC", DataLayoutType::kNHWC)
.value("ImageDefault", DataLayoutType::kImageDefault)
.value("ImageFolder", DataLayoutType::kImageFolder)
.value("ImageNW", DataLayoutType::kImageNW)
.value("Any", DataLayoutType::kAny); .value("Any", DataLayoutType::kAny);
// Place // Place
......
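With the image layouts now exposed through the Python bindings above, an OpenCL-targeted valid place can be spelled in C++ roughly as below. This is a sketch: the header path and the precision paired with the image layout are assumptions for illustration; only the DataLayoutType names come from this commit.

```cpp
#include <vector>
#include "paddle_api.h"  // assumed public header exposing paddle::lite_api::Place

using namespace paddle::lite_api;

// Sketch: prefer OpenCL image2d kernels, fall back to ARM float kernels.
std::vector<Place> BuildValidPlaces() {
  return {
      Place{TARGET(kOpenCL), PRECISION(kFloat), DATALAYOUT(kImageDefault)},
      Place{TARGET(kARM), PRECISION(kFloat)},
  };
}
```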
...@@ -30,6 +30,7 @@ TEST(Step_rnn, test_step_rnn_lite_x86) { ...@@ -30,6 +30,7 @@ TEST(Step_rnn, test_step_rnn_lite_x86) {
std::string model_dir = FLAGS_model_dir; std::string model_dir = FLAGS_model_dir;
lite_api::CxxConfig config; lite_api::CxxConfig config;
config.set_model_dir(model_dir); config.set_model_dir(model_dir);
config.set_cpu_math_library_num_threads(1);
config.set_valid_places({lite_api::Place{TARGET(kX86), PRECISION(kInt64)}, config.set_valid_places({lite_api::Place{TARGET(kX86), PRECISION(kInt64)},
lite_api::Place{TARGET(kX86), PRECISION(kFloat)}, lite_api::Place{TARGET(kX86), PRECISION(kFloat)},
lite_api::Place{TARGET(kHost), PRECISION(kFloat)}}); lite_api::Place{TARGET(kHost), PRECISION(kFloat)}});
...@@ -48,7 +49,7 @@ TEST(Step_rnn, test_step_rnn_lite_x86) { ...@@ -48,7 +49,7 @@ TEST(Step_rnn, test_step_rnn_lite_x86) {
"micro_video_id", "micro_video_id",
"vertical_type_id"}; "vertical_type_id"};
for (int i = 0; i < target_names.size(); ++i) { for (size_t i = 0; i < target_names.size(); ++i) {
auto input_tensor = predictor->GetInput(i); auto input_tensor = predictor->GetInput(i);
int size = 0; int size = 0;
if (i == 6 || i == 8) { if (i == 6 || i == 8) {
...@@ -73,8 +74,7 @@ TEST(Step_rnn, test_step_rnn_lite_x86) { ...@@ -73,8 +74,7 @@ TEST(Step_rnn, test_step_rnn_lite_x86) {
predictor->Run(); predictor->Run();
} }
// LOG(INFO) << "================== Speed Report ==================="; LOG(INFO) << "warmup: " << FLAGS_warmup << ", repeats: " << FLAGS_repeats
LOG(INFO) << ", warmup: " << FLAGS_warmup << ", repeats: " << FLAGS_repeats
<< ", spend " << (GetCurrentUS() - start) / FLAGS_repeats / 1000.0 << ", spend " << (GetCurrentUS() - start) / FLAGS_repeats / 1000.0
<< " ms in average."; << " ms in average.";
...@@ -85,8 +85,8 @@ TEST(Step_rnn, test_step_rnn_lite_x86) { ...@@ -85,8 +85,8 @@ TEST(Step_rnn, test_step_rnn_lite_x86) {
std::vector<int64_t> out_shape = out->shape(); std::vector<int64_t> out_shape = out->shape();
for (int i = 0; i < results.size(); ++i) { for (size_t i = 0; i < results.size(); ++i) {
for (int j = 0; j < results[i].size(); ++j) { for (size_t j = 0; j < results[i].size(); ++j) {
EXPECT_NEAR( EXPECT_NEAR(
out->data<float>()[j + (out_shape[1] * i)], results[i][j], 1e-6); out->data<float>()[j + (out_shape[1] * i)], results[i][j], 1e-6);
} }
......
...@@ -120,5 +120,7 @@ if (NOT HAS_ARM_MATH_LIB_DIR) ...@@ -120,5 +120,7 @@ if (NOT HAS_ARM_MATH_LIB_DIR)
stack.cc stack.cc
affine_channel.cc affine_channel.cc
anchor_generator.cc anchor_generator.cc
split_merge_lod_tenosr.cc
reduce_prod.cc
DEPS ${lite_kernel_deps} context tensor) DEPS ${lite_kernel_deps} context tensor)
endif() endif()
...@@ -26,31 +26,32 @@ namespace math { ...@@ -26,31 +26,32 @@ namespace math {
void concat_func(const std::vector<lite::Tensor *> &input, void concat_func(const std::vector<lite::Tensor *> &input,
const int axis, const int axis,
lite::Tensor *output) { lite::Tensor *output) {
size_t num = input.size(); int64_t concat_input_size = 1;
int rows = 1; int64_t num_cancats = 1;
auto dim_0 = input[0]->dims(); auto dim_0 = input[0]->dims();
for (int i = 0; i < axis; ++i) { size_t num = input.size();
rows *= dim_0[i]; for (int i = axis + 1; i < dim_0.size(); i++) {
concat_input_size *= dim_0[i];
} }
int out_rows = rows, out_cols = 0; for (int i = 0; i < axis; i++) {
num_cancats *= dim_0[i];
std::vector<int64_t> input_cols(input.size());
for (int i = 0; i < num; ++i) {
int t_cols = input[i]->numel() / rows;
out_cols += t_cols;
input_cols[i] = t_cols;
} }
float *dst_ptr = output->mutable_data<float>();
// computation const int out_concat_axis = output->dims()[axis];
for (int k = 0; k < out_rows; ++k) { int64_t offset_concat_axis = 0;
float *dst_ptr = output->mutable_data<float>() + k * out_cols; int64_t out_sum = out_concat_axis * concat_input_size;
int col_idx = 0; for (int n = 0; n < num; n++) {
for (int j = 0; j < num; ++j) { auto dims = input[n]->dims();
int col_len = input_cols[j]; const float *src_ptr = input[n]->data<float>();
const float *src_prt = input[j]->data<float>() + k * col_len; int64_t in_concat_axis = dims[axis];
std::memcpy(dst_ptr + col_idx, src_prt, sizeof(float) * col_len); float *dout_ptr = dst_ptr + offset_concat_axis * concat_input_size;
col_idx += col_len; int64_t in_sum = in_concat_axis * concat_input_size;
for (int i = 0; i < num_cancats; i++) {
std::memcpy(dout_ptr, src_ptr, sizeof(float) * in_sum);
dout_ptr += out_sum;
src_ptr += in_sum;
} }
offset_concat_axis += in_concat_axis;
} }
} }
......
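The old row/column implementation of concat_func and its replacement are interleaved above; as a reading aid, here is just the new logic restated as a self-contained function, with lite::Tensor replaced by raw pointers and shape vectors purely for illustration. Each input contributes `num_concats` contiguous slabs of `dims[n][axis] * concat_input_size` floats, copied into the output at a running offset along the concat axis.

```cpp
#include <cstring>
#include <vector>

void ConcatAlongAxis(const std::vector<const float*>& inputs,
                     const std::vector<std::vector<int64_t>>& dims,
                     int axis,
                     int64_t out_axis_size,
                     float* out) {
  // Elements per step along the concat axis (product of dims after `axis`).
  int64_t concat_input_size = 1;
  for (int i = axis + 1; i < static_cast<int>(dims[0].size()); ++i) {
    concat_input_size *= dims[0][i];
  }
  // Number of slabs to copy per input (product of dims before `axis`).
  int64_t num_concats = 1;
  for (int i = 0; i < axis; ++i) {
    num_concats *= dims[0][i];
  }
  const int64_t out_stride = out_axis_size * concat_input_size;
  int64_t offset_concat_axis = 0;
  for (size_t n = 0; n < inputs.size(); ++n) {
    const int64_t in_axis_size = dims[n][axis];
    const int64_t in_stride = in_axis_size * concat_input_size;
    const float* src = inputs[n];
    float* dst = out + offset_concat_axis * concat_input_size;
    for (int64_t i = 0; i < num_concats; ++i) {
      std::memcpy(dst, src, sizeof(float) * in_stride);
      dst += out_stride;
      src += in_stride;
    }
    offset_concat_axis += in_axis_size;
  }
}
```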
...@@ -76,6 +76,7 @@ void conv_3x3s1_direct_fp32(const float* i_data, ...@@ -76,6 +76,7 @@ void conv_3x3s1_direct_fp32(const float* i_data,
const int threads = ctx->threads(); const int threads = ctx->threads();
int l2_size = ctx->llc_size() / sizeof(float); int l2_size = ctx->llc_size() / sizeof(float);
auto paddings = *param.paddings; auto paddings = *param.paddings;
auto act_param = param.activation_param;
const int pad_h = paddings[0]; const int pad_h = paddings[0];
const int pad_w = paddings[2]; const int pad_w = paddings[2];
...@@ -469,7 +470,8 @@ void conv_3x3s1_direct_fp32(const float* i_data, ...@@ -469,7 +470,8 @@ void conv_3x3s1_direct_fp32(const float* i_data,
oh, oh,
ow, ow,
flag_relu, flag_relu,
ptr_write); ptr_write,
&act_param);
} }
const float* weight_remain_ptr = weights + c_round_down * w_stride; const float* weight_remain_ptr = weights + c_round_down * w_stride;
#pragma omp parallel for num_threads(threads) #pragma omp parallel for num_threads(threads)
...@@ -780,7 +782,8 @@ void conv_3x3s1_direct_fp32(const float* i_data, ...@@ -780,7 +782,8 @@ void conv_3x3s1_direct_fp32(const float* i_data,
oh, oh,
ow, ow,
flag_relu, flag_relu,
ptr_write); ptr_write,
&act_param);
} }
} }
} }
......
...@@ -75,6 +75,7 @@ void conv_3x3s2_direct_fp32(const float* i_data, ...@@ -75,6 +75,7 @@ void conv_3x3s2_direct_fp32(const float* i_data,
//! prepack input to tmp buffer //! prepack input to tmp buffer
//! write output to tmp buffer //! write output to tmp buffer
auto paddings = *param.paddings; auto paddings = *param.paddings;
auto act_param = param.activation_param;
const int threads = ctx->threads(); const int threads = ctx->threads();
int l2_size = ctx->llc_size() / sizeof(float); int l2_size = ctx->llc_size() / sizeof(float);
const int pad_w = paddings[2]; const int pad_w = paddings[2];
...@@ -510,7 +511,8 @@ void conv_3x3s2_direct_fp32(const float* i_data, ...@@ -510,7 +511,8 @@ void conv_3x3s2_direct_fp32(const float* i_data,
oh, oh,
ow, ow,
flag_relu, flag_relu,
ptr_write); ptr_write,
&act_param);
} }
#pragma omp parallel for num_threads(threads) #pragma omp parallel for num_threads(threads)
...@@ -839,7 +841,8 @@ void conv_3x3s2_direct_fp32(const float* i_data, ...@@ -839,7 +841,8 @@ void conv_3x3s2_direct_fp32(const float* i_data,
oh, oh,
ow, ow,
flag_relu, flag_relu,
ptr_write); ptr_write,
&act_param);
} }
} }
} }
......
...@@ -37,6 +37,7 @@ void conv_3x3s1_depthwise_fp32(const float* i_data, ...@@ -37,6 +37,7 @@ void conv_3x3s1_depthwise_fp32(const float* i_data,
const float* weights, const float* weights,
const float* bias, const float* bias,
const operators::ConvParam& param, const operators::ConvParam& param,
const operators::ActivationParam act_param,
ARMContext* ctx); ARMContext* ctx);
void conv_3x3s2_depthwise_fp32(const float* i_data, void conv_3x3s2_depthwise_fp32(const float* i_data,
...@@ -51,6 +52,7 @@ void conv_3x3s2_depthwise_fp32(const float* i_data, ...@@ -51,6 +52,7 @@ void conv_3x3s2_depthwise_fp32(const float* i_data,
const float* weights, const float* weights,
const float* bias, const float* bias,
const operators::ConvParam& param, const operators::ConvParam& param,
const operators::ActivationParam act_param,
ARMContext* ctx); ARMContext* ctx);
void conv_depthwise_3x3s1_fp32(const float* din, void conv_depthwise_3x3s1_fp32(const float* din,
...@@ -66,7 +68,7 @@ void conv_depthwise_3x3s1_fp32(const float* din, ...@@ -66,7 +68,7 @@ void conv_depthwise_3x3s1_fp32(const float* din,
const float* bias, const float* bias,
int pad, int pad,
bool flag_bias, bool flag_bias,
bool flag_relu, const operators::ActivationParam act_param,
ARMContext* ctx); ARMContext* ctx);
void conv_depthwise_3x3s2_fp32(const float* din, void conv_depthwise_3x3s2_fp32(const float* din,
...@@ -82,7 +84,7 @@ void conv_depthwise_3x3s2_fp32(const float* din, ...@@ -82,7 +84,7 @@ void conv_depthwise_3x3s2_fp32(const float* din,
const float* bias, const float* bias,
int pad, int pad,
bool flag_bias, bool flag_bias,
bool flag_relu, const operators::ActivationParam act_param,
ARMContext* ctx); ARMContext* ctx);
template <typename Dtype> template <typename Dtype>
......
...@@ -579,11 +579,11 @@ void conv_depthwise_3x3_fp32(const void* din, ...@@ -579,11 +579,11 @@ void conv_depthwise_3x3_fp32(const void* din,
ARMContext* ctx, ARMContext* ctx,
const float* scale) { const float* scale) {
auto paddings = *param.paddings; auto paddings = *param.paddings;
auto act_param = param.activation_param;
const int pad_h = paddings[0]; const int pad_h = paddings[0];
const int pad_w = paddings[2]; const int pad_w = paddings[2];
int stride = param.strides[1]; int stride = param.strides[1];
int pad = pad_w; int pad = pad_w;
bool flag_relu = param.fuse_relu;
bool flag_bias = param.bias != nullptr; bool flag_bias = param.bias != nullptr;
bool pads_equal = bool pads_equal =
((paddings[0] == paddings[1]) && (paddings[2] == paddings[3])); ((paddings[0] == paddings[1]) && (paddings[2] == paddings[3]));
...@@ -602,7 +602,7 @@ void conv_depthwise_3x3_fp32(const void* din, ...@@ -602,7 +602,7 @@ void conv_depthwise_3x3_fp32(const void* din,
bias, bias,
pad, pad,
flag_bias, flag_bias,
flag_relu, act_param,
ctx); ctx);
} else { } else {
conv_3x3s1_depthwise_fp32(reinterpret_cast<const float*>(din), conv_3x3s1_depthwise_fp32(reinterpret_cast<const float*>(din),
...@@ -617,6 +617,7 @@ void conv_depthwise_3x3_fp32(const void* din, ...@@ -617,6 +617,7 @@ void conv_depthwise_3x3_fp32(const void* din,
reinterpret_cast<const float*>(weights), reinterpret_cast<const float*>(weights),
bias, bias,
param, param,
act_param,
ctx); ctx);
} }
...@@ -635,7 +636,7 @@ void conv_depthwise_3x3_fp32(const void* din, ...@@ -635,7 +636,7 @@ void conv_depthwise_3x3_fp32(const void* din,
bias, bias,
pad, pad,
flag_bias, flag_bias,
flag_relu, act_param,
ctx); ctx);
} else { } else {
conv_3x3s2_depthwise_fp32(reinterpret_cast<const float*>(din), conv_3x3s2_depthwise_fp32(reinterpret_cast<const float*>(din),
...@@ -650,6 +651,7 @@ void conv_depthwise_3x3_fp32(const void* din, ...@@ -650,6 +651,7 @@ void conv_depthwise_3x3_fp32(const void* din,
reinterpret_cast<const float*>(weights), reinterpret_cast<const float*>(weights),
bias, bias,
param, param,
act_param,
ctx); ctx);
} }
} else { } else {
......
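These depthwise conv entry points now receive the full operators::ActivationParam instead of a single flag_relu, so fused activations other than plain ReLU can reach the kernels. A self-contained illustration of what the richer parameter allows — the struct and enum below are stand-ins for illustration, not the real lite types:

```cpp
#include <algorithm>

// Stand-in types; the real code uses operators::ActivationParam.
enum class ActType { kNone, kRelu, kRelu6 };
struct ActParam {
  ActType type = ActType::kNone;
  float relu6_coef = 6.f;
};

// A bool flag_relu could only express kNone vs kRelu; the param form also
// carries the clipping coefficient needed for ReLU6 and similar fusions.
inline float ApplyFusedAct(float x, const ActParam& p) {
  switch (p.type) {
    case ActType::kRelu:
      return std::max(x, 0.f);
    case ActType::kRelu6:
      return std::min(std::max(x, 0.f), p.relu6_coef);
    default:
      return x;
  }
}
```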
...@@ -316,7 +316,9 @@ void fill_bias_int8(int* tensor, ...@@ -316,7 +316,9 @@ void fill_bias_int8(int* tensor,
int channel_size); int channel_size);
// new winograd // new winograd
void weight_trans_c4( void weight_trans_c4_8x8(
float* dest, const float* src, int ic, int oc, void* workspace);
void weight_trans_c4_4x4(
float* dest, const float* src, int ic, int oc, void* workspace); float* dest, const float* src, int ic, int oc, void* workspace);
void conv_compute_6x6_3x3(const float* input, void conv_compute_6x6_3x3(const float* input,
float* output, float* output,
...@@ -331,6 +333,32 @@ void conv_compute_6x6_3x3(const float* input, ...@@ -331,6 +333,32 @@ void conv_compute_6x6_3x3(const float* input,
const float* bias, const float* bias,
const operators::ConvParam& param, const operators::ConvParam& param,
ARMContext* ctx); ARMContext* ctx);
void conv_compute_2x2_3x3(const float* input,
float* output,
int num,
int chout,
int hout,
int wout,
int chin,
int hin,
int win,
const float* weight,
const float* bias,
const operators::ConvParam& param,
ARMContext* ctx);
void conv_compute_2x2_3x3_small(const float* input,
float* output,
int num,
int chout,
int hout,
int wout,
int chin,
int hin,
int win,
const float* weight,
const float* bias,
const operators::ConvParam& param,
ARMContext* ctx);
} // namespace math } // namespace math
} // namespace arm } // namespace arm
} // namespace lite } // namespace lite
......
...@@ -557,6 +557,52 @@ void elementwise_mul<float>(const float* dinx, ...@@ -557,6 +557,52 @@ void elementwise_mul<float>(const float* dinx,
} }
} }
template <>
void elementwise_mul<int>(const int* dinx,
const int* diny,
int* dout,
int num) {
int cnt = num >> 4;
int remain = num % 16;
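// Vector body below handles 16 ints per iteration with four 128-bit NEON
// registers; the scalar tail covers the remaining num % 16 elements.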
#pragma omp parallel for
for (int i = 0; i < cnt; ++i) {
const int* dinx_ptr = dinx + (i << 4);
const int* diny_ptr = diny + (i << 4);
int* dout_ptr = dout + (i << 4);
int32x4_t dinx0 = vld1q_s32(dinx_ptr);
int32x4_t dinx1 = vld1q_s32(dinx_ptr + 4);
int32x4_t dinx2 = vld1q_s32(dinx_ptr + 8);
int32x4_t dinx3 = vld1q_s32(dinx_ptr + 12);
int32x4_t diny0 = vld1q_s32(diny_ptr);
int32x4_t diny1 = vld1q_s32(diny_ptr + 4);
int32x4_t diny2 = vld1q_s32(diny_ptr + 8);
int32x4_t diny3 = vld1q_s32(diny_ptr + 12);
dinx0 = vmulq_s32(dinx0, diny0);
dinx1 = vmulq_s32(dinx1, diny1);
dinx2 = vmulq_s32(dinx2, diny2);
dinx3 = vmulq_s32(dinx3, diny3);
vst1q_s32(dout_ptr, dinx0);
vst1q_s32(dout_ptr + 4, dinx1);
vst1q_s32(dout_ptr + 8, dinx2);
vst1q_s32(dout_ptr + 12, dinx3);
}
if (remain > 0) {
const int* dinx_ptr = dinx + (cnt << 4);
const int* diny_ptr = diny + (cnt << 4);
int* dout_ptr = dout + (cnt << 4);
for (int i = 0; i < remain; i++) {
*dout_ptr = *dinx_ptr * *diny_ptr;
dout_ptr++;
dinx_ptr++;
diny_ptr++;
}
}
}
template <> template <>
void elementwise_mul_relu<float>(const float* dinx, void elementwise_mul_relu<float>(const float* dinx,
const float* diny, const float* diny,
...@@ -678,6 +724,73 @@ void elementwise_mul_broadcast<float>(const float* dinx, ...@@ -678,6 +724,73 @@ void elementwise_mul_broadcast<float>(const float* dinx,
} }
} }
template <>
void elementwise_mul_broadcast<int>(const int* dinx,
const int* diny,
int* dout,
int batch,
int channels,
int num) {
#pragma omp parallel for collapse(2)
for (int i = 0; i < batch; ++i) {
for (int j = 0; j < channels; ++j) {
int offset = (i * channels + j) * num;
const int* din_ptr = dinx + offset;
const int diny_data = diny[j];
int* dout_ptr = dout + offset;
int cnt = num >> 4;
int remain = num % 16;
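// Broadcast the per-channel scalar diny[j] into one NEON register and reuse it
// across the row; 16/8/4-lane blocks run before the scalar tail.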
int32x4_t rb = vdupq_n_s32(diny_data);
for (int k = 0; k < cnt; ++k) {
int32x4_t din0 = vld1q_s32(din_ptr);
int32x4_t din1 = vld1q_s32(din_ptr + 4);
int32x4_t din2 = vld1q_s32(din_ptr + 8);
int32x4_t din3 = vld1q_s32(din_ptr + 12);
din0 = vmulq_s32(din0, rb);
din1 = vmulq_s32(din1, rb);
din2 = vmulq_s32(din2, rb);
din3 = vmulq_s32(din3, rb);
vst1q_s32(dout_ptr, din0);
vst1q_s32(dout_ptr + 4, din1);
vst1q_s32(dout_ptr + 8, din2);
vst1q_s32(dout_ptr + 12, din3);
din_ptr += 16;
dout_ptr += 16;
}
if (remain >= 8) {
int32x4_t din0 = vld1q_s32(din_ptr);
int32x4_t din1 = vld1q_s32(din_ptr + 4);
din0 = vmulq_s32(din0, rb);
din1 = vmulq_s32(din1, rb);
vst1q_s32(dout_ptr, din0);
vst1q_s32(dout_ptr + 4, din1);
din_ptr += 8;
dout_ptr += 8;
remain -= 8;
}
if (remain >= 4) {
int32x4_t din0 = vld1q_s32(din_ptr);
din0 = vmulq_s32(din0, rb);
vst1q_s32(dout_ptr, din0);
din_ptr += 4;
dout_ptr += 4;
remain -= 4;
}
if (remain > 0) {
for (int p = 0; p < remain; ++p) {
*dout_ptr = *din_ptr * diny_data;
dout_ptr++;
din_ptr++;
}
}
}
}
}
template <> template <>
void elementwise_mul_relu_broadcast<float>(const float* dinx, void elementwise_mul_relu_broadcast<float>(const float* dinx,
const float* diny, const float* diny,
......
...@@ -51,6 +51,7 @@ ...@@ -51,6 +51,7 @@
#include "lite/backends/arm/math/prior_box.h" #include "lite/backends/arm/math/prior_box.h"
#include "lite/backends/arm/math/reduce_max.h" #include "lite/backends/arm/math/reduce_max.h"
#include "lite/backends/arm/math/reduce_mean.h" #include "lite/backends/arm/math/reduce_mean.h"
#include "lite/backends/arm/math/reduce_prod.h"
#include "lite/backends/arm/math/scale.h" #include "lite/backends/arm/math/scale.h"
#include "lite/backends/arm/math/sequence_expand.h" #include "lite/backends/arm/math/sequence_expand.h"
#include "lite/backends/arm/math/sequence_pool.h" #include "lite/backends/arm/math/sequence_pool.h"
...@@ -61,6 +62,7 @@ ...@@ -61,6 +62,7 @@
#include "lite/backends/arm/math/slice.h" #include "lite/backends/arm/math/slice.h"
#include "lite/backends/arm/math/softmax.h" #include "lite/backends/arm/math/softmax.h"
#include "lite/backends/arm/math/split.h" #include "lite/backends/arm/math/split.h"
#include "lite/backends/arm/math/split_merge_lod_tenosr.h"
#include "lite/backends/arm/math/stack.h" #include "lite/backends/arm/math/stack.h"
#include "lite/backends/arm/math/topk.h" #include "lite/backends/arm/math/topk.h"
#include "lite/backends/arm/math/yolo_box.h" #include "lite/backends/arm/math/yolo_box.h"
......
...@@ -477,17 +477,23 @@ void nearest_interp(const float* src, ...@@ -477,17 +477,23 @@ void nearest_interp(const float* src,
float scale_h_new = (with_align) float scale_h_new = (with_align)
? (static_cast<float>(h_in - 1) / (h_out - 1)) ? (static_cast<float>(h_in - 1) / (h_out - 1))
: (static_cast<float>(h_in) / (h_out)); : (static_cast<float>(h_in) / (h_out));
if (with_align) {
#pragma omp parallel for collapse(2) schedule(static) for (int h = 0; h < h_out; ++h) {
for (int h = 0; h < h_out; ++h) { float* dst_p = dst + h * w_out;
for (int w = 0; w < w_out; ++w) { int near_y = static_cast<int>(scale_h_new * h + 0.5);
int near_x = (with_align) ? static_cast<int>(scale_w_new * w + 0.5) for (int w = 0; w < w_out; ++w) {
: static_cast<int>(scale_w_new * w); int near_x = static_cast<int>(scale_w_new * w + 0.5);
int near_y = (with_align) ? static_cast<int>(scale_h_new * h + 0.5) *dst_p++ = src[near_y * w_in + near_x];
: static_cast<int>(scale_h_new * h); }
near_x = near_x < 0 ? 0 : near_x; }
near_y = near_y < 0 ? 0 : near_y; } else {
dst[h * w_out + w] = src[near_y * w_in + near_x]; for (int h = 0; h < h_out; ++h) {
float* dst_p = dst + h * w_out;
int near_y = static_cast<int>(scale_h_new * h);
for (int w = 0; w < w_out; ++w) {
int near_x = static_cast<int>(scale_w_new * w);
*dst_p++ = src[near_y * w_in + near_x];
}
} }
} }
} }
...@@ -520,9 +526,9 @@ void interpolate(lite::Tensor* X, ...@@ -520,9 +526,9 @@ void interpolate(lite::Tensor* X,
} }
auto out_size = OutSize; auto out_size = OutSize;
if (out_size != nullptr) { if (out_size != nullptr) {
auto out_size_data = get_new_data_from_tensor<float>(out_size); auto out_size_data = get_new_data_from_tensor<int>(out_size);
out_height = static_cast<int>(out_size_data[0]); out_height = out_size_data[0];
out_width = static_cast<int>(out_size_data[1]); out_width = out_size_data[1];
} }
} }
float height_scale = scale; float height_scale = scale;
...@@ -544,8 +550,10 @@ void interpolate(lite::Tensor* X, ...@@ -544,8 +550,10 @@ void interpolate(lite::Tensor* X,
int out_w = Out->dims()[3]; int out_w = Out->dims()[3];
int spatial_in = in_h * in_w; int spatial_in = in_h * in_w;
int spatial_out = out_h * out_w; int spatial_out = out_h * out_w;
for (int i = 0; i < count; ++i) {
if ("Bilinear" == interpolate_type) { if ("Bilinear" == interpolate_type) {
#pragma omp parallel for
for (int i = 0; i < count; ++i) {
bilinear_interp(din + spatial_in * i, bilinear_interp(din + spatial_in * i,
in_w, in_w,
in_h, in_h,
...@@ -555,7 +563,10 @@ void interpolate(lite::Tensor* X, ...@@ -555,7 +563,10 @@ void interpolate(lite::Tensor* X,
1.f / width_scale, 1.f / width_scale,
1.f / height_scale, 1.f / height_scale,
with_align); with_align);
} else if ("Nearest" == interpolate_type) { }
} else if ("Nearest" == interpolate_type) {
#pragma omp parallel for
for (int i = 0; i < count; ++i) {
nearest_interp(din + spatial_in * i, nearest_interp(din + spatial_in * i,
in_w, in_w,
in_h, in_h,
......
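The rewritten nearest_interp above splits the aligned and unaligned cases into separate loops and hoists near_y out of the inner loop, while the OpenMP pragma moves up into interpolate() to parallelise over the batch-times-channel count. A compact restatement of the per-plane kernel, with the tensor plumbing stripped for illustration (the width-scale computation is assumed symmetric to the height-scale computation shown in the hunk):

```cpp
// Per-plane nearest-neighbour resize mirroring the logic above. `with_align`
// uses (in-1)/(out-1) scales with rounding; otherwise in/out scales with
// truncation.
void NearestInterpPlane(const float* src, int w_in, int h_in,
                        float* dst, int w_out, int h_out, bool with_align) {
  float scale_w = with_align ? static_cast<float>(w_in - 1) / (w_out - 1)
                             : static_cast<float>(w_in) / w_out;
  float scale_h = with_align ? static_cast<float>(h_in - 1) / (h_out - 1)
                             : static_cast<float>(h_in) / h_out;
  for (int h = 0; h < h_out; ++h) {
    float* dst_p = dst + h * w_out;
    int near_y = with_align ? static_cast<int>(scale_h * h + 0.5f)
                            : static_cast<int>(scale_h * h);
    for (int w = 0; w < w_out; ++w) {
      int near_x = with_align ? static_cast<int>(scale_w * w + 0.5f)
                              : static_cast<int>(scale_w * w);
      *dst_p++ = src[near_y * w_in + near_x];
    }
  }
}
```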
...@@ -47,6 +47,13 @@ void sgemm_prepack_c4_small(int M, ...@@ -47,6 +47,13 @@ void sgemm_prepack_c4_small(int M,
bool has_bias, bool has_bias,
bool has_relu, bool has_relu,
ARMContext* ctx); ARMContext* ctx);
void sgemm_prepack_c4_small(int M,
int N,
int K,
const float* A_packed,
const float* B,
float* C,
ARMContext* ctx);
} // namespace math } // namespace math
} // namespace arm } // namespace arm
} // namespace lite } // namespace lite
......
...@@ -167,7 +167,7 @@ void pooling_basic(const float* din, ...@@ -167,7 +167,7 @@ void pooling_basic(const float* din,
"ld1 {v2.4s-v3.4s}, [%[data_in_channel]], #32 \n" \ "ld1 {v2.4s-v3.4s}, [%[data_in_channel]], #32 \n" \
"fmax v6.4s, v4.4s, v5.4s \n" \ "fmax v6.4s, v4.4s, v5.4s \n" \
"subs %w[cnt], %w[cnt], #1 \n" \ "subs %w[cnt], %w[cnt], #1 \n" \
"fmax %w[vmax].4s, %w[vmax].4s, v6.4s \n" \ "fmax %[vmax].4s, %[vmax].4s, v6.4s \n" \
"bne 1b \n" "bne 1b \n"
#define GLOBAL_AVG \ #define GLOBAL_AVG \
"1: \n" \ "1: \n" \
...@@ -176,7 +176,7 @@ void pooling_basic(const float* din, ...@@ -176,7 +176,7 @@ void pooling_basic(const float* din,
"ld1 {v0.4s-v1.4s}, [%[data_in_channel]], #32 \n" \ "ld1 {v0.4s-v1.4s}, [%[data_in_channel]], #32 \n" \
"fadd %[vsum].4s, %[vsum].4s, v3.4s \n" \ "fadd %[vsum].4s, %[vsum].4s, v3.4s \n" \
"subs %w[cnt], %w[cnt], #1 \n" \ "subs %w[cnt], %w[cnt], #1 \n" \
"fadd %w[vsum].4s, %w[vsum].4s, v4.4s \n" \ "fadd %[vsum].4s, %[vsum].4s, v4.4s \n" \
"ld1 {v2.4s-v3.4s}, [%[data_in_channel]], #32 \n" \ "ld1 {v2.4s-v3.4s}, [%[data_in_channel]], #32 \n" \
"bne 1b \n" "bne 1b \n"
......
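The change above drops the %w operand modifier from the vector fmax/fadd operands: %w requests the 32-bit general-register (wN) spelling of an operand, which is right for the loop counter in `subs %w[cnt], ...` but not for a NEON operand used with a .4s arrangement, which needs the plain %[name] form that expands to the full vN register. A minimal sketch of the corrected pattern, assuming an AArch64 target:

```cpp
#include <arm_neon.h>

// Running max of a vector accumulator, written the way the fixed macro does it.
inline float32x4_t RunningMax(float32x4_t vmax, float32x4_t vin) {
  asm volatile(
      "fmax %[vmax].4s, %[vmax].4s, %[vin].4s \n"  // plain %[..] -> vN register
      : [vmax] "+w"(vmax)
      : [vin] "w"(vin));
  return vmax;
}
```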
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "lite/backends/arm/math/reduce_prod.h"
namespace paddle {
namespace lite {
namespace arm {
namespace math {} // namespace math
} // namespace arm
} // namespace lite
} // namespace paddle
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "lite/core/tensor.h"
namespace paddle {
namespace lite {
namespace arm {
namespace math {
template <typename T>
void reduce_prod_n(const T* src,
T* dst,
int num_in,
int channel_in,
int height_in,
int width_in) {
int hw_size = height_in * width_in;
int chw_size = channel_in * hw_size;
int data_index, src_index, src_index0;
for (int c = 0; c < channel_in; ++c) {
for (int h = 0; h < height_in; ++h) {
for (int w = 0; w < width_in; ++w) {
data_index = c * hw_size + h * width_in + w;
dst[data_index] = static_cast<T>(1);
for (int n = 0; n < num_in; ++n) {
src_index = n * chw_size + data_index;
dst[data_index] *= src[src_index];
}
}
}
}
}
template <typename T>
void reduce_prod_c(const T* src,
T* dst,
int num_in,
int channel_in,
int height_in,
int width_in) {
int hw_size = height_in * width_in;
int chw_size = hw_size * channel_in;
int data_index, src_index0, src_index;
for (int n = 0; n < num_in; ++n) {
for (int h = 0; h < height_in; ++h) {
for (int w = 0; w < width_in; ++w) {
data_index = n * hw_size + h * width_in + w;
src_index0 = n * chw_size + h * width_in + w;
dst[data_index] = static_cast<T>(1);
for (int c = 0; c < channel_in; ++c) {
src_index = src_index0 + c * hw_size;
dst[data_index] *= src[src_index];
}
}
}
}
}
template <typename T>
void reduce_prod_h(const T* src,
T* dst,
int num_in,
int channel_in,
int height_in,
int width_in) {
int cw_size = channel_in * width_in;
int chw_size = cw_size * height_in;
int hw_size = height_in * width_in;
int data_index, src_index, src_index0;
for (int n = 0; n < num_in; ++n) {
for (int c = 0; c < channel_in; ++c) {
for (int w = 0; w < width_in; ++w) {
data_index = n * cw_size + c * width_in + w;
src_index0 = n * chw_size + c * hw_size + w;
dst[data_index] = static_cast<T>(1);
for (int h = 0; h < height_in; ++h) {
src_index = src_index0 + h * width_in;
dst[data_index] *= src[src_index];
}
}
}
}
}
template <typename T>
void reduce_prod_w(const T* src,
T* dst,
int num_in,
int channel_in,
int height_in,
int width_in) {
int ch_size = channel_in * height_in;
int hw_size = height_in * width_in;
int chw_size = ch_size * width_in;
int data_index = 0;
int src_index0 = 0;
int src_index = 0;
for (int n = 0; n < num_in; ++n) {
for (int c = 0; c < channel_in; ++c) {
for (int h = 0; h < height_in; ++h) {
data_index = n * ch_size + c * height_in + h;
src_index0 = n * chw_size + c * hw_size + h * width_in;
dst[data_index] = static_cast<T>(1);
for (int w = 0; w < width_in; ++w) {
src_index = src_index0 + w;
dst[data_index] *= src[src_index];
}
}
}
}
}
template <typename T>
void reduce_prod_nc(const T* src,
T* dst,
int num_in,
int channel_in,
int height_in,
int width_in) {
// reduce n first.
DDimLite ddimA({1, channel_in, height_in, width_in});
lite::Tensor tensor_tmp;
tensor_tmp.Resize(ddimA);
auto* tmp_out = tensor_tmp.mutable_data<T>();
reduce_prod_n(src, tmp_out, num_in, channel_in, height_in, width_in);
reduce_prod_c(tmp_out, dst, 1, channel_in, height_in, width_in);
}
template <typename T>
void reduce_prod_ch(const T* src,
T* dst,
int num_in,
int channel_in,
int height_in,
int width_in) {
// reduce c first
DDimLite ddimA({num_in, 1, height_in, width_in});
lite::Tensor tensor_tmp;
tensor_tmp.Resize(ddimA);
auto* tmp_out = tensor_tmp.mutable_data<T>();
reduce_prod_c(src, tmp_out, num_in, channel_in, height_in, width_in);
reduce_prod_h(tmp_out, dst, num_in, 1, height_in, width_in);
}
template <typename T>
void reduce_prod_hw(const T* src,
T* dst,
int num_in,
int channel_in,
int height_in,
int width_in) {
// reduce h first
DDimLite ddimA({num_in, channel_in, 1, width_in});
lite::Tensor tensor_tmp;
tensor_tmp.Resize(ddimA);
auto* tmp_out = tensor_tmp.mutable_data<T>();
reduce_prod_h(src, tmp_out, num_in, channel_in, height_in, width_in);
reduce_prod_w(tmp_out, dst, num_in, channel_in, 1, width_in);
}
template <typename T>
void reduce_prod_all(const T* src, T* dst, int64_t total_num) {
dst[0] = static_cast<T>(1);
for (int n = 0; n < total_num; ++n) {
dst[0] *= src[n];
}
}
} // namespace math
} // namespace arm
} // namespace lite
} // namespace paddle
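A tiny worked example for the reduce_prod helpers declared above, reducing over the channel dimension of an NCHW tensor. The include path is the one used by the new reduce_prod.cc; building it still assumes the usual lite include directories, so treat it as a sketch rather than a standalone program.

```cpp
#include <iostream>
#include "lite/backends/arm/math/reduce_prod.h"

int main() {
  // N=1, C=2, H=1, W=2, laid out NCHW: {c0w0, c0w1, c1w0, c1w1}
  float src[] = {2.f, 3.f, 4.f, 5.f};
  float dst[2];  // channel dimension reduced away -> N*H*W = 2 values
  paddle::lite::arm::math::reduce_prod_c(src, dst, /*num_in=*/1,
                                         /*channel_in=*/2, /*height_in=*/1,
                                         /*width_in=*/2);
  std::cout << dst[0] << " " << dst[1] << std::endl;  // prints: 8 15
}
```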
...@@ -86,6 +86,13 @@ template void slice(const int* input, ...@@ -86,6 +86,13 @@ template void slice(const int* input,
std::vector<int> ends, std::vector<int> ends,
int* out, int* out,
Context<TARGET(kARM)>* ctx); Context<TARGET(kARM)>* ctx);
template void slice(const float* input,
std::vector<int64_t> dims,
std::vector<int> axes,
std::vector<int> starts,
std::vector<int> ends,
float* out,
Context<TARGET(kARM)>* ctx);
} // namespace math } // namespace math
} // namespace arm } // namespace arm
......
...@@ -70,10 +70,12 @@ void split<float>(const float* din, ...@@ -70,10 +70,12 @@ void split<float>(const float* din,
int in_after = in_strides[axis]; int in_after = in_strides[axis];
int out_after = out_strides[axis]; int out_after = out_strides[axis];
const float* din_ptr = din + input_offset;
for (int i = 0; i < before; ++i) { for (int i = 0; i < before; ++i) {
split_cpy(din + input_offset + i * in_after, std::memcpy(out_data, din_ptr, sizeof(float) * out_after);
out_data + i * out_after, din_ptr += in_after;
out_after); out_data += out_after;
} }
input_offset += out_strides[axis]; input_offset += out_strides[axis];
} }
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/backends/arm/math/split_merge_lod_tenosr.h"
#include <utility>
#include <vector>
namespace paddle {
namespace lite {
namespace arm {
namespace math {
using LoDAndOffset = std::pair<LoD, std::pair<size_t, size_t>>;
LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD &lod,
size_t start_idx,
size_t end_idx,
size_t start_level) {
LoD sub_lod;
for (size_t level_idx = start_level; level_idx < lod.size(); ++level_idx) {
CHECK(start_idx <= end_idx);
CHECK(end_idx < lod[level_idx].size());
std::vector<uint64_t> level_lens;
for (size_t i = start_idx; i < end_idx; ++i) {
level_lens.push_back(lod[level_idx][i + 1] - lod[level_idx][i]);
}
sub_lod.emplace_back(level_lens);
start_idx = lod[level_idx][start_idx];
end_idx = lod[level_idx][end_idx];
}
return LoDAndOffset{sub_lod, {start_idx, end_idx}};
}
void AppendLoD(LoD *lod, const LoD &lod_length) {
CHECK(lod->empty() || lod->size() == lod_length.size());
if (lod->empty()) {
for (size_t i = 0; i < lod_length.size(); ++i) {
lod->emplace_back(std::vector<uint64_t>({0}));
}
}
for (size_t i = 0; i < lod->size(); ++i) {
auto &level = (*lod)[i];
for (auto len : lod_length[i]) {
level.push_back(level.back() + len);
}
}
}
} // namespace math
} // namespace arm
} // namespace lite
} // namespace paddle
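GetSubLoDAndAbsoluteOffset walks the LoD levels from start_level down, recording the sequence lengths between start_idx and end_idx at each level and converting the indices into offsets for the next level, so the final pair is the absolute element range of the slice. A small worked example, assuming LoD is the usual vector-of-vector-of-uint64_t alias from lite/core/tensor.h:

```cpp
#include <cassert>
#include "lite/backends/arm/math/split_merge_lod_tenosr.h"

int main() {
  using paddle::lite::arm::math::GetSubLoDAndAbsoluteOffset;
  // Level 0: two top-level sequences covering level-1 entries [0,2) and [2,5).
  // Level 1: five sub-sequences with lengths {1, 2, 1, 2, 2} over 8 elements.
  paddle::lite::LoD lod = {{0, 2, 5}, {0, 1, 3, 4, 6, 8}};
  // Slice out the first top-level sequence.
  auto res = GetSubLoDAndAbsoluteOffset(lod, /*start_idx=*/0, /*end_idx=*/1,
                                        /*start_level=*/0);
  // Its sub-LoD keeps one length list per level: {{2}, {1, 2}} ...
  assert(res.first[0][0] == 2);
  assert(res.first[1][0] == 1 && res.first[1][1] == 2);
  // ... and it occupies elements [0, 3) of the underlying tensor.
  assert(res.second.first == 0 && res.second.second == 3);
  return 0;
}
```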
...@@ -2,5 +2,4 @@ if (NOT LITE_WITH_BM) ...@@ -2,5 +2,4 @@ if (NOT LITE_WITH_BM)
return() return()
endif() endif()
lite_cc_library(target_wrapper_bm SRCS target_wrapper.cc bm_context.cc DEPS ${bm_runtime_libs}) lite_cc_library(target_wrapper_bm SRCS target_wrapper.cc DEPS ${bm_runtime_libs})
lite_cc_library(bm_builder SRCS builder.cc DEPS ${bm_builder_libs})
...@@ -89,9 +89,15 @@ bool CudnnConv2D<PRECISION(kFloat)>::create(const operators::ConvParam& param, ...@@ -89,9 +89,15 @@ bool CudnnConv2D<PRECISION(kFloat)>::create(const operators::ConvParam& param,
this->act_desc_, CUDNN_ACTIVATION_RELU, CUDNN_NOT_PROPAGATE_NAN, 0.0)); this->act_desc_, CUDNN_ACTIVATION_RELU, CUDNN_NOT_PROPAGATE_NAN, 0.0));
} }
#if CUDNN_VERSION_MIN(7, 0, 0)
cudnnMathType_t math_type =
use_tensor_core_ ? CUDNN_TENSOR_OP_MATH : CUDNN_DEFAULT_MATH;
CUDNN_CHECK(cudnnSetConvolutionMathType(this->conv_desc_, math_type));
#endif
if (ic == param.groups && ic == oc && ic != 1) { if (ic == param.groups && ic == oc && ic != 1) {
this->fwd_algo_ = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM; this->fwd_algo_ = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
} else if (1) { } else if (!param.var_length) {
const auto* i_data = param.x->data<float>(); const auto* i_data = param.x->data<float>();
const auto* w_data = param.filter->data<float>(); const auto* w_data = param.filter->data<float>();
auto* o_data = param.output->mutable_data<float>(TARGET(kCUDA)); auto* o_data = param.output->mutable_data<float>(TARGET(kCUDA));
......
...@@ -32,6 +32,5 @@ class PE { ...@@ -32,6 +32,5 @@ class PE {
virtual ~PE() {} virtual ~PE() {}
}; };
} // namespace zynqmp } // namespace zynqmp
} // namespace paddle } // namespace paddle
(The diffs of the remaining changed files are collapsed and not shown here.)