Unverified commit 34e73be0, authored by myq406450149, committed by GitHub

Merge pull request #1 from PaddlePaddle/develop

pull paddle lite develop
......@@ -34,6 +34,7 @@
.DS_Store
build/
build_fpga/
.idea/
......@@ -71,6 +72,9 @@ build
cmake-build-debug
cmake-build-release
# vscode
.vscode
# ios
tools/libomp.a
......
......@@ -36,7 +36,7 @@ repos:
entry: bash ./tools/codestyle/cpplint_pre_commit.hook
language: system
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx)$
exclude: ^(mobile/|metal/|web/)
exclude: ^(mobile/) | ^(metal/) | ^(web/)
#- repo: local
#hooks:
#- id: pylint-doc-string
......
......@@ -47,33 +47,19 @@ include(simd)
################################ Exposed Configurations #######################################
lite_option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON)
lite_option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ON IF ${AVX_FOUND})
lite_option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
lite_option(WITH_TESTING "Compile PaddlePaddle with unit testing" OFF)
lite_option(WITH_MKL "Compile PaddlePaddle with MKL support." ON IF ${AVX_FOUND})
lite_option(WITH_ARM_DOTPROD "Compile PaddlePaddle with ARM dot production" ON)
lite_option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
# TODO(Superjomn) Remove WITH_ANAKIN option if not needed later.
if(ANDROID OR IOS OR ARMLINUX)
set(WITH_GPU OFF CACHE STRING
"Disable GPU when cross-compiling for Android and iOS" FORCE)
set(WITH_DSO OFF CACHE STRING
"Disable DSO when cross-compiling for Android and iOS" FORCE)
set(WITH_AVX OFF CACHE STRING
"Disable AVX when cross-compiling for Android and iOS" FORCE)
set(WITH_PYTHON OFF CACHE STRING
"Disable PYTHON when cross-compiling for Android and iOS" FORCE)
set(WITH_RDMA OFF CACHE STRING
"Disable RDMA when cross-compiling for Android and iOS" FORCE)
set(WITH_MKL OFF CACHE STRING
"Disable MKL when cross-compiling for Android and iOS" FORCE)
endif()
# for lite, covering both the server and mobile frameworks.
lite_option(LITE_WITH_JAVA "Enable Java JNI lib in lite mode" OFF)
lite_option(LITE_WITH_PYTHON "Enable Python api lib in lite mode" OFF)
lite_option(LITE_WITH_CUDA "Enable CUDA in lite mode" OFF)
lite_option(LITE_WITH_X86 "Enable X86 in lite mode" ON)
lite_option(LITE_WITH_ARM "Enable ARM in lite mode" OFF)
lite_option(LITE_WITH_NPU "Enable NPU in lite mode" OFF)
lite_option(LITE_WITH_XPU "Enable XPU in lite mode" OFF)
lite_option(LITE_WITH_OPENMP "Enable OpenMP in lite framework" ON)
lite_option(LITE_WITH_OPENCL "Enable OpenCL support in lite" OFF)
lite_option(LITE_WITH_FPGA "Enable FPGA support in lite" OFF)
......@@ -82,8 +68,29 @@ lite_option(LITE_WITH_PROFILE "Enable profile mode in lite framework" OFF)
lite_option(LITE_WITH_PRECISION_PROFILE "Enable precision profile in profile mode ON in lite" OFF IF LITE_WITH_PROFILE)
lite_option(LITE_SHUTDOWN_LOG "Shutdown log system or not." OFF)
lite_option(LITE_ON_TINY_PUBLISH "Publish tiny predictor lib." OFF)
lite_option(LITE_ON_MODEL_OPTIMIZE_TOOL "Build the model optimize tool" OFF)
# publish options
lite_option(LITE_BUILD_EXTRA "Enable extra algorithm support in Lite, both kernels and operators" OFF)
lite_option(LITE_BUILD_TAILOR "Enable tailoring library according to model" OFF)
# TODO(Superjomn) Remove WITH_ANAKIN option if not needed later.
if(ANDROID OR IOS OR ARMLINUX)
set(WITH_GPU OFF CACHE STRING
"Disable GPU when cross-compiling for Android and iOS" FORCE)
set(WITH_DSO OFF CACHE STRING
"Disable DSO when cross-compiling for Android and iOS" FORCE)
set(WITH_AVX OFF CACHE STRING
"Disable AVX when cross-compiling for Android and iOS" FORCE)
set(WITH_RDMA OFF CACHE STRING
"Disable RDMA when cross-compiling for Android and iOS" FORCE)
set(WITH_MKL OFF CACHE STRING
"Disable MKL when cross-compiling for Android and iOS" FORCE)
endif()
if(ANDROID OR IOS)
set(LITE_WITH_PYTHON OFF CACHE STRING
"Disable PYTHON when cross-compiling for Android and iOS" FORCE)
endif()
set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
"A path setting third party libraries download & build directories.")
......@@ -94,6 +101,7 @@ if(NOT CMAKE_BUILD_TYPE)
"Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
FORCE)
endif()
message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
# check options
if (LITE_ON_TINY_PUBLISH)
......@@ -104,6 +112,15 @@ if (LITE_ON_TINY_PUBLISH)
endif()
include_directories("${PADDLE_SOURCE_DIR}")
# the generated header files.
set(LITE_GENERATED_INCLUDE_DIR "${CMAKE_BINARY_DIR}")
include_directories("${LITE_GENERATED_INCLUDE_DIR}")
if (LITE_WITH_PYTHON)
include(external/python) # download, build, install python
include(external/pybind11) # download, build, install pybind11
endif()
# for mobile
if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
......@@ -168,10 +185,15 @@ if(LITE_WITH_CUDA)
include(cuda)
endif()
if(LITE_WITH_XPU)
include(xpu)
endif()
include(generic) # simplify cmake module
include(ccache) # set ccache for compilation
include(util) # set unittest and link libs
include(version) # set PADDLE_VERSION
include(flags)
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
......
......@@ -3,14 +3,14 @@
# Paddle Lite
<!--[![Build Status](https://travis-ci.org/PaddlePaddle/Paddle-Lite.svg?branch=develop&longCache=true&style=flat-square)](https://travis-ci.org/PaddlePaddle/Paddle-Lite)-->
[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](https://github.com/PaddlePaddle/Paddle-Lite/wiki)
[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](https://paddlepaddle.github.io/Paddle-Lite/)
[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
<!-- [![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle-Mobile.svg)](https://github.com/PaddlePaddle/Paddle-Mobile/releases) -->
Paddle Lite is an updated version of Paddle-Mobile, an open source deep learning framework designed to make it easy to perform inference on mobile, embedded, and IoT devices. It is compatible with PaddlePaddle and pre-trained models from other sources.
For tutorials, please see [PaddleLite Wiki](https://github.com/PaddlePaddle/Paddle-Lite/wiki).
For tutorials, please see [PaddleLite Document](https://paddlepaddle.github.io/Paddle-Lite/).
## Key Features
......@@ -30,7 +30,7 @@ It also supports INT8 quantizations with [PaddleSlim model compression tools](ht
On Huawei NPU and FPGA, the performance is also boosted.
The latest benchmark is located at [benchmark](https://github.com/PaddlePaddle/Paddle-Lite/wiki/benchmark)
The latest benchmark is located at [benchmark](https://paddlepaddle.github.io/Paddle-Lite/develop/benchmark/)
### High Compatibility
......
# Paddle Lite
<!--[![Build Status](https://travis-ci.org/PaddlePaddle/Paddle-Lite.svg?branch=develop&longCache=true&style=flat-square)](https://travis-ci.org/PaddlePaddle/Paddle-Lite)-->
[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](https://github.com/PaddlePaddle/Paddle-Lite/wiki)
[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](https://paddlepaddle.github.io/Paddle-Lite/)
[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
<!-- [![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle-Mobile.svg)](https://github.com/PaddlePaddle/Paddle-Mobile/releases) -->
Paddle Lite is the upgraded version of Paddle-Mobile: a high-performance, lightweight deep learning inference engine positioned for lightweight, efficient inference in a wider range of scenarios, including mobile devices, and supporting a broader set of hardware and platforms. While staying seamlessly integrated with PaddlePaddle, it is also compatible with models produced by other training frameworks.
The full documentation is available at the [PaddleLite Wiki](https://github.com/PaddlePaddle/Paddle-Lite/wiki).
The full documentation is available at the [PaddleLite documentation site](https://paddlepaddle.github.io/Paddle-Lite/).
## Features
......@@ -21,7 +21,7 @@ Paddle Lite为Paddle-Mobile的升级版,定位支持包括手机移动端在
Supports INT8 quantized computation. Combined with the INT8 quantization-aware training in the [PaddleSlim model compression tools](https://github.com/PaddlePaddle/models/tree/v1.5/PaddleSlim), it delivers high-accuracy, high-performance inference.
It also performs well on Huawei NPU and FPGA.
The latest benchmark is located at [benchmark](https://github.com/PaddlePaddle/Paddle-Lite/wiki/benchmark)
The latest benchmark is located at [benchmark](https://paddlepaddle.github.io/Paddle-Lite/develop/benchmark/)
### High Compatibility
In terms of hardware, Paddle Lite's architecture is designed for broad multi-hardware compatibility. Besides ARM CPU, Mali GPU, and Adreno GPU, it also specifically supports Huawei NPU as well as FPGA and other hardware widely used in edge devices. Support for AI chips such as Cambricon and Bitmain is coming soon, and more hardware will be added in the future.
......
......@@ -34,33 +34,6 @@ elseif(SSE3_FOUND)
set(SIMD_FLAG ${SSE3_FLAG})
endif()
if(WIN32)
# windows header option for all targets.
add_definitions(-D_XKEYCHECK_H)
# Use symbols instead of absolute paths to reduce the cmake link command length.
SET(CMAKE_C_USE_RESPONSE_FILE_FOR_LIBRARIES 1)
SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_LIBRARIES 1)
SET(CMAKE_C_USE_RESPONSE_FILE_FOR_OBJECTS 1)
SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_OBJECTS 1)
SET(CMAKE_C_USE_RESPONSE_FILE_FOR_INCLUDES 1)
SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_INCLUDES 1)
SET(CMAKE_C_RESPONSE_FILE_LINK_FLAG "@")
SET(CMAKE_CXX_RESPONSE_FILE_LINK_FLAG "@")
# Specify the program to use when building static libraries
SET(CMAKE_C_CREATE_STATIC_LIBRARY "<CMAKE_AR> lib <TARGET> <LINK_FLAGS> <OBJECTS>")
SET(CMAKE_CXX_CREATE_STATIC_LIBRARY "<CMAKE_AR> lib <TARGET> <LINK_FLAGS> <OBJECTS>")
# set definition for the dll export
if (NOT MSVC)
message(FATAL "Windows build only support msvc. Which was binded by the nvcc compiler of NVIDIA.")
endif(NOT MSVC)
endif(WIN32)
if(WITH_PSLIB)
add_definitions(-DPADDLE_WITH_PSLIB)
endif()
if(LITE_WITH_CUDA)
add_definitions(-DLITE_WITH_CUDA)
add_definitions(-DEIGEN_USE_GPU)
......@@ -154,6 +127,10 @@ if (LITE_WITH_NPU)
add_definitions("-DLITE_WITH_NPU")
endif()
if (LITE_WITH_XPU)
add_definitions("-DLITE_WITH_XPU")
endif()
if (LITE_WITH_OPENCL)
add_definitions("-DLITE_WITH_OPENCL")
endif()
......@@ -180,3 +157,8 @@ endif()
if (LITE_ON_TINY_PUBLISH)
add_definitions("-DLITE_ON_TINY_PUBLISH")
endif()
if (LITE_ON_MODEL_OPTIMIZE_TOOL)
add_definitions("-DLITE_ON_MODEL_OPTIMIZE_TOOL")
endif(LITE_ON_MODEL_OPTIMIZE_TOOL)
......@@ -18,6 +18,7 @@ endif()
set(ANDROID TRUE)
add_definitions(-DLITE_WITH_LINUX)
add_definitions(-DLITE_WITH_ANDROID)
if(NOT DEFINED ANDROID_NDK)
set(ANDROID_NDK $ENV{NDK_ROOT})
......@@ -32,7 +33,10 @@ if(ARM_TARGET_LANG STREQUAL "gcc")
endif()
if(NOT DEFINED ANDROID_API_LEVEL)
set(ANDROID_API_LEVEL "22")
set(ANDROID_API_LEVEL "23")
if(ARM_TARGET_ARCH_ABI STREQUAL "armv7")
set(ANDROID_API_LEVEL "22")
endif()
endif()
# then check input arm abi
......
......@@ -50,9 +50,6 @@ find_library(NPU_DDK_IR_FILE NAMES hiai_ir
find_library(NPU_DDK_IR_BUILD_FILE NAMES hiai_ir_build
PATHS ${NPU_DDK_ROOT}/${NPU_SUB_LIB_PATH})
find_library(NPU_DDK_PROTO_FILE NAMES protobuf-lite
PATHS ${NPU_DDK_ROOT}/${NPU_SUB_LIB_PATH})
if(NOT NPU_DDK_HIAI_FILE)
message(FATAL_ERROR "Can not find NPU_DDK_HIAI_FILE in ${NPU_DDK_ROOT}")
else()
......@@ -77,14 +74,8 @@ else()
set_property(TARGET npu_ddk_ir_build PROPERTY IMPORTED_LOCATION ${NPU_DDK_IR_BUILD_FILE})
endif()
if(NOT NPU_DDK_PROTO_FILE)
message(FATAL_ERROR "Can not find NPU_DDK_PROTO_FILE in ${NPU_DDK_ROOT}")
else()
message(STATUS "Found NPU_DDK Protobuf Library: ${NPU_DDK_PROTO_FILE}")
add_library(npu_ddk_proto SHARED IMPORTED GLOBAL)
set_property(TARGET npu_ddk_proto PROPERTY IMPORTED_LOCATION ${NPU_DDK_PROTO_FILE})
endif()
set(npu_runtime_libs npu_ddk_hiai CACHE INTERNAL "npu ddk runtime libs")
set(npu_builder_libs npu_ddk_ir npu_ddk_ir_build CACHE INTERNAL "npu ddk builder libs")
set(npu_ddk_libs npu_ddk_hiai npu_ddk_ir npu_ddk_ir_build npu_ddk_proto CACHE INTERNAL "npu ddk libs")
......@@ -26,6 +26,8 @@ if(ANDROID)
endif()
if(ARMLINUX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
if(ARMLINUX_ARCH_ABI STREQUAL "armv8")
set(CMAKE_CXX_FLAGS "-march=armv8-a ${CMAKE_CXX_FLAGS}")
set(CMAKE_C_FLAGS "-march=armv8-a ${CMAKE_C_FLAGS}")
......@@ -57,7 +59,10 @@ function(check_linker_flag)
endfunction()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
if (LITE_ON_TINY_PUBLISH)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math -Ofast -Os -fno-exceptions -fomit-frame-pointer -fno-asynchronous-unwind-tables -fno-unwind-tables")
if(NOT LITE_WITH_PYTHON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions")
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math -Ofast -Os -fomit-frame-pointer -fno-asynchronous-unwind-tables -fno-unwind-tables")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -flto -fvisibility=hidden -fvisibility-inlines-hidden -fdata-sections -ffunction-sections")
check_linker_flag(-Wl,--gc-sections)
endif()
......
......@@ -4,9 +4,9 @@ endif()
set(paddle_known_gpu_archs "30 35 50 52 60 61 70")
set(paddle_known_gpu_archs7 "30 35 50 52")
set(paddle_known_gpu_archs8 "30 35 50 52 60 61")
set(paddle_known_gpu_archs9 "30 35 50 52 60 61 70")
set(paddle_known_gpu_archs10 "30 35 50 52 60 61 70 75")
set(paddle_known_gpu_archs8 "30 35 50 52 53 60 61 62")
set(paddle_known_gpu_archs9 "30 35 50 52 53 60 61 62 70")
set(paddle_known_gpu_archs10 "30 35 50 52 53 60 61 62 70 72 75")
######################################################################################
# A function for automatic detection of GPUs installed (if autodetection is enabled)
......@@ -174,6 +174,16 @@ if(NOT WITH_DSO)
endif(WIN32)
endif(NOT WITH_DSO)
get_filename_component(CUDA_LIB_PATH ${CUDA_curand_LIBRARY} DIRECTORY)
function(import_static_library alias path)
add_library(${alias} STATIC IMPORTED GLOBAL)
set_property(TARGET ${alias} PROPERTY IMPORTED_LOCATION ${path})
endfunction()
import_static_library(cudart_static ${CUDA_LIB_PATH}/libcudart_static.a)
import_static_library(cublas_static ${CUDA_LIB_PATH}/libcublas_static.a)
import_static_library(curand_static ${CUDA_LIB_PATH}/libcurand_static.a)
import_static_library(culibos_static ${CUDA_LIB_PATH}/libculibos.a)
# setting nvcc arch flags
select_nvcc_arch_flags(NVCC_FLAGS_EXTRA)
list(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA})
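For readers of the arch-list change above: the SM numbers added to `paddle_known_gpu_archs*` (53, 62 and 72 correspond to the Jetson-class parts) are turned into `-gencode` flags by `select_nvcc_arch_flags`. Roughly, each architecture `XX` contributes a pair of the following shape (a simplified sketch of the pattern, not the function's exact output):

```cmake
# Simplified sketch (assumed pattern) of how the arch lists become NVCC flags.
foreach(arch 53 62 72)
  list(APPEND NVCC_FLAGS_EXTRA -gencode arch=compute_${arch},code=sm_${arch})
endforeach()
```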
......
......@@ -34,6 +34,14 @@ list(APPEND CUDNN_CHECK_LIBRARY_DIRS
${CUDA_TOOLKIT_ROOT_DIR}
${CUDA_TOOLKIT_ROOT_DIR}/lib/x64
)
if((${CUDA_VERSION} GREATER 10.0) OR (${CUDA_VERSION} EQUAL 10.0))
find_library(CUBLAS_LIBRARY NAMES libcublas.so PATHS ${CUDNN_CHECK_LIBRARY_DIRS} NO_DEFAULT_PATH)
set(CUBLAS_LIBRARIES ${CUBLAS_LIBRARY})
else()
set(CUBLAS_LIBRARIES ${CUDA_CUBLAS_LIBRARIES})
endif()
set(CUDNN_LIB_NAME "libcudnn.so")
if(WIN32)
......@@ -45,11 +53,10 @@ if(APPLE)
set(CUDNN_LIB_NAME "libcudnn.dylib" "libcudnn.so")
endif(APPLE)
find_library(CUDNN_LIBRARY NAMES ${CUDNN_LIB_NAME} # libcudnn_static.a
find_library(CUDNN_LIBRARY NAMES ${CUDNN_LIB_NAME}
PATHS ${CUDNN_CHECK_LIBRARY_DIRS} ${CUDNN_INCLUDE_DIR} ${__libpath_hist}
NO_DEFAULT_PATH
DOC "Path to cuDNN library.")
DOC "Path to cuDNN dynamic library.")
if(CUDNN_INCLUDE_DIR AND CUDNN_LIBRARY)
set(CUDNN_FOUND ON)
......@@ -61,6 +68,9 @@ if(CUDNN_FOUND)
file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS)
get_filename_component(CUDNN_LIB_PATH ${CUDNN_LIBRARY} DIRECTORY)
add_library(cudnn_static STATIC IMPORTED GLOBAL)
set_property(TARGET cudnn_static PROPERTY IMPORTED_LOCATION
"${CUDNN_LIB_PATH}/libcudnn_static.a")
string(REGEX MATCH "define CUDNN_VERSION +([0-9]+)"
CUDNN_VERSION "${CUDNN_VERSION_FILE_CONTENTS}")
......
......@@ -109,8 +109,7 @@ macro(PROMPT_PROTOBUF_LIB)
ADD_LIBRARY(protobuf ${protobuf_LIBTYPE} IMPORTED GLOBAL)
SET_PROPERTY(TARGET protobuf PROPERTY IMPORTED_LOCATION ${PROTOBUF_LIBRARY})
ADD_LIBRARY(protobuf_lite ${protobuf_LIBTYPE} IMPORTED GLOBAL)
ADD_LIBRARY(protobuf_lite ${protobuf_LIBTYPE} IMPORTED GLOBAL)
SET_PROPERTY(TARGET protobuf_lite PROPERTY IMPORTED_LOCATION ${PROTOBUF_LITE_LIBRARY})
ADD_LIBRARY(libprotoc ${protobuf_LIBTYPE} IMPORTED GLOBAL)
......@@ -185,6 +184,12 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
SET(SOURCE_DIR "${CMAKE_SOURCE_DIR}/third-party/protobuf-host")
IF(BUILD_FOR_HOST)
# set for server compile.
if (NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
set(HOST_C_COMPILER "${CMAKE_C_COMPILER}")
set(HOST_CXX_COMPILER "${CMAKE_CXX_COMPILER}")
endif()
SET(OPTIONAL_ARGS
"-DCMAKE_C_COMPILER=${HOST_C_COMPILER}"
"-DCMAKE_CXX_COMPILER=${HOST_CXX_COMPILER}"
......@@ -247,6 +252,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
GIT_REPOSITORY ""
GIT_TAG ${PROTOBUF_TAG}
SOURCE_DIR ${SOURCE_DIR}
BUILD_ALWAYS 1
CONFIGURE_COMMAND ${CMAKE_COMMAND} ${SOURCE_DIR}/cmake
${OPTIONAL_ARGS}
-Dprotobuf_BUILD_TESTS=OFF
......@@ -276,7 +282,11 @@ IF(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
ENDIF()
IF(NOT PROTOBUF_FOUND)
build_protobuf(extern_protobuf FALSE)
if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
build_protobuf(extern_protobuf FALSE)
else()
build_protobuf(extern_protobuf TRUE)
endif()
SET(PROTOBUF_INCLUDE_DIR ${extern_protobuf_INCLUDE_DIR}
CACHE PATH "protobuf include directory." FORCE)
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
if(NOT LITE_WITH_PYTHON)
return()
endif()
include(ExternalProject)
set(PYBIND_SOURCE_DIR ${THIRD_PARTY_PATH}/pybind)
include_directories(${PYBIND_SOURCE_DIR}/src/extern_pybind/include)
ExternalProject_Add(
extern_pybind
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/pybind/pybind11.git"
GIT_TAG "v2.2.4"
PREFIX ${PYBIND_SOURCE_DIR}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
if(${CMAKE_VERSION} VERSION_LESS "3.3.0")
set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/pybind_dummy.c)
file(WRITE ${dummyfile} "const char * dummy_pybind = \"${dummyfile}\";")
add_library(pybind STATIC ${dummyfile})
else()
add_library(pybind INTERFACE)
endif()
add_dependencies(pybind extern_pybind)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
IF(NOT LITE_WITH_PYTHON)
return()
ENDIF()
INCLUDE(python_module)
FIND_PACKAGE(PythonInterp ${PY_VERSION} REQUIRED)
FIND_PACKAGE(PythonLibs ${PY_VERSION} REQUIRED)
if(WIN32)
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
"from distutils import sysconfig as s;import sys;import struct;
print(sys.prefix);
print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION'));
"
RESULT_VARIABLE _PYTHON_SUCCESS
OUTPUT_VARIABLE _PYTHON_VALUES
ERROR_VARIABLE _PYTHON_ERROR_VALUE)
if(NOT _PYTHON_SUCCESS MATCHES 0)
set(PYTHONLIBS_FOUND FALSE)
return()
endif()
# Convert the process output into a list
string(REGEX REPLACE ";" "\\\\;" _PYTHON_VALUES ${_PYTHON_VALUES})
string(REGEX REPLACE "\n" ";" _PYTHON_VALUES ${_PYTHON_VALUES})
list(GET _PYTHON_VALUES 0 PYTHON_PREFIX)
list(GET _PYTHON_VALUES 1 PYTHON_LIBRARY_SUFFIX)
# Make sure all directory separators are '/'
string(REGEX REPLACE "\\\\" "/" PYTHON_PREFIX ${PYTHON_PREFIX})
set(PYTHON_LIBRARY
"${PYTHON_PREFIX}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
# when run in a venv, PYTHON_PREFIX points to it. But the libraries remain in the
# original python installation. They may be found relative to PYTHON_INCLUDE_DIR.
if(NOT EXISTS "${PYTHON_LIBRARY}")
get_filename_component(_PYTHON_ROOT ${PYTHON_INCLUDE_DIR} DIRECTORY)
set(PYTHON_LIBRARY
"${_PYTHON_ROOT}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
endif()
# raise an error if the python libs are still not found.
if(NOT EXISTS "${PYTHON_LIBRARY}")
message(FATAL_ERROR "Python libraries not found")
endif()
SET(PYTHON_LIBRARIES "${PYTHON_LIBRARY}")
endif(WIN32)
# Fixme: Maybe find a static library. Get SHARED/STATIC by FIND_PACKAGE.
ADD_LIBRARY(python SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET python PROPERTY IMPORTED_LOCATION ${PYTHON_LIBRARIES})
SET(py_env "")
IF(PYTHONINTERP_FOUND)
find_python_module(pip REQUIRED)
find_python_module(numpy REQUIRED)
#find_python_module(wheel REQUIRED)
#find_python_module(google.protobuf REQUIRED)
FIND_PACKAGE(NumPy REQUIRED)
#IF(${PY_GOOGLE.PROTOBUF_VERSION} AND ${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0")
# MESSAGE(FATAL_ERROR "Found Python Protobuf ${PY_GOOGLE.PROTOBUF_VERSION} < 3.0.0, "
# "please use pip to upgrade protobuf. pip install -U protobuf")
#ENDIF()
ENDIF(PYTHONINTERP_FOUND)
INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_DIR})
INCLUDE_DIRECTORIES(${PYTHON_NUMPY_INCLUDE_DIR})
......@@ -146,8 +146,11 @@ set(GPU_COMMON_FLAGS
-Wno-error=unused-local-typedefs
-Wno-error=unused-function # Warnings in Numpy Header.
-Wno-error=array-bounds # Warnings in Eigen::array
-gencode arch=compute_62,code=sm_62
)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64")
if(NOT LITE_WITH_CUDA)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64")
endif()
endif(NOT WIN32)
if (APPLE)
......
......@@ -105,8 +105,8 @@ set_property(GLOBAL PROPERTY FLUID_MODULES "")
function(find_fluid_modules TARGET_NAME)
get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE)
string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path})
string(FIND "${__target_path}" "fluid" pos)
if(pos GREATER 1)
string(FIND "${__target_path}" "lite" pos)
if((pos GREATER 0) OR (pos EQUAL 0))
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
set(fluid_modules ${fluid_modules} ${TARGET_NAME})
set_property(GLOBAL PROPERTY FLUID_MODULES "${fluid_modules}")
......@@ -303,10 +303,12 @@ function(cc_library TARGET_NAME)
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
list(APPEND cc_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
endif()
if(${source_file} MATCHES "framework.pb.cc")
if(${source_file} MATCHES "__generated_code__.cc")
list(APPEND full_path_src ${source_file})
else()
list(APPEND full_path_src ${CMAKE_CURRENT_SOURCE_DIR}/${source_file})
if(NOT ${source_file} MATCHES "framework.pb.cc" AND NOT ${source_file} MATCHES "__generated_code__.cc")
list(APPEND full_path_src ${CMAKE_CURRENT_SOURCE_DIR}/${source_file})
endif()
endif()
endforeach()
set(__lite_cc_files ${__lite_cc_files} ${full_path_src} CACHE INTERNAL "")
......@@ -371,6 +373,7 @@ function(cc_binary TARGET_NAME)
endif()
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
target_link_libraries(${TARGET_NAME} ${os_dependency_modules})
find_fluid_modules(${TARGET_NAME})
endfunction(cc_binary)
function(cc_test TARGET_NAME)
......@@ -503,17 +506,14 @@ function(nv_test TARGET_NAME)
cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS})
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main memory gtest gflags glog ${os_dependency_modules})
add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main memory gtest gflags glog)
target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} lite_gtest_main gtest
gflags glog ${os_dependency_modules} ${CUDNN_LIBRARY} ${CUBLAS_LIBRARIES} )
add_dependencies(${TARGET_NAME} ${nv_test_DEPS} lite_gtest_main gtest gflags glog)
common_link(${TARGET_NAME})
add_test(${TARGET_NAME} ${TARGET_NAME})
if (nv_test_SERIAL)
set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1)
endif()
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cpu_deterministic=true)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_limit_of_tmp_allocation=4294967296) # 4G
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true)
endif()
endfunction(nv_test)
......
......@@ -22,7 +22,7 @@ endfunction()
function (lite_deps TARGET)
set(options "")
set(oneValueArgs "")
set(multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS LIGHT_DEPS HVY_DEPS CL_DEPS FPGA_DEPS NPU_DEPS ARGS)
set(multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS LIGHT_DEPS HVY_DEPS CL_DEPS FPGA_DEPS NPU_DEPS XPU_DEPS ARGS)
cmake_parse_arguments(lite_deps "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(deps ${lite_deps_DEPS})
......@@ -83,6 +83,12 @@ function (lite_deps TARGET)
endforeach(var)
endif()
if (LITE_WITH_XPU)
foreach(var ${lite_deps_XPU_DEPS})
set(deps ${deps} ${var})
endforeach(var)
endif()
set(${TARGET} ${deps} PARENT_SCOPE)
endfunction()
......@@ -107,7 +113,7 @@ file(WRITE ${offline_lib_registry_file} "") # clean
function(lite_cc_library TARGET)
set(options SHARED shared STATIC static MODULE module)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS NPU_DEPS ARM_DEPS FPGA_DEPS PROFILE_DEPS LIGHT_DEPS
set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS CL_DEPS NPU_DEPS XPU_DEPS ARM_DEPS FPGA_DEPS PROFILE_DEPS LIGHT_DEPS
HVY_DEPS EXCLUDE_COMPILE_DEPS ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
......@@ -118,6 +124,7 @@ function(lite_cc_library TARGET)
CUDA_DEPS ${args_CUDA_DEPS}
CL_DEPS ${args_CL_DEPS}
NPU_DEPS ${args_NPU_DEPS}
XPU_DEPS ${args_XPU_DEPS}
ARM_DEPS ${args_ARM_DEPS}
FPGA_DEPS ${args_FPGA_DEPS}
PROFILE_DEPS ${args_PROFILE_DEPS}
......@@ -126,12 +133,12 @@ function(lite_cc_library TARGET)
)
if (args_SHARED OR ARGS_shared)
cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS} SHARED)
cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} SHARED)
elseif (args_MODULE OR ARGS_module)
add_library(${TARGET} MODULE ${args_SRCS})
add_dependencies(${TARGET} ${deps} ${args_DEPS})
else()
cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS})
cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps})
endif()
target_compile_options(${TARGET} BEFORE PRIVATE -Wno-ignored-qualifiers)
......@@ -163,8 +170,17 @@ function(lite_cc_binary TARGET)
LIGHT_DEPS ${args_LIGHT_DEPS}
HVY_DEPS ${args_HVY_DEPS}
)
cc_binary(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS})
cc_binary(${TARGET} SRCS ${args_SRCS} DEPS ${deps})
target_compile_options(${TARGET} BEFORE PRIVATE -Wno-ignored-qualifiers)
if (NOT APPLE)
# strip binary target to reduce size
if(NOT "${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
add_custom_command(TARGET ${TARGET} POST_BUILD
COMMAND "${CMAKE_STRIP}" -s
"${TARGET}"
COMMENT "Strip debug symbols done on final executable file.")
endif()
endif()
# collect targets that need to be compiled for lite
if (NOT args_EXCLUDE_COMPILE_DEPS)
add_dependencies(lite_compile_deps ${TARGET})
......@@ -207,6 +223,13 @@ function(lite_cc_test TARGET)
HVY_DEPS ${args_HVY_DEPS}
)
_lite_cc_test(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ARGS ${args_ARGS})
# strip binary target to reduce size
if(NOT "${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
add_custom_command(TARGET ${TARGET} POST_BUILD
COMMAND "${CMAKE_STRIP}" -s
"${TARGET}"
COMMENT "Strip debug symbols done on final executable file.")
endif()
target_compile_options(${TARGET} BEFORE PRIVATE -Wno-ignored-qualifiers)
file(APPEND ${offline_test_registry_file} "${TARGET}\n")
......@@ -220,11 +243,16 @@ set(arm_kernels CACHE INTERNAL "arm kernels")
set(x86_kernels CACHE INTERNAL "x86 kernels")
set(fpga_kernels CACHE INTERNAL "fpga kernels")
set(npu_kernels CACHE INTERNAL "npu kernels")
set(xpu_kernels CACHE INTERNAL "xpu kernels")
set(opencl_kernels CACHE INTERNAL "opencl kernels")
set(host_kernels CACHE INTERNAL "host kernels")
set(kernels_src_list "${CMAKE_BINARY_DIR}/kernels_src_list.txt")
file(WRITE ${kernels_src_list} "") # clean
if(LITE_BUILD_TAILOR)
set(tailored_kernels_list_path "${LITE_OPTMODEL_DIR}/.tailored_kernels_source_list")
file(STRINGS ${tailored_kernels_list_path} tailored_kernels_list)
endif()
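Judging from the `list(FIND ...)` checks in `add_kernel`/`add_operator` below, each tailored list file is assumed to hold one source-file name per line, matched against the SRCS arguments of the corresponding targets. A sketch of the assumed format (file names here are purely illustrative):

```cmake
# Assumed format of ${LITE_OPTMODEL_DIR}/.tailored_kernels_source_list
# (one source file per line; the names below are hypothetical examples):
#
#   conv_compute.cc
#   pool_compute.cc
#
# add_kernel()/add_operator() skip any target whose SRCS entry is absent
# from this list -- see the list(FIND ...) checks below.
```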
# add a kernel for a specific device
# device: one of (Host, ARM, X86, NPU, XPU, FPGA, OPENCL, CUDA)
# level: one of (basic, extra)
......@@ -236,10 +264,34 @@ function(add_kernel TARGET device level)
ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if(LITE_BUILD_TAILOR)
foreach(src ${args_SRCS})
list (FIND tailored_kernels_list ${src} _index)
if (${_index} EQUAL -1)
return()
endif()
endforeach()
endif()
if ("${level}" STREQUAL "extra" AND (NOT LITE_BUILD_EXTRA))
return()
endif()
if (LITE_ON_MODEL_OPTIMIZE_TOOL)
# the source list is collected so that model_optimize_tool can generate fake kernels.
foreach(src ${args_SRCS})
file(APPEND ${kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
return()
endif()
# when compiling model_optimize_tool, a source file with all the fake kernel definitions is generated,
# so there is no need to continue compiling the real kernel sources.
if (LITE_ON_MODEL_OPTIMIZE_TOOL)
return()
endif(LITE_ON_MODEL_OPTIMIZE_TOOL)
if ("${device}" STREQUAL "Host")
set(host_kernels "${host_kernels};${TARGET}" CACHE INTERNAL "")
endif()
......@@ -261,6 +313,12 @@ function(add_kernel TARGET device level)
endif()
set(npu_kernels "${npu_kernels};${TARGET}" CACHE INTERNAL "")
endif()
if ("${device}" STREQUAL "XPU")
if (NOT LITE_WITH_XPU)
return()
endif()
set(xpu_kernels "${xpu_kernels};${TARGET}" CACHE INTERNAL "")
endif()
if ("${device}" STREQUAL "FPGA")
if (NOT LITE_WITH_FPGA)
return()
......@@ -274,6 +332,19 @@ function(add_kernel TARGET device level)
set(opencl_kernels "${opencl_kernels};${TARGET}" CACHE INTERNAL "")
endif()
if ("${device}" STREQUAL "CUDA")
if (NOT LITE_WITH_CUDA)
return()
endif()
set(cuda_kernels "${cuda_kernels};${TARGET}" CACHE INTERNAL "")
foreach(src ${args_SRCS})
file(APPEND ${kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
nv_library(${TARGET} SRCS ${args_SRCS} DEPS ${args_DEPS})
return()
endif()
# the source list is collected for paddle_use_kernel.h code generation.
foreach(src ${args_SRCS})
file(APPEND ${kernels_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
......@@ -281,6 +352,7 @@ function(add_kernel TARGET device level)
lite_cc_library(${TARGET} SRCS ${args_SRCS}
DEPS ${args_DEPS}
X86_DEPS ${args_X86_DEPS}
XPU_DEPS ${args_XPU_DEPS}
CUDA_DEPS ${args_CUDA_DEPS}
CL_DEPS ${args_CL_DEPS}
ARM_DEPS ${args_ARM_DEPS}
......@@ -294,6 +366,10 @@ endfunction()
set(ops CACHE INTERNAL "ops")
set(ops_src_list "${CMAKE_BINARY_DIR}/ops_src_list.txt")
file(WRITE ${ops_src_list} "") # clean
if(LITE_BUILD_TAILOR)
set(tailored_ops_list_path "${LITE_OPTMODEL_DIR}/.tailored_ops_source_list")
file(STRINGS ${tailored_ops_list_path} tailored_ops_list)
endif()
# add an operator
# level: one of (basic, extra)
function(add_operator TARGET level)
......@@ -304,19 +380,28 @@ function(add_operator TARGET level)
ARGS)
cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if ("${level}" STREQUAL "extra" AND (NOT LITE_BUILD_EXTRA))
return()
endif()
set(ops "${ops};${TARGET}" CACHE INTERNAL "source")
foreach(src ${args_SRCS})
if(LITE_BUILD_TAILOR)
list(FIND tailored_ops_list ${src} _index)
if (${_index} EQUAL -1)
return()
endif()
endif()
file(APPEND ${ops_src_list} "${CMAKE_CURRENT_SOURCE_DIR}/${src}\n")
endforeach()
set(ops "${ops};${TARGET}" CACHE INTERNAL "source")
lite_cc_library(${TARGET} SRCS ${args_SRCS}
DEPS ${args_DEPS}
X86_DEPS ${args_X86_DEPS}
XPU_DEPS ${args_XPU_DEPS}
CUDA_DEPS ${args_CUDA_DEPS}
CL_DEPS ${args_CL_DEPS}
ARM_DEPS ${args_ARM_DEPS}
......@@ -331,6 +416,8 @@ endfunction()
# Bundle several static libraries into one.
function(bundle_static_library tgt_name bundled_tgt_name fake_target)
list(APPEND static_libs ${tgt_name})
# for x86
add_dependencies(lite_compile_deps ${fake_target})
function(_recursively_collect_dependencies input_target)
set(_input_link_libraries LINK_LIBRARIES)
......
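For orientation, `bundle_static_library(<target> <bundled_name> <fake_target>)` is consumed later in this diff roughly as follows (the calls are copied from the lite/api hunks further down; the fake target is what the publish targets depend on to trigger the bundling):

```cmake
# Calls as they appear further down in lite/api/CMakeLists.txt:
bundle_static_library(paddle_api_full paddle_api_full_bundled bundle_full_api)
bundle_static_library(paddle_api_light paddle_api_light_bundled bundle_light_api)
# publish targets then depend on the fake targets:
add_dependencies(publish_inference_cxx_lib bundle_full_api)
add_dependencies(publish_inference_cxx_lib bundle_light_api)
```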
# Find if a Python module is installed
# Found at http://www.cmake.org/pipermail/cmake/2011-January/041666.html
# To use do: find_python_module(PyQt4 REQUIRED)
function(find_python_module module)
string(TOUPPER ${module} module_upper)
if(NOT PY_${module_upper})
if(ARGC GREATER 1 AND ARGV1 STREQUAL "REQUIRED")
set(${module}_FIND_REQUIRED TRUE)
else()
set(${module}_FIND_REQUIRED FALSE)
endif()
# A module's location is usually a directory, but for binary modules
# it's a .so file.
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
"import re, ${module}; print(re.compile('/__init__.py.*').sub('',${module}.__file__))"
RESULT_VARIABLE _${module}_status
OUTPUT_VARIABLE _${module}_location
ERROR_QUIET
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(NOT _${module}_status)
set(PY_${module_upper} ${_${module}_location} CACHE STRING
"Location of Python module ${module}")
endif(NOT _${module}_status)
endif(NOT PY_${module_upper})
find_package_handle_standard_args(PY_${module} DEFAULT_MSG PY_${module_upper})
if(NOT PY_${module_upper}_FOUND AND ${module}_FIND_REQUIRED)
message(FATAL_ERROR "python module ${module} is not found")
endif()
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
"import sys, ${module}; sys.stdout.write(${module}.__version__)"
OUTPUT_VARIABLE _${module}_version
RESULT_VARIABLE _${module}_status
ERROR_QUIET
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(NOT _${module}_status)
set(PY_${module_upper}_VERSION ${_${module}_version} CACHE STRING
"Version of Python module ${module}")
endif(NOT _${module}_status)
set(PY_${module_upper}_FOUND ${PY_${module_upper}_FOUND} PARENT_SCOPE)
set(PY_${module_upper}_VERSION ${PY_${module_upper}_VERSION} PARENT_SCOPE)
endfunction(find_python_module)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
if(NOT LITE_WITH_XPU)
return()
endif()
if(NOT DEFINED XPU_SDK_ROOT)
set(XPU_SDK_ROOT $ENV{XPU_SDK_ROOT})
if(NOT XPU_SDK_ROOT)
message(FATAL_ERROR "Must set XPU_SDK_ROOT or env XPU_SDK_ROOT when LITE_WITH_XPU=ON")
endif()
endif()
message(STATUS "XPU_SDK_ROOT: ${XPU_SDK_ROOT}")
find_path(XPU_SDK_INC NAMES xtcl.h
PATHS ${XPU_SDK_ROOT}/XTCL/include/xtcl NO_DEFAULT_PATH)
if(NOT XPU_SDK_INC)
message(FATAL_ERROR "Can not find xtcl.h in ${XPU_SDK_ROOT}/include")
endif()
include_directories("${XPU_SDK_ROOT}/XTCL/include")
include_directories("${XPU_SDK_ROOT}/XTDK/include")
find_library(XPU_SDK_XTCL_FILE NAMES xtcl
PATHS ${XPU_SDK_ROOT}/XTCL/so)
if(NOT XPU_SDK_XTCL_FILE)
message(FATAL_ERROR "Can not find XPU XTCL Library in ${XPU_SDK_ROOT}")
else()
message(STATUS "Found XPU XTCL Library: ${XPU_SDK_XTCL_FILE}")
add_library(xpu_sdk_xtcl SHARED IMPORTED GLOBAL)
set_property(TARGET xpu_sdk_xtcl PROPERTY IMPORTED_LOCATION ${XPU_SDK_XTCL_FILE})
endif()
find_library(XPU_SDK_TVM_FILE NAMES tvm
PATHS ${XPU_SDK_ROOT}/XTCL/so)
if(NOT XPU_SDK_TVM_FILE)
message(FATAL_ERROR "Can not find XPU TVM Library in ${XPU_SDK_ROOT}")
else()
message(STATUS "Found XPU TVM Library: ${XPU_SDK_TVM_FILE}")
add_library(xpu_sdk_tvm SHARED IMPORTED GLOBAL)
set_property(TARGET xpu_sdk_tvm PROPERTY IMPORTED_LOCATION ${XPU_SDK_TVM_FILE})
endif()
find_library(XPU_SDK_XPU_API_FILE NAMES xpuapi
PATHS ${XPU_SDK_ROOT}/XTDK/shlib)
if(NOT XPU_SDK_XPU_API_FILE)
message(FATAL_ERROR "Can not find XPU API Library in ${XPU_SDK_ROOT}")
else()
message(STATUS "Found XPU API Library: ${XPU_SDK_XPU_API_FILE}")
add_library(xpu_sdk_xpu_api SHARED IMPORTED GLOBAL)
set_property(TARGET xpu_sdk_xpu_api PROPERTY IMPORTED_LOCATION ${XPU_SDK_XPU_API_FILE})
endif()
find_library(XPU_SDK_XPU_RT_FILE NAMES xpurt
PATHS ${XPU_SDK_ROOT}/XTDK/shlib)
if(NOT XPU_SDK_XPU_RT_FILE)
message(FATAL_ERROR "Can not find XPU RT Library in ${XPU_SDK_ROOT}")
else()
message(STATUS "Found XPU RT Library: ${XPU_SDK_XPU_RT_FILE}")
add_library(xpu_sdk_xpu_rt SHARED IMPORTED GLOBAL)
set_property(TARGET xpu_sdk_xpu_rt PROPERTY IMPORTED_LOCATION ${XPU_SDK_XPU_RT_FILE})
endif()
find_library(XPU_SDK_XPU_JITC_FILE NAMES xpujitc
PATHS ${XPU_SDK_ROOT}/XTDK/shlib)
if(NOT XPU_SDK_XPU_JITC_FILE)
message(FATAL_ERROR "Can not find XPU JITC Library in ${XPU_SDK_ROOT}")
else()
message(STATUS "Found XPU JITC Library: ${XPU_SDK_XPU_JITC_FILE}")
add_library(xpu_sdk_xpu_jitc SHARED IMPORTED GLOBAL)
set_property(TARGET xpu_sdk_xpu_jitc PROPERTY IMPORTED_LOCATION ${XPU_SDK_XPU_JITC_FILE})
endif()
find_library(XPU_SDK_LLVM_FILE NAMES LLVM-8
PATHS ${XPU_SDK_ROOT}/XTDK/shlib)
if(NOT XPU_SDK_LLVM_FILE)
message(FATAL_ERROR "Can not find LLVM Library in ${XPU_SDK_ROOT}")
else()
message(STATUS "Found XPU LLVM Library: ${XPU_SDK_LLVM_FILE}")
add_library(xpu_sdk_llvm SHARED IMPORTED GLOBAL)
set_property(TARGET xpu_sdk_llvm PROPERTY IMPORTED_LOCATION ${XPU_SDK_LLVM_FILE})
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDMLC_USE_GLOG=1")
set(xpu_runtime_libs xpu_sdk_xtcl xpu_sdk_tvm xpu_sdk_xpu_api xpu_sdk_xpu_rt xpu_sdk_xpu_jitc xpu_sdk_llvm CACHE INTERNAL "xpu runtime libs")
set(xpu_builder_libs xpu_sdk_xtcl xpu_sdk_tvm xpu_sdk_xpu_api xpu_sdk_xpu_rt xpu_sdk_xpu_jitc xpu_sdk_llvm CACHE INTERNAL "xpu builder libs")
......@@ -6,12 +6,14 @@ message(STATUS "LITE_WITH_CUDA:\t${LITE_WITH_CUDA}")
message(STATUS "LITE_WITH_X86:\t${LITE_WITH_X86}")
message(STATUS "LITE_WITH_ARM:\t${LITE_WITH_ARM}")
message(STATUS "LITE_WITH_NPU:\t${LITE_WITH_NPU}")
message(STATUS "LITE_WITH_XPU:\t${LITE_WITH_XPU}")
message(STATUS "LITE_WITH_FPGA:\t${LITE_WITH_FPGA}")
message(STATUS "LITE_WITH_PROFILE:\t${LITE_WITH_PROFILE}")
set(LITE_MODEL_DIR "${THIRD_PARTY_PATH}/install")
set(LITE_ON_MOBILE ${LITE_WITH_LIGHT_WEIGHT_FRAMEWORK})
add_subdirectory(backends)
add_subdirectory(utils)
add_subdirectory(operators)
add_subdirectory(kernels)
......@@ -19,7 +21,6 @@ add_subdirectory(core)
add_subdirectory(model_parser)
add_subdirectory(api)
add_subdirectory(fluid)
add_subdirectory(backends)
if (NOT LITE_ON_TINY_PUBLISH)
add_subdirectory(tests)
......@@ -44,9 +45,13 @@ if (WITH_TESTING)
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "mobilenet_v2_relu.tar.gz")
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "resnet50.tar.gz")
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "inception_v4_simple.tar.gz")
lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "step_rnn.tar.gz")
endif()
endif()
# ----------------------------- PUBLISH -----------------------------
# The final target for publish lite lib
add_custom_target(publish_inference)
if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)
# for publish
set(INFER_LITE_PUBLISH_ROOT "${CMAKE_BINARY_DIR}/inference_lite_lib.${ARM_TARGET_OS}.${ARM_TARGET_ARCH_ABI}")
......@@ -56,10 +61,62 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)
if (LITE_WITH_NPU)
set(INFER_LITE_PUBLISH_ROOT "${INFER_LITE_PUBLISH_ROOT}.npu")
endif(LITE_WITH_NPU)
message(STATUS "publish inference lib to ${INFER_LITE_PUBLISH_ROOT}")
if (LITE_WITH_FPGA)
set(INFER_LITE_PUBLISH_ROOT "${INFER_LITE_PUBLISH_ROOT}.fpga")
endif(LITE_WITH_FPGA)
else()
set(INFER_LITE_PUBLISH_ROOT "${CMAKE_BINARY_DIR}/inference_lite_lib")
endif()
message(STATUS "publish inference lib to ${INFER_LITE_PUBLISH_ROOT}")
# add python lib
if (LITE_WITH_PYTHON)
add_custom_target(publish_inference_python_lib ${TARGET}
COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/python/lib"
COMMAND cp "${CMAKE_BINARY_DIR}/lite/api/python/pybind/liblite_pybind.so" "${INFER_LITE_PUBLISH_ROOT}/python/lib/lite_core.so")
add_custom_target(publish_inference_python_light_demo ${TARGET}
COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/demo/python"
COMMAND cp "${CMAKE_SOURCE_DIR}/lite/demo/python/mobilenetv1_light_api.py" "${INFER_LITE_PUBLISH_ROOT}/demo/python/")
if (NOT LITE_ON_TINY_PUBLISH)
add_custom_target(publish_inference_python_full_demo ${TARGET}
COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/demo/python"
COMMAND cp "${CMAKE_SOURCE_DIR}/lite/demo/python/mobilenetv1_full_api.py" "${INFER_LITE_PUBLISH_ROOT}/demo/python/")
add_dependencies(publish_inference publish_inference_python_full_demo)
endif()
add_dependencies(publish_inference_python_lib lite_pybind)
add_dependencies(publish_inference publish_inference_python_lib)
add_dependencies(publish_inference publish_inference_python_light_demo)
endif()
if (LITE_WITH_X86)
add_custom_target(publish_inference_x86_cxx_lib ${TARGET}
COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/cxx/lib"
COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/bin"
COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/cxx/include"
COMMAND cp "${CMAKE_SOURCE_DIR}/lite/api/paddle_*.h" "${INFER_LITE_PUBLISH_ROOT}/cxx/include"
COMMAND cp "${CMAKE_BINARY_DIR}/libpaddle_api_full_bundled.a" "${INFER_LITE_PUBLISH_ROOT}/cxx/lib"
COMMAND cp "${CMAKE_BINARY_DIR}/libpaddle_api_light_bundled.a" "${INFER_LITE_PUBLISH_ROOT}/cxx/lib"
COMMAND cp "${CMAKE_BINARY_DIR}/lite/api/*.so" "${INFER_LITE_PUBLISH_ROOT}/cxx/lib"
COMMAND cp "${CMAKE_BINARY_DIR}/lite/api/test_model_bin" "${INFER_LITE_PUBLISH_ROOT}/bin"
)
add_dependencies(publish_inference_x86_cxx_lib bundle_full_api)
add_dependencies(publish_inference_x86_cxx_lib bundle_light_api)
add_dependencies(publish_inference_x86_cxx_lib test_model_bin)
add_dependencies(publish_inference_x86_cxx_lib paddle_full_api_shared)
add_dependencies(publish_inference_x86_cxx_lib paddle_light_api_shared)
add_dependencies(publish_inference publish_inference_x86_cxx_lib)
add_custom_target(publish_inference_x86_cxx_demos ${TARGET}
COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/third_party"
COMMAND cp -r "${CMAKE_BINARY_DIR}/third_party/install/*" "${INFER_LITE_PUBLISH_ROOT}/third_party"
COMMAND cp -r "${CMAKE_BINARY_DIR}/third_party/eigen3" "${INFER_LITE_PUBLISH_ROOT}/third_party"
COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/demo/cxx"
)
add_dependencies(publish_inference_x86_cxx_lib publish_inference_x86_cxx_demos)
add_dependencies(publish_inference_x86_cxx_demos paddle_full_api_shared eigen3)
endif()
# The final target for publish lite lib
add_custom_target(publish_inference)
if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)
if (NOT LITE_ON_TINY_PUBLISH)
# add cxx lib
add_custom_target(publish_inference_cxx_lib ${TARGET}
......@@ -69,22 +126,28 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)
COMMAND cp "${CMAKE_SOURCE_DIR}/lite/api/paddle_*.h" "${INFER_LITE_PUBLISH_ROOT}/cxx/include"
COMMAND cp "${CMAKE_BINARY_DIR}/libpaddle_api_full_bundled.a" "${INFER_LITE_PUBLISH_ROOT}/cxx/lib"
COMMAND cp "${CMAKE_BINARY_DIR}/libpaddle_api_light_bundled.a" "${INFER_LITE_PUBLISH_ROOT}/cxx/lib"
COMMAND cp "${CMAKE_BINARY_DIR}/lite/api/model_optimize_tool" "${INFER_LITE_PUBLISH_ROOT}/bin"
#COMMAND cp "${CMAKE_BINARY_DIR}/lite/api/model_optimize_tool" "${INFER_LITE_PUBLISH_ROOT}/bin"
COMMAND cp "${CMAKE_BINARY_DIR}/lite/gen_code/paddle_code_generator" "${INFER_LITE_PUBLISH_ROOT}/bin"
COMMAND cp "${CMAKE_BINARY_DIR}/lite/api/test_model_bin" "${INFER_LITE_PUBLISH_ROOT}/bin"
)
if(NOT IOS)
add_dependencies(publish_inference_cxx_lib model_optimize_tool)
#add_dependencies(publish_inference_cxx_lib model_optimize_tool)
add_dependencies(publish_inference_cxx_lib paddle_code_generator)
add_dependencies(publish_inference_cxx_lib bundle_full_api)
add_dependencies(publish_inference_cxx_lib bundle_light_api)
add_dependencies(publish_inference_cxx_lib test_model_bin)
if (ARM_TARGET_OS STREQUAL "android" OR ARM_TARGET_OS STREQUAL "armlinux")
add_dependencies(publish_inference_cxx_lib paddle_full_api_shared)
add_dependencies(publish_inference paddle_light_api_shared)
add_custom_command(TARGET publish_inference_cxx_lib
COMMAND cp ${CMAKE_BINARY_DIR}/lite/api/*.so ${INFER_LITE_PUBLISH_ROOT}/cxx/lib)
endif()
add_dependencies(publish_inference publish_inference_cxx_lib)
add_custom_command(TARGET publish_inference_cxx_lib POST_BUILD
COMMAND ${CMAKE_STRIP} "--strip-debug" ${INFER_LITE_PUBLISH_ROOT}/cxx/lib/*.a)
endif()
else()
if (IOS OR (ARM_TARGET_OS STREQUAL "armlinux"))
if (IOS)
add_custom_target(tiny_publish_lib ${TARGET}
COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/lib"
COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/include"
......@@ -93,6 +156,18 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)
)
add_dependencies(tiny_publish_lib bundle_light_api)
add_dependencies(publish_inference tiny_publish_lib)
else()
if ((ARM_TARGET_OS STREQUAL "android") OR (ARM_TARGET_OS STREQUAL "armlinux"))
add_custom_target(tiny_publish_cxx_lib ${TARGET}
COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/cxx"
COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/cxx/include"
COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/cxx/lib"
COMMAND cp "${CMAKE_SOURCE_DIR}/lite/api/paddle_*.h" "${INFER_LITE_PUBLISH_ROOT}/cxx/include"
COMMAND cp "${CMAKE_BINARY_DIR}/lite/api/libpaddle_light_api_shared.so" "${INFER_LITE_PUBLISH_ROOT}/cxx/lib"
)
add_dependencies(tiny_publish_cxx_lib paddle_light_api_shared)
add_dependencies(publish_inference tiny_publish_cxx_lib)
endif()
endif()
endif()
......@@ -130,6 +205,16 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)
)
add_dependencies(publish_inference_android_cxx_demos logging gflags)
add_dependencies(publish_inference_cxx_lib publish_inference_android_cxx_demos)
else()
# copy the mobile_light demo for tiny-publish builds
add_custom_target(publish_inference_android_cxx_demos ${TARGET}
COMMAND mkdir -p "${INFER_LITE_PUBLISH_ROOT}/demo/cxx"
COMMAND cp "${CMAKE_SOURCE_DIR}/lite/demo/cxx/Makefile.def" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx"
COMMAND cp "${CMAKE_SOURCE_DIR}/lite/demo/cxx/README.md" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx"
COMMAND cp -r "${CMAKE_SOURCE_DIR}/lite/demo/cxx/mobile_light" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx"
COMMAND cp "${CMAKE_SOURCE_DIR}/lite/demo/cxx/makefiles/mobile_light/Makefile.${ARM_TARGET_OS}.${ARM_TARGET_ARCH_ABI}" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx/mobile_light/Makefile"
)
add_dependencies(tiny_publish_cxx_lib publish_inference_android_cxx_demos)
endif()
if (LITE_WITH_JAVA)
......
......@@ -4,12 +4,53 @@ else()
lite_cc_library(place SRCS paddle_place.cc DEPS glog)
endif(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
if (LITE_ON_TINY_PUBLISH)
set(CMAKE_CXX_FLAGS_RELEASE "-Os -DNDEBUG")
set(CMAKE_C_FLAGS_RELEASE "-Os -DNDEBUG")
endif()
set(light_lib_DEPS light_api paddle_api paddle_api_light optimizer)
if ((NOT LITE_ON_TINY_PUBLISH) AND (LITE_WITH_X86 OR ARM_TARGET_OS STREQUAL "android" OR ARM_TARGET_OS STREQUAL "armlinux"))
#full api dynamic library
add_library(paddle_full_api_shared SHARED "")
target_sources(paddle_full_api_shared PUBLIC ${__lite_cc_files} paddle_api.cc light_api.cc cxx_api.cc cxx_api_impl.cc light_api_impl.cc)
add_dependencies(paddle_full_api_shared op_list_h kernel_list_h framework_proto)
target_link_libraries(paddle_full_api_shared framework_proto)
if(LITE_WITH_X86)
add_dependencies(paddle_full_api_shared xxhash)
target_link_libraries(paddle_full_api_shared xxhash)
endif()
#light api dynamic library
lite_cc_library(paddle_light_api_shared MODULE
SRCS light_api_shared.cc
DEPS ${light_lib_DEPS}
ARM_DEPS ${arm_kernels} NPU_DEPS ${npu_kernels})
target_link_libraries(paddle_light_api_shared ${light_lib_DEPS} ${arm_kernels} ${npu_kernels})
if (LITE_WITH_NPU)
# Strip the symbols of our protobuf functions to fix conflicts when
# loading the HIAI builder libs (libhiai_ir.so and libhiai_ir_build.so)
set(LINK_FLAGS "-Wl,--version-script ${PADDLE_SOURCE_DIR}/lite/core/lite.map")
set_target_properties(paddle_light_api_shared PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
endif()
else()
if ((ARM_TARGET_OS STREQUAL "android") OR (ARM_TARGET_OS STREQUAL "armlinux"))
add_library(paddle_light_api_shared SHARED "")
target_sources(paddle_light_api_shared PUBLIC ${__lite_cc_files} paddle_api.cc light_api.cc light_api_impl.cc)
add_dependencies(paddle_light_api_shared op_list_h kernel_list_h)
if (LITE_WITH_NPU)
# Need to add HIAI runtime libs (libhiai.so) dependency
target_link_libraries(paddle_light_api_shared ${npu_runtime_libs})
endif()
endif()
endif()
if (WITH_TESTING)
lite_cc_library(lite_api_test_helper SRCS lite_api_test_helper.cc
DEPS scope optimizer target_wrapper_host model_parser program
${ops} ${host_kernels}
CUDA_DEPS ${cuda_kernels}
X86_DEPS ${x86_kernels})
X86_DEPS ${x86_kernels}
XPU_DEPS ${xpu_kernels})
endif()
if(LITE_WITH_FPGA)
set(light_api_deps ${light_api_deps} ${fpga_deps})
......@@ -21,6 +62,7 @@ message(STATUS "get X86 kernels ${x86_kernels}")
message(STATUS "get Host kernels ${host_kernels}")
message(STATUS "get ARM kernels ${arm_kernels}")
message(STATUS "get NPU kernels ${npu_kernels}")
message(STATUS "get XPU kernels ${xpu_kernels}")
message(STATUS "get FPGA kernels ${fpga_kernels}")
# for full api
......@@ -33,6 +75,7 @@ if (NOT LITE_ON_TINY_PUBLISH)
X86_DEPS ${x86_kernels}
ARM_DEPS ${arm_kernels}
NPU_DEPS ${npu_kernels} ${npu_bridges} npu_pass
XPU_DEPS ${xpu_kernels} ${xpu_bridges} xpu_pass
CL_DEPS ${opencl_kenrels}
FPGA_DEPS ${fpga_kenrels})
endif()
......@@ -42,6 +85,8 @@ set(light_api_deps
scope target_wrapper_host model_parser program)
if(LITE_WITH_CUDA)
set(light_api_deps ${light_api_deps} target_wrapper_cuda)
set(cuda_static_deps cudart_static cublas_static curand_static
cudnn_static culibos_static)
endif()
lite_cc_library(light_api SRCS light_api.cc
DEPS scope target_wrapper_host model_parser
......@@ -49,7 +94,8 @@ lite_cc_library(light_api SRCS light_api.cc
CUDA_DEPS ${cuda_kernels}
X86_DEPS ${x86_kernels}
ARM_DEPS ${arm_kernels}
NPU_DEPS ${npu_kernels} ${npu_bridges} npu_pass
NPU_DEPS ${npu_kernels}
XPU_DEPS ${xpu_kernels}
CL_DEPS ${opencl_kenrels}
FPGA_DEPS ${fpga_kenrels})
......@@ -64,6 +110,7 @@ if(WITH_TESTING)
X86_DEPS ${x86_kernels}
ARM_DEPS ${arm_kernels}
NPU_DEPS ${npu_kernels}
XPU_DEPS ${xpu_kernels}
CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels}
EXCLUDE_COMPILE_DEPS "ON"
......@@ -72,25 +119,35 @@ if(WITH_TESTING)
add_dependencies(test_cxx_api extern_lite_download_lite_naive_model_tar_gz)
if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
lite_cc_test(test_googlenet SRCS test_googlenet_lite.cc
DEPS cxx_api mir_passes lite_api_test_helper
DEPS mir_passes lite_api_test_helper paddle_api_full paddle_api_light gflags utils
${ops} ${host_kernels} ${x86_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/googlenet)
add_dependencies(test_googlenet extern_lite_download_GoogleNet_inference_tar_gz)
lite_cc_test(test_mobilenetv1_lite_x86 SRCS test_mobilenetv1_lite_x86.cc
DEPS cxx_api mir_passes lite_api_test_helper
DEPS mir_passes lite_api_test_helper paddle_api_full paddle_api_light gflags utils
${ops} ${host_kernels} ${x86_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/mobilenet_v1)
add_dependencies(test_mobilenetv1_lite_x86 extern_lite_download_mobilenet_v1_tar_gz)
lite_cc_test(test_mobilenetv2_lite_x86 SRCS test_mobilenetv2_lite_x86.cc
DEPS cxx_api mir_passes lite_api_test_helper
DEPS mir_passes lite_api_test_helper paddle_api_full paddle_api_light gflags utils
${ops} ${host_kernels} ${x86_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/mobilenet_v2_relu)
add_dependencies(test_mobilenetv2_lite_x86 extern_lite_download_mobilenet_v2_relu_tar_gz)
lite_cc_test(test_inceptionv4_lite_x86 SRCS test_inceptionv4_lite_x86.cc
DEPS cxx_api mir_passes lite_api_test_helper
DEPS mir_passes lite_api_test_helper paddle_api_full paddle_api_light gflags utils
${ops} ${host_kernels} ${x86_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/inception_v4_simple)
add_dependencies(test_inceptionv4_lite_x86 extern_lite_download_inception_v4_simple_tar_gz)
lite_cc_test(test_resnet50_lite_x86 SRCS test_resnet50_lite_x86.cc
DEPS mir_passes lite_api_test_helper paddle_api_full paddle_api_light gflags utils
${ops} ${host_kernels} ${x86_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/resnet50)
add_dependencies(test_resnet50_lite_x86 extern_lite_download_resnet50_tar_gz)
lite_cc_test(test_step_rnn_lite_x86 SRCS test_step_rnn_lite_x86.cc
DEPS mir_passes lite_api_test_helper paddle_api_full paddle_api_light gflags utils
${ops} ${host_kernels} ${x86_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/step_rnn)
add_dependencies(test_step_rnn_lite_x86 extern_lite_download_step_rnn_tar_gz)
endif()
endif()
......@@ -150,23 +207,7 @@ if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND WITH_TESTING)
# FPGA_DEPS ${fpga_kernels})
endif()
# These tests need CLI arguments and are not supported in ARM CI.
# TODO(Superjomn) support them later.
lite_cc_test(test_light_api SRCS light_api_test.cc
DEPS light_api program mir_passes
CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels}
ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
lite_cc_test(test_apis SRCS apis_test.cc
DEPS cxx_api light_api ${ops}
CL_DEPS ${opencl_kernels}
X86_DEPS ${x86_kernels}
FPGA_DEPS ${fpga_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
--optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
lite_cc_library(paddle_api SRCS paddle_api.cc DEPS op_params tensor)
lite_cc_library(paddle_api SRCS paddle_api.cc DEPS op_params tensor device_info)
#-----------------------------------------------------------------------------------------------------
# The final inference library for both CxxConfig and MobileConfig.
......@@ -184,21 +225,53 @@ if (NOT LITE_ON_TINY_PUBLISH)
FPGA_DEPS ${fpga_kernels})
# The final inference library for just MobileConfig.
bundle_static_library(paddle_api_full paddle_api_full_bundled bundle_full_api)
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
cc_library(api_full_static SRCS DEPS paddle_api_full cxx_api paddle_api light_api ${cxx_api_deps} ${ops} ${host_kernels} ${cuda_kernels} program tensor memory naive_buffer types ${fluid_modules} protobuf ${cuda_static_deps})
endif()
bundle_static_library(paddle_api_light paddle_api_light_bundled bundle_light_api)
#-----------------------------------------------------------------------------------------------------
# These tests need CLI arguments and are not supported in ARM CI.
# TODO(Superjomn) support them later.
lite_cc_test(test_light_api SRCS light_api_test.cc
DEPS light_api program mir_passes paddle_api_light
CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels}
ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
lite_cc_test(test_apis SRCS apis_test.cc
DEPS cxx_api light_api ${ops} paddle_api_light
CL_DEPS ${opencl_kernels}
X86_DEPS ${x86_kernels}
XPU_DEPS ${xpu_kernels}
FPGA_DEPS ${fpga_kernels}
ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
--optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
if (LITE_WITH_JAVA AND LITE_WITH_ARM)
add_subdirectory(android)
endif()
if (LITE_WITH_PYTHON)
add_subdirectory(python)
endif()
if (LITE_ON_TINY_PUBLISH)
return()
endif()
if (LITE_ON_MODEL_OPTIMIZE_TOOL)
message(STATUS "Compiling model_optimize_tool")
lite_cc_binary(model_optimize_tool SRCS model_optimize_tool.cc cxx_api_impl.cc paddle_api.cc cxx_api.cc
DEPS gflags kernel op optimizer mir_passes utils)
add_dependencies(model_optimize_tool op_list_h kernel_list_h all_kernel_faked_cc)
endif(LITE_ON_MODEL_OPTIMIZE_TOOL)
lite_cc_test(test_paddle_api SRCS paddle_api_test.cc DEPS paddle_api_full paddle_api_light
${ops}
ARM_DEPS ${arm_kernels}
NPU_DEPS ${npu_kernels}
XPU_DEPS ${xpu_kernels}
CL_DEPS ${opencl_kernels}
X86_DEPS ${x86_kernels}
FPGA_DEPS ${fpga_kernels}
......@@ -209,17 +282,19 @@ endif()
# Some bins
if(NOT IOS)
lite_cc_binary(test_model_bin SRCS model_test.cc DEPS paddle_api_full paddle_api_light gflags
${ops}
lite_cc_binary(test_model_bin SRCS model_test.cc DEPS paddle_api_full paddle_api_light gflags utils
${ops} ${host_kernels}
ARM_DEPS ${arm_kernels}
NPU_DEPS ${npu_kernels}
XPU_DEPS ${xpu_kernels}
CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels}
X86_DEPS ${x86_kernels})
lite_cc_binary(benchmark_bin SRCS benchmark.cc DEPS paddle_api_full paddle_api_light gflags
${ops}
lite_cc_binary(benchmark_bin SRCS benchmark.cc DEPS paddle_api_full paddle_api_light gflags utils
${ops} ${host_kernels}
ARM_DEPS ${arm_kernels}
NPU_DEPS ${npu_kernels}
XPU_DEPS ${xpu_kernels}
CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels}
X86_DEPS ${x86_kernels})
......@@ -229,7 +304,3 @@ endif()
#X86_DEPS operator
#DEPS light_api model_parser target_wrapper_host mir_passes
#ARM_DEPS ${arm_kernels}) NPU_DEPS ${npu_kernels})
lite_cc_binary(model_optimize_tool SRCS model_optimize_tool.cc
DEPS paddle_api_full gflags
CL_DEPS ${opencl_kernels})
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*
* ATTENTION: this header file can only be included in .cc files.
*/
#pragma once
#include "paddle_lite_factory_helper.h" // NOLINT
#ifndef LITE_WITH_FPGA
USE_LITE_KERNEL(feed, kHost, kAny, kAny, def);
USE_LITE_KERNEL(fetch, kHost, kAny, kAny, def);
USE_LITE_KERNEL(flatten, kHost, kAny, kAny, def);
USE_LITE_KERNEL(flatten2, kHost, kAny, kAny, def);
#else
USE_LITE_KERNEL(feed, kFPGA, kFP16, kNHWC, def);
USE_LITE_KERNEL(fetch, kFPGA, kFP16, kNHWC, def);
#endif
// host kernels
USE_LITE_KERNEL(reshape, kHost, kAny, kAny, def);
USE_LITE_KERNEL(reshape2, kHost, kAny, kAny, def);
USE_LITE_KERNEL(multiclass_nms, kHost, kFloat, kNCHW, def);
#ifdef LITE_WITH_ARM
USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(matmul, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(scale, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(softmax, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(lrn, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(decode_bboxes, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(box_coder, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(conv2d, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(depthwise_conv2d, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(elementwise_add, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(elementwise_mul, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(elementwise_max, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(elementwise_div, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(fusion_elementwise_div_activation, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(fusion_elementwise_add_activation, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(fusion_elementwise_mul_activation, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(fusion_elementwise_max_activation, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(split, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(dropout, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(concat, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(pool2d, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(relu, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(relu6, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(transpose, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(transpose2, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(batch_norm, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(power, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(shuffle_channel, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(yolo_box, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(argmax, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(axpy, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(leaky_relu, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(relu_clipped, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(prelu, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(sigmoid, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(tanh, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(swish, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(log, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(exp, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(conv2d_transpose, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(pad2d, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(prior_box, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(density_prior_box, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(negative, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(crop, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(norm, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(sequence_softmax, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(im2sequence, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(bilinear_interp, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(nearest_interp, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(logical_xor, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(logical_and, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(less_than, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(top_k, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(increment, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(write_to_array, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(read_from_array, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(reduce_max, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(sequence_expand, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(sequence_pool, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(shape, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(fill_constant, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(cast, kARM, kFloat, kNCHW, def)
USE_LITE_KERNEL(slice, kARM, kFloat, kNCHW, def)
USE_LITE_KERNEL(affine_channel, kARM, kFloat, kNCHW, def)
USE_LITE_KERNEL(anchor_generator, kARM, kFloat, kNCHW, def)
USE_LITE_KERNEL(generate_proposals, kARM, kFloat, kNCHW, def)
USE_LITE_KERNEL(squeeze, kARM, kFloat, kNCHW, def) // for x2paddle
USE_LITE_KERNEL(squeeze2, kARM, kFloat, kNCHW, def) // for x2paddle
USE_LITE_KERNEL(expand, kARM, kFloat, kNCHW, def) // for x2paddle
USE_LITE_KERNEL(roi_align, kARM, kFloat, kNCHW, def)
USE_LITE_KERNEL(box_clip, kARM, kFloat, kNCHW, def)
USE_LITE_KERNEL(reduce_mean, kARM, kFloat, kNCHW, def)
USE_LITE_KERNEL(stack, kARM, kFloat, kNCHW, def)
USE_LITE_KERNEL(calib, kARM, kInt8, kNCHW, fp32_to_int8);
USE_LITE_KERNEL(calib, kARM, kInt8, kNCHW, int8_to_fp32);
USE_LITE_KERNEL(calib_once, kARM, kInt8, kNCHW, fp32_to_int8);
USE_LITE_KERNEL(calib_once, kARM, kInt8, kNCHW, int8_to_fp32);
USE_LITE_KERNEL(conv2d, kARM, kInt8, kNCHW, int8_out);
USE_LITE_KERNEL(conv2d, kARM, kInt8, kNCHW, fp32_out);
USE_LITE_KERNEL(fc, kARM, kInt8, kNCHW, int8out);
USE_LITE_KERNEL(fc, kARM, kInt8, kNCHW, fp32out);
USE_LITE_KERNEL(gru_unit, kARM, kFloat, kNCHW, def)
USE_LITE_KERNEL(gru, kARM, kFloat, kNCHW, def)
USE_LITE_KERNEL(beam_search_decode, kARM, kFloat, kNCHW, def)
USE_LITE_KERNEL(beam_search, kARM, kFloat, kNCHW, def)
USE_LITE_KERNEL(while, kARM, kFloat, kNCHW, def)
USE_LITE_KERNEL(lod_reset, kARM, kFloat, kNCHW, def)
USE_LITE_KERNEL(lookup_table, kARM, kFloat, kNCHW, def)
USE_LITE_KERNEL(is_empty, kARM, kFloat, kNCHW, def)
USE_LITE_KERNEL(assign, kARM, kFloat, kNCHW, def);
#endif
#ifdef LITE_WITH_X86
// NOTE: all the X86 kernels are temporarily disabled because the kernels have changed.
// USE_LITE_KERNEL(relu, kX86, kFloat, kNCHW, def);
// USE_LITE_KERNEL(mul, kX86, kFloat, kNCHW, def);
// USE_LITE_KERNEL(fc, kX86, kFloat, kNCHW, def);
USE_LITE_KERNEL(scale, kX86, kFloat, kNCHW, def);
// USE_LITE_KERNEL(fill_constant, kX86, kFloat, kNCHW, def);
// USE_LITE_KERNEL(square, kX86, kFloat, kNCHW, def);
// USE_LITE_KERNEL(elementwise_sub, kX86, kFloat, kNCHW, def);
// USE_LITE_KERNEL(elementwise_add, kX86, kFloat, kNCHW, def);
// USE_LITE_KERNEL(softmax, kX86, kFloat, kNCHW, def);
// USE_LITE_KERNEL(dropout, kX86, kFloat, kNCHW, def);
// USE_LITE_KERNEL(concat, kX86, kFloat, kNCHW, def);
// USE_LITE_KERNEL(conv2d, kX86, kFloat, kNCHW, def);
// USE_LITE_KERNEL(depthwise_conv2d, kX86, kFloat, kNCHW, def);
// USE_LITE_KERNEL(pool2d, kX86, kFloat, kNCHW, def);
// USE_LITE_KERNEL(batch_norm, kX86, kFloat, kNCHW, def);
#endif
#ifdef LITE_WITH_CUDA
USE_LITE_KERNEL(mul, kCUDA, kFloat, kNCHW, def);
USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, host_to_device);
USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, device_to_host);
USE_LITE_KERNEL(io_copy_once, kCUDA, kAny, kAny, host_to_device);
USE_LITE_KERNEL(io_copy_once, kCUDA, kAny, kAny, device_to_host);
USE_LITE_KERNEL(leaky_relu, kCUDA, kFloat, kNCHW, def);
USE_LITE_KERNEL(nearest_interp, kCUDA, kFloat, kNCHW, def);
USE_LITE_KERNEL(yolo_box, kCUDA, kFloat, kNCHW, def);
#endif
#ifdef LITE_WITH_OPENCL
USE_LITE_KERNEL(io_copy, kOpenCL, kAny, kAny, host_to_device);
USE_LITE_KERNEL(io_copy, kOpenCL, kAny, kAny, device_to_host);
USE_LITE_KERNEL(io_copy_once, kOpenCL, kAny, kAny, host_to_device);
USE_LITE_KERNEL(io_copy_once, kOpenCL, kAny, kAny, device_to_host);
USE_LITE_KERNEL(fc, kOpenCL, kFloat, kNCHW, def);
USE_LITE_KERNEL(mul, kOpenCL, kFloat, kNCHW, def);
USE_LITE_KERNEL(elementwise_add, kOpenCL, kFloat, kNCHW, def);
USE_LITE_KERNEL(fusion_elementwise_add_activation, kOpenCL, kFloat, kNCHW, def);
USE_LITE_KERNEL(pool2d, kOpenCL, kFloat, kNCHW, def);
USE_LITE_KERNEL(relu, kOpenCL, kFloat, kNCHW, def);
USE_LITE_KERNEL(depthwise_conv2d, kOpenCL, kFloat, kNCHW, def);
USE_LITE_KERNEL(conv2d, kOpenCL, kFloat, kNCHW, def);
#endif
#ifdef LITE_WITH_NPU
USE_LITE_KERNEL(graph_op, kNPU, kFloat, kNCHW, def);
#endif
#ifdef LITE_WITH_FPGA
USE_LITE_KERNEL(relu, kFPGA, kFP16, kNHWC, def);
USE_LITE_KERNEL(conv2d, kFPGA, kFP16, kNHWC, def);
USE_LITE_KERNEL(elementwise_add, kFPGA, kFP16, kNHWC, def);
USE_LITE_KERNEL(fusion_elementwise_add_activation, kFPGA, kFP16, kNHWC, def);
USE_LITE_KERNEL(fc, kFPGA, kFP16, kNHWC, def);
USE_LITE_KERNEL(pool2d, kFPGA, kFP16, kNHWC, def);
USE_LITE_KERNEL(scale, kFPGA, kFP16, kNHWC, def);
USE_LITE_KERNEL(softmax, kFPGA, kFP16, kNHWC, def);
USE_LITE_KERNEL(io_copy, kFPGA, kAny, kAny, host_to_device);
USE_LITE_KERNEL(io_copy, kFPGA, kAny, kAny, device_to_host);
USE_LITE_KERNEL(io_copy_once, kFPGA, kAny, kAny, host_to_device_once);
USE_LITE_KERNEL(io_copy_once, kFPGA, kAny, kAny, device_to_host_once);
USE_LITE_KERNEL(calib, kFPGA, kFP16, kNHWC, fp32_to_fp16_fpga);
USE_LITE_KERNEL(calib, kFPGA, kFP16, kNHWC, fp16_to_fp32_fpga);
USE_LITE_KERNEL(calib_once, kFPGA, kFP16, kNHWC, fp32_to_fp16_fpga);
USE_LITE_KERNEL(calib_once, kFPGA, kFP16, kNHWC, fp16_to_fp32_fpga);
USE_LITE_KERNEL(layout, kFPGA, kAny, kNHWC, hwc_to_chw_fpga_fp16);
USE_LITE_KERNEL(layout, kFPGA, kAny, kNHWC, chw_to_hwc_fpga_fp16);
USE_LITE_KERNEL(layout_once, kFPGA, kAny, kNHWC, hwc_to_chw_fpga_fp16);
USE_LITE_KERNEL(layout_once, kFPGA, kAny, kNHWC, chw_to_hwc_fpga_fp16);
#endif
......@@ -21,6 +21,7 @@
USE_LITE_OP(mul);
USE_LITE_OP(matmul);
USE_LITE_OP(fc);
USE_LITE_OP(assign);
USE_LITE_OP(relu);
USE_LITE_OP(relu6);
USE_LITE_OP(scale);
......@@ -51,7 +52,7 @@ USE_LITE_OP(batch_norm)
USE_LITE_OP(fusion_elementwise_sub_activation)
USE_LITE_OP(transpose)
USE_LITE_OP(transpose2)
USE_LITE_OP(argmax)
USE_LITE_OP(arg_max)
USE_LITE_OP(axpy)
USE_LITE_OP(leaky_relu)
USE_LITE_OP(relu_clipped)
......@@ -118,8 +119,13 @@ USE_LITE_OP(cast)
USE_LITE_OP(affine_channel)
USE_LITE_OP(anchor_generator)
USE_LITE_OP(generate_proposals)
USE_LITE_OP(squeeze) // for x2paddle
USE_LITE_OP(squeeze2) // for x2paddle
USE_LITE_OP(expand) // for x2paddle
USE_LITE_OP(squeeze) // for x2paddle
USE_LITE_OP(squeeze2) // for x2paddle
USE_LITE_OP(unsqueeze) // for x2paddle
USE_LITE_OP(unsqueeze2) // for x2paddle
USE_LITE_OP(expand) // for x2paddle
USE_LITE_OP(roi_align)
USE_LITE_OP(box_clip)
USE_LITE_OP(assign_value)
USE_LITE_OP(hard_sigmoid)
USE_LITE_OP(rsqrt)
......@@ -17,10 +17,20 @@ if (NOT LITE_ON_TINY_PUBLISH)
# Unlike static library, module library has to link target to be able to work
# as a single .so lib.
target_link_libraries(paddle_lite_jni ${lib_DEPS} ${arm_kernels} ${npu_kernels})
if (LITE_WITH_NPU)
# Strips the symbols of our protobuf functions to fix the conflicts during
# loading HIAI builder libs (libhiai_ir.so and libhiai_ir_build.so)
set(LINK_FLAGS "-Wl,--version-script ${PADDLE_SOURCE_DIR}/lite/core/lite.map")
set_target_properties(paddle_lite_jni PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
endif()
else()
add_library(paddle_lite_jni SHARED "")
target_sources(paddle_lite_jni PUBLIC ${__lite_cc_files} paddle_lite_jni.cc tensor_jni.cc)
add_dependencies(paddle_lite_jni op_list_h kernel_list_h)
if (LITE_WITH_NPU)
# Need to add HIAI runtime libs (libhiai.so) dependency
target_link_libraries(paddle_lite_jni ${npu_runtime_libs})
endif()
endif()
if (APPLE)
......
......@@ -49,6 +49,27 @@ inline std::string jstring_to_cpp_string(JNIEnv *env, jstring jstr) {
return ret;
}
inline jstring cpp_string_to_jstring(JNIEnv *env, std::string str) {
auto *data = str.c_str();
jclass strClass = env->FindClass("java/lang/String");
jmethodID strClassInitMethodID =
env->GetMethodID(strClass, "<init>", "([BLjava/lang/String;)V");
jbyteArray bytes = env->NewByteArray(strlen(data));
env->SetByteArrayRegion(
bytes, 0, strlen(data), reinterpret_cast<const jbyte *>(data));
jstring encoding = env->NewStringUTF("UTF-8");
jstring res = (jstring)(
env->NewObject(strClass, strClassInitMethodID, bytes, encoding));
env->DeleteLocalRef(strClass);
env->DeleteLocalRef(encoding);
env->DeleteLocalRef(bytes);
return res;
}
inline jfloatArray cpp_array_to_jfloatarray(JNIEnv *env,
const float *buf,
int64_t len) {
......@@ -124,8 +145,6 @@ inline CxxConfig jcxxconfig_to_cpp_cxxconfig(JNIEnv *env, jobject jcxxconfig) {
jmethodID model_dir_method =
env->GetMethodID(cxxconfig_jclazz, "getModelDir", "()Ljava/lang/String;");
jmethodID preferred_place_method = env->GetMethodID(
cxxconfig_jclazz, "getPreferredPlace", "()Lcom/baidu/paddle/lite/Place;");
jmethodID valid_places_method = env->GetMethodID(
cxxconfig_jclazz, "getValidPlaces", "()[Lcom/baidu/paddle/lite/Place;");
......@@ -138,13 +157,6 @@ inline CxxConfig jcxxconfig_to_cpp_cxxconfig(JNIEnv *env, jobject jcxxconfig) {
config.set_model_dir(cpp_model_dir);
}
jobject java_preferred_place =
env->CallObjectMethod(jcxxconfig, preferred_place_method);
if (java_preferred_place != nullptr) {
Place cpp_preferred_place = jplace_to_cpp_place(env, java_preferred_place);
config.set_preferred_place(cpp_preferred_place);
}
jobject object_valid_places =
env->CallObjectMethod(jcxxconfig, valid_places_method);
jobjectArray *java_valid_places =
......
......@@ -50,6 +50,16 @@ JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_lite_PaddlePredictor_run(
return JNI_TRUE;
}
JNIEXPORT jstring JNICALL Java_com_baidu_paddle_lite_PaddlePredictor_getVersion(
JNIEnv *env, jobject jpaddle_predictor) {
std::shared_ptr<PaddlePredictor> *predictor =
getPaddlePredictorPointer(env, jpaddle_predictor);
if (predictor == nullptr || (*predictor == nullptr)) {
return cpp_string_to_jstring(env, "");
}
return cpp_string_to_jstring(env, (*predictor)->GetVersion());
}
JNIEXPORT jboolean JNICALL
Java_com_baidu_paddle_lite_PaddlePredictor_saveOptimizedModel(
JNIEnv *env, jobject jpaddle_predictor, jstring model_dir) {
......
......@@ -37,6 +37,14 @@ namespace lite_api {
JNIEXPORT jboolean JNICALL
Java_com_baidu_paddle_lite_PaddlePredictor_run(JNIEnv *, jobject);
/*
* Class: com_baidu_paddle_lite_PaddlePredictor
* Method: getVersion
* Signature: ()Ljava/lang/String;
*/
JNIEXPORT jstring JNICALL
Java_com_baidu_paddle_lite_PaddlePredictor_getVersion(JNIEnv *, jobject);
/*
* Class: com_baidu_paddle_lite_PaddlePredictor
* Method: saveOptimizedModel
......
......@@ -18,17 +18,8 @@ package com.baidu.paddle.lite;
*/
public class CxxConfig extends ConfigBase {
protected Place preferredPlace;
protected Place[] validPlaces;
public Place getPreferredPlace() {
return preferredPlace;
}
public void setPreferredPlace(Place preferredPlace) {
this.preferredPlace = preferredPlace;
}
public Place[] getValidPlaces() {
return validPlaces;
}
......
......@@ -82,6 +82,13 @@ public class PaddlePredictor {
*/
public native boolean run();
/**
* Get the C++ lib's version information.
*
* @return C++ lib's version information.
*/
public native String getVersion();
/**
* Saves the optimized model. It is available only for {@link CxxConfig}
*
......
......@@ -51,17 +51,12 @@ bool CompareTensors(const std::string& name,
TEST(CXXApi_LightApi, optim_model) {
lite::Predictor cxx_api;
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kX86), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)}, // Both works on X86 and ARM
});
// On ARM devices, the preferred X86 target does not work, but it can still
// select ARM kernels.
cxx_api.Build(FLAGS_model_dir,
"",
"",
Place{TARGET(kX86), PRECISION(kFloat)},
valid_places);
cxx_api.Build(FLAGS_model_dir, "", "", valid_places);
cxx_api.SaveModel(FLAGS_optimized_model);
}
......@@ -72,17 +67,12 @@ TEST(CXXApi_LightApi, save_and_load_model) {
// CXXAPi
{
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kX86), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)}, // Both works on X86 and ARM
});
// On ARM devices, the preferred X86 target does not work, but it can still
// select ARM kernels.
cxx_api.Build(FLAGS_model_dir,
"",
"",
Place{TARGET(kX86), PRECISION(kFloat)},
valid_places);
cxx_api.Build(FLAGS_model_dir, "", "", valid_places);
auto* x = cxx_api.GetInput(0);
SetConstInput(x);
......
......@@ -32,7 +32,9 @@ DEFINE_string(input_shape,
DEFINE_string(result_filename, "", "save test result");
DEFINE_bool(run_model_optimize,
false,
"apply model_optimize_tool to model, use optimized model to test");
"if set true, apply model_optimize_tool to model, use optimized "
"model to test");
DEFINE_bool(is_quantized_model, false, "if set true, test the quantized model");
namespace paddle {
namespace lite_api {
......@@ -42,11 +44,14 @@ void OutputOptModel(const std::string& load_model_dir,
const std::vector<std::vector<int64_t>>& input_shapes) {
lite_api::CxxConfig config;
config.set_model_dir(load_model_dir);
config.set_preferred_place(Place{TARGET(kX86), PRECISION(kFloat)});
config.set_valid_places({
Place{TARGET(kX86), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
});
std::vector<Place> valid_places = {Place{TARGET(kARM), PRECISION(kFloat)},
Place{TARGET(kX86), PRECISION(kFloat)},
Place{TARGET(kOpenCL), PRECISION(kFloat)}};
if (FLAGS_is_quantized_model) {
valid_places.insert(valid_places.begin(),
Place{TARGET(kARM), PRECISION(kInt8)});
}
config.set_valid_places(valid_places);
auto predictor = lite_api::CreatePaddlePredictor(config);
int ret = system(
......@@ -70,11 +75,7 @@ void Run(const std::vector<std::vector<int64_t>>& input_shapes,
const std::string model_name) {
lite_api::MobileConfig config;
config.set_threads(thread_num);
if (thread_num == 1) {
config.set_power_mode(LITE_POWER_HIGH);
} else {
config.set_power_mode(LITE_POWER_NO_BIND);
}
config.set_power_mode(LITE_POWER_NO_BIND);
config.set_model_dir(model_dir);
auto predictor = lite_api::CreatePaddlePredictor(config);
......
......@@ -13,20 +13,27 @@
// limitations under the License.
#include "lite/api/cxx_api.h"
#include <algorithm>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>
#include "lite/utils/io.h"
#ifdef LITE_WITH_NPU
#include "lite/backends/npu/npu_helper.h"
#endif
namespace paddle {
namespace lite {
static const char TAILORD_OPS_SOURCE_LIST_FILENAME[] =
".tailored_ops_source_list";
static const char TAILORD_OPS_LIST_NAME[] = ".tailored_ops_list";
static const char TAILORD_KERNELS_SOURCE_LIST_FILENAME[] =
".tailored_kernels_source_list";
static const char TAILORD_KERNELS_LIST_NAME[] = ".tailored_kernels_list";
void Predictor::SaveModel(const std::string &dir,
lite_api::LiteModelType model_type) {
lite_api::LiteModelType model_type,
bool record_info) {
if (!program_) {
GenRuntimeProgram();
}
......@@ -42,41 +49,142 @@ void Predictor::SaveModel(const std::string &dir,
default:
LOG(FATAL) << "Unknown model type";
}
#ifdef LITE_WITH_NPU
for (auto name : npu::DeviceInfo::Global().AllClientNames()) {
// the npu offline model is saved in current dir
// so just copy to dst dir
CHECK_EQ(
system(string_format("cp -r %s %s", name.c_str(), dir.c_str()).c_str()),
0)
<< "Failed copy NPU model to " << dir;
if (record_info) {
SaveOpKernelInfo(dir);
}
#endif
}
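A hedged sketch of calling the new SaveModel overload at the lite::Predictor level, following the save_model tests further down in this diff; the directories are hypothetical:

lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kARM), PRECISION(kFloat)}});
predictor.Build("./mobilenet_v1", "", "", valid_places);  // hypothetical model dir
// record_info = true additionally writes the tailored op/kernel lists next to the model
predictor.SaveModel("./mobilenet_v1_opt",
                    lite_api::LiteModelType::kNaiveBuffer,
                    /*record_info=*/true);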
void Predictor::SaveOpKernelInfo(const std::string &model_dir) {
std::set<std::string> ops_info;
std::set<std::string> kernels_info;
const auto &instructions_ = program_->instructions();
for (auto &node : instructions_) {
// parse op type information
auto op = node.op()->op_info();
ops_info.insert(op->Type());
// parse kernel type information
std::string kernel_type_str =
node.kernel()->op_type() + "," + TargetRepr(node.kernel()->target()) +
"," + PrecisionRepr(node.kernel()->precision()) + "," +
DataLayoutRepr(node.kernel()->layout()) + "," + node.kernel()->alias();
kernels_info.insert(kernel_type_str);
}
// get source file name from op type and kernel type
auto op2pathmap = OpKernelInfoCollector::Global().GetOp2PathDict();
auto kernel2pathmap = OpKernelInfoCollector::Global().GetKernel2PathDict();
// write used op and kernel info into files
std::string opf_path = model_dir + "/" + TAILORD_OPS_LIST_NAME;
std::string opf_source_path =
model_dir + "/" + TAILORD_OPS_SOURCE_LIST_FILENAME;
std::string kpf_path = model_dir + "/" + TAILORD_KERNELS_LIST_NAME;
std::string kpf_source_path =
model_dir + "/" + TAILORD_KERNELS_SOURCE_LIST_FILENAME;
std::map<std::string, std::string> op2path;
std::FILE *opf = std::fopen(opf_path.c_str(), "w");
std::FILE *opf_source = std::fopen(opf_source_path.c_str(), "w");
std::FILE *kpf = std::fopen(kpf_path.c_str(), "w");
std::FILE *kpf_source = std::fopen(kpf_source_path.c_str(), "w");
std::vector<std::string> opcompile;
std::vector<std::string> kernelcompile;
if (nullptr == opf || nullptr == opf_source || nullptr == kpf ||
nullptr == kpf_source) {
LOG(FATAL) << "failed to create info file into: " << model_dir;
}
for (auto op_info = ops_info.begin(); op_info != ops_info.end(); op_info++) {
fputs(op_info->c_str(), opf);
fputc('\n', opf);
std::string op_path = op2pathmap[*op_info];
fputs(op_path.c_str(), opf_source);
fputc('\n', opf_source);
}
std::fclose(opf_source);
std::fclose(opf);
LOG(INFO) << "operators information of tailored model is stored into: "
<< opf_path;
// write Kernel_type and Kernel_path into file
for (auto kernel_info = kernels_info.begin();
kernel_info != kernels_info.end();
kernel_info++) {
fputs(kernel_info->c_str(), kpf);
fputc('\n', kpf);
std::string kernel_path = kernel2pathmap[*kernel_info];
fputs(kernel_path.c_str(), kpf_source);
fputc('\n', kpf_source);
if (kernel_path == "conv_compute.cc") {
fputs(
"conv_depthwise.cc\nconv_direct.cc\nconv_gemmlike.cc\nconv_"
"winograd.cc\n",
kpf_source);
}
}
std::fclose(kpf_source);
std::fclose(kpf);
LOG(INFO) << "kernels information of tailored model is stored into: "
<< kpf_path;
}
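From the public C++ API the same tailoring information is produced through SaveOptimizedModel with record_info set; a hedged sketch (the model paths are hypothetical, the calls mirror the benchmark and test code later in this diff):

paddle::lite_api::CxxConfig config;
config.set_model_dir("./mobilenet_v1");  // hypothetical model dir
config.set_valid_places({paddle::lite_api::Place{TARGET(kARM), PRECISION(kFloat)}});
auto predictor = paddle::lite_api::CreatePaddlePredictor(config);
// writes the optimized model plus the .tailored_* lists next to it
predictor->SaveOptimizedModel("./mobilenet_v1_opt",
                              paddle::lite_api::LiteModelType::kNaiveBuffer,
                              /*record_info=*/true);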
lite::Tensor *Predictor::GetInput(size_t offset) {
auto *_feed_list = exec_scope_->FindVar("feed");
CHECK(_feed_list) << "no feed variable in exec_scope";
auto *feed_list = _feed_list->GetMutable<std::vector<lite::Tensor>>();
if (offset >= feed_list->size()) {
feed_list->resize(offset + 1);
CHECK(input_names_.size() > offset)
<< "The network has " << input_names_.size() << " inputs"
<< ", the offset should be less than this.";
auto *in_var = exec_scope_->FindVar(input_names_[offset]);
CHECK(in_var) << "no fatch variable " << input_names_[offset]
<< " in exec_scope";
return in_var->GetMutable<lite::Tensor>();
}
// get input names
std::vector<std::string> Predictor::GetInputNames() { return input_names_; }
// get output names
std::vector<std::string> Predictor::GetOutputNames() { return output_names_; }
// append the names of inputs and outputs into input_names_ and output_names_
void Predictor::PrepareFeedFetch() {
auto current_block = program_desc_.GetBlock<cpp::BlockDesc>(0);
std::vector<cpp::OpDesc *> feeds;
std::vector<cpp::OpDesc *> fetchs;
for (size_t i = 0; i < current_block->OpsSize(); i++) {
auto op = current_block->GetOp<cpp::OpDesc>(i);
if (op->Type() == "feed") {
feeds.push_back(op);
} else if (op->Type() == "fetch") {
fetchs.push_back(op);
}
}
input_names_.resize(feeds.size());
output_names_.resize(fetchs.size());
for (size_t i = 0; i < feeds.size(); i++) {
input_names_[feeds[i]->GetAttr<int>("col")] =
feeds[i]->Output("Out").front();
}
for (size_t i = 0; i < fetchs.size(); i++) {
output_names_[fetchs[i]->GetAttr<int>("col")] =
fetchs[i]->Input("X").front();
}
return &feed_list->at(offset);
}
const lite::Tensor *Predictor::GetOutput(size_t offset) const {
auto *_fetch_list = exec_scope_->FindVar("fetch");
CHECK(_fetch_list) << "no fatch variable in exec_scope";
auto &fetch_list = *_fetch_list->GetMutable<std::vector<lite::Tensor>>();
CHECK_LT(offset, fetch_list.size()) << "offset " << offset << " overflow";
return &fetch_list.at(offset);
CHECK(output_names_.size() > offset)
<< "The network has " << output_names_.size() << " outputs"
<< ", the offset should be less than this.";
const std::string name = output_names_.at(offset);
auto *out_var = exec_scope_->FindVar(name);
CHECK(out_var) << "no fatch variable " << name << " in exec_scope";
return out_var->GetMutable<lite::Tensor>();
}
const std::vector<lite::Tensor> *Predictor::GetOutputs() const {
auto *_fetch_list = exec_scope_->FindVar("fetch");
CHECK(_fetch_list) << "no fatch variable in exec_scope";
auto &fetch_list = *_fetch_list->GetMutable<std::vector<lite::Tensor>>();
return &fetch_list;
std::vector<const lite::Tensor *> Predictor::GetOutputs() const {
std::vector<const lite::Tensor *> outputs;
size_t out_size = output_names_.size();
for (size_t i = 0; i < out_size; i++) {
const std::string name = output_names_.at(i);
outputs.push_back(GetTensor(name));
}
return outputs;
}
const cpp::ProgramDesc &Predictor::program_desc() const {
......@@ -91,14 +199,12 @@ void Predictor::Build(const lite_api::CxxConfig &config,
const std::string &model_path = config.model_dir();
const std::string &model_file = config.model_file();
const std::string &param_file = config.param_file();
const Place prefer_place = config.preferred_place();
const bool model_from_memory = config.model_from_memory();
LOG(INFO) << "load from memory " << model_from_memory;
Build(model_path,
model_file,
param_file,
prefer_place,
valid_places,
passes,
model_type,
......@@ -107,7 +213,6 @@ void Predictor::Build(const lite_api::CxxConfig &config,
void Predictor::Build(const std::string &model_path,
const std::string &model_file,
const std::string &param_file,
const Place &prefer_place,
const std::vector<Place> &valid_places,
const std::vector<std::string> &passes,
lite_api::LiteModelType model_type,
......@@ -134,21 +239,26 @@ void Predictor::Build(const std::string &model_path,
default:
LOG(FATAL) << "Unknown model type";
}
Build(program_desc_, prefer_place, valid_places, passes);
Build(program_desc_, valid_places, passes);
}
void Predictor::Build(const cpp::ProgramDesc &desc,
const Place &prefer_place,
const std::vector<Place> &valid_places,
const std::vector<std::string> &passes) {
program_desc_ = desc;
Program program(desc, scope_, valid_places);
optimizer_.KernelPickPreferPlace(prefer_place);
std::vector<Place> inner_places = valid_places;
inner_places.emplace_back(TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny));
inner_places.emplace_back(
TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW));
Program program(desc, scope_, inner_places);
/// The first place in valid_places has the highest priority when picking kernels.
core::KernelPickFactor factor;
factor.ConsiderTarget();
factor.ConsiderPrecision();
optimizer_.Run(std::move(program), valid_places, factor, passes);
factor.ConsiderDataLayout();
optimizer_.Run(std::move(program), inner_places, factor, passes);
exec_scope_ = optimizer_.exec_scope();
PrepareFeedFetch();
}
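With the preferred-place argument removed, kernel preference is now expressed purely by the order of valid_places (the first entry has the highest priority). A hedged sketch reusing the pattern from the int8 tests below, assuming a lite::Predictor named predictor:

std::vector<Place> valid_places({
    Place{TARGET(kARM), PRECISION(kInt8)},   // listed first, so int8 kernels are preferred
    Place{TARGET(kARM), PRECISION(kFloat)},  // fallback
});
predictor.Build(FLAGS_model_dir, "", "", valid_places);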
void Predictor::GenRuntimeProgram() {
......@@ -161,6 +271,21 @@ const lite::Tensor *Predictor::GetTensor(const std::string &name) const {
auto *var = exec_scope_->FindVar(name);
return &var->Get<lite::Tensor>();
}
// get input by name
lite::Tensor *Predictor::GetInputByName(const std::string &name) {
auto element = std::find(input_names_.begin(), input_names_.end(), name);
if (element == input_names_.end()) {
LOG(ERROR) << "Model do not have input named with: [" << name
<< "], model's inputs include:";
for (size_t i = 0; i < input_names_.size(); i++) {
LOG(ERROR) << "[" << input_names_[i] << "]";
}
return nullptr;
} else {
int position = std::distance(input_names_.begin(), element);
return GetInput(position);
}
}
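A hedged usage sketch of the name-based lookup; "image" is a hypothetical input variable name, and the Resize / mutable_data calls follow the pattern used in the model tests below:

auto* input_tensor = predictor.GetInputByName("image");  // hypothetical input name
if (input_tensor != nullptr) {
  input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
  auto* data = input_tensor->mutable_data<float>();
  for (int64_t i = 0; i < input_tensor->dims().production(); i++) {
    data[i] = 1.f;
  }
}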
#ifdef LITE_WITH_TRAIN
void Predictor::FeedVars(const std::vector<framework::Tensor> &tensors) {
......
......@@ -13,7 +13,9 @@
// limitations under the License.
#pragma once
#include <map>
#include <memory>
#include <mutex> //NOLINT
#include <string>
#include <utility>
#include <vector>
......@@ -49,14 +51,12 @@ class LITE_API Predictor {
const std::string& model_path,
const std::string& model_file_path,
const std::string& param_file_path,
const Place& prefer_place,
const std::vector<Place>& valid_places,
const std::vector<std::string>& passes = {},
lite_api::LiteModelType model_type = lite_api::LiteModelType::kProtobuf,
bool memory_from_memory = false);
void Build(const cpp::ProgramDesc& desc,
const Place& prefer_place,
const std::vector<Place>& valid_places,
const std::vector<std::string>& passes = {});
......@@ -68,15 +68,20 @@ class LITE_API Predictor {
GenRuntimeProgram();
}
program_->Run();
LOG(INFO) << "running";
}
// Get offset-th col of feed inputs.
lite::Tensor* GetInput(size_t offset);
// get input by name.
lite::Tensor* GetInputByName(const std::string& name);
// get input names and output names.
std::vector<std::string> GetInputNames();
std::vector<std::string> GetOutputNames();
void PrepareFeedFetch();
// Get offset-th col of fetch results.
const lite::Tensor* GetOutput(size_t offset) const;
const std::vector<lite::Tensor>* GetOutputs() const;
std::vector<const lite::Tensor*> GetOutputs() const;
const cpp::ProgramDesc& program_desc() const;
const lite::Tensor* GetTensor(const std::string& name) const;
......@@ -85,7 +90,9 @@ class LITE_API Predictor {
// This method is disabled on mobile builds because it would pull in unnecessary dependencies.
void SaveModel(
const std::string& dir,
lite_api::LiteModelType model_type = lite_api::LiteModelType::kProtobuf);
lite_api::LiteModelType model_type = lite_api::LiteModelType::kProtobuf,
bool record_info = false);
void SaveOpKernelInfo(const std::string& model_dir);
#ifdef LITE_WITH_TRAIN
void Run(const std::vector<framework::Tensor>& tensors) {
......@@ -103,6 +110,47 @@ class LITE_API Predictor {
const Scope* exec_scope_;
std::unique_ptr<RuntimeProgram> program_;
bool program_generated_{false};
std::vector<std::string> input_names_;
std::vector<std::string> output_names_;
};
class CxxPaddleApiImpl : public lite_api::PaddlePredictor {
public:
CxxPaddleApiImpl() {}
/// Create a new predictor from a config.
void Init(const lite_api::CxxConfig& config);
std::unique_ptr<lite_api::Tensor> GetInput(int i) override;
std::unique_ptr<const lite_api::Tensor> GetOutput(int i) const override;
void Run() override;
std::shared_ptr<lite_api::PaddlePredictor> Clone() override;
std::string GetVersion() const override;
// get input names and output names
std::vector<std::string> GetInputNames() override;
std::vector<std::string> GetOutputNames() override;
std::unique_ptr<const lite_api::Tensor> GetTensor(
const std::string& name) const override;
// Get input tensor by name
std::unique_ptr<lite_api::Tensor> GetInputByName(
const std::string& name) override;
void SaveOptimizedModel(
const std::string& model_dir,
lite_api::LiteModelType model_type = lite_api::LiteModelType::kProtobuf,
bool record_info = false) override;
private:
Predictor raw_predictor_;
lite_api::CxxConfig config_;
std::mutex mutex_;
};
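A hedged sketch of exercising the new name-discovery methods through the public predictor (it mirrors the light_api_test additions near the end of this diff; config is a CxxConfig prepared as elsewhere):

auto predictor = paddle::lite_api::CreatePaddlePredictor(config);
for (const auto& name : predictor->GetInputNames()) {
  LOG(INFO) << "input name: " << name;
}
for (const auto& name : predictor->GetOutputNames()) {
  LOG(INFO) << "output name: " << name;
}
// fetch the first input tensor via its discovered name
auto input = predictor->GetInputByName(predictor->GetInputNames().front());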
/*
......@@ -123,10 +171,8 @@ class LITE_API Predictor {
class LITE_API CXXTrainer {
public:
CXXTrainer(const std::shared_ptr<lite::Scope>& root_scope,
const Place& preferred_place,
const std::vector<Place>& valid_places)
: scope_(root_scope),
preferred_place_(preferred_place),
valid_places_(valid_places),
main_program_executor_(Predictor(scope_)) {}
......@@ -135,7 +181,7 @@ class LITE_API CXXTrainer {
// NOTE Just support to execute the 0-th block currently.
Predictor& BuildMainProgramExecutor(const framework::proto::ProgramDesc& desc,
int block_id = 0) {
main_program_executor_.Build(desc, preferred_place_, valid_places_);
main_program_executor_.Build(desc, valid_places_);
return main_program_executor_;
}
......@@ -153,14 +199,12 @@ class LITE_API CXXTrainer {
void RunStartupProgram(const framework::proto::ProgramDesc& desc,
int block_id = 0) {
Predictor exe(scope_);
exe.Build(desc, preferred_place_, valid_places_);
exe.Build(desc, valid_places_);
exe.Run();
}
private:
std::shared_ptr<lite::Scope> scope_;
Place preferred_place_;
std::vector<Place> valid_places_;
// The training program.
......
......@@ -35,13 +35,11 @@ void Run(const char* model_dir, int repeat) {
#endif
lite::Predictor predictor;
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kInt8)},
Place{TARGET(kARM), PRECISION(kFloat)},
});
predictor.Build(
model_dir, "", "", Place{TARGET(kARM), PRECISION(kInt8)}, valid_places);
predictor.Build(model_dir, "", "", valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
......
......@@ -13,41 +13,26 @@
// limitations under the License.
#include "lite/api/cxx_api.h"
#include <memory>
#include <mutex> //NOLINT
#include <string>
#include "lite/api/paddle_api.h"
#include "lite/core/device_info.h"
#include "lite/core/version.h"
namespace paddle {
namespace lite {
class CxxPaddleApiImpl : public lite_api::PaddlePredictor {
public:
CxxPaddleApiImpl();
/// Create a new predictor from a config.
void Init(const lite_api::CxxConfig &config);
std::unique_ptr<lite_api::Tensor> GetInput(int i) override;
std::unique_ptr<const lite_api::Tensor> GetOutput(int i) const override;
void Run() override;
std::unique_ptr<const lite_api::Tensor> GetTensor(
const std::string &name) const override;
void SaveOptimizedModel(const std::string &model_dir,
lite_api::LiteModelType model_type =
lite_api::LiteModelType::kProtobuf) override;
private:
Predictor raw_predictor_;
};
CxxPaddleApiImpl::CxxPaddleApiImpl() {}
void CxxPaddleApiImpl::Init(const lite_api::CxxConfig &config) {
config_ = config;
#ifdef LITE_WITH_CUDA
Env<TARGET(kCUDA)>::Init();
#endif
auto places = config.valid_places();
places.emplace_back(TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny));
raw_predictor_.Build(config, places);
mode_ = config.power_mode();
threads_ = config.threads();
}
std::unique_ptr<lite_api::Tensor> CxxPaddleApiImpl::GetInput(int i) {
......@@ -61,7 +46,29 @@ std::unique_ptr<const lite_api::Tensor> CxxPaddleApiImpl::GetOutput(
return std::unique_ptr<lite_api::Tensor>(new lite_api::Tensor(x));
}
void CxxPaddleApiImpl::Run() { raw_predictor_.Run(); }
std::vector<std::string> CxxPaddleApiImpl::GetInputNames() {
return raw_predictor_.GetInputNames();
}
std::vector<std::string> CxxPaddleApiImpl::GetOutputNames() {
return raw_predictor_.GetOutputNames();
}
void CxxPaddleApiImpl::Run() {
#ifdef LITE_WITH_ARM
lite::DeviceInfo::Global().SetRunMode(mode_, threads_);
#endif
raw_predictor_.Run();
}
std::shared_ptr<lite_api::PaddlePredictor> CxxPaddleApiImpl::Clone() {
std::lock_guard<std::mutex> lock(mutex_);
auto predictor = std::make_shared<lite::CxxPaddleApiImpl>();
predictor->Init(config_);
return predictor;
}
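Clone() rebuilds an independent predictor from the stored CxxConfig under a mutex, so one hedged usage is to give each worker its own instance rather than sharing a single predictor across threads:

auto base = paddle::lite_api::CreatePaddlePredictor(config);  // config as above
std::shared_ptr<paddle::lite_api::PaddlePredictor> worker = base->Clone();
worker->Run();  // runs on its own predictor, independent of base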
std::string CxxPaddleApiImpl::GetVersion() const { return version(); }
std::unique_ptr<const lite_api::Tensor> CxxPaddleApiImpl::GetTensor(
const std::string &name) const {
......@@ -69,9 +76,16 @@ std::unique_ptr<const lite_api::Tensor> CxxPaddleApiImpl::GetTensor(
return std::unique_ptr<const lite_api::Tensor>(new lite_api::Tensor(x));
}
std::unique_ptr<lite_api::Tensor> CxxPaddleApiImpl::GetInputByName(
const std::string &name) {
return std::unique_ptr<lite_api::Tensor>(
new lite_api::Tensor(raw_predictor_.GetInputByName(name)));
}
void CxxPaddleApiImpl::SaveOptimizedModel(const std::string &model_dir,
lite_api::LiteModelType model_type) {
raw_predictor_.SaveModel(model_dir, model_type);
lite_api::LiteModelType model_type,
bool record_info) {
raw_predictor_.SaveModel(model_dir, model_type, record_info);
}
} // namespace lite
......
......@@ -43,13 +43,8 @@ TEST(CXXApi, test) {
TEST(CXXApi, save_model) {
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kX86), PRECISION(kFloat)}});
predictor.Build(FLAGS_model_dir,
"",
"",
Place{TARGET(kCUDA), PRECISION(kFloat)},
valid_places);
std::vector<Place> valid_places({Place{TARGET(kX86), PRECISION(kFloat)}});
predictor.Build(FLAGS_model_dir, "", "", valid_places);
LOG(INFO) << "Save optimized model to " << FLAGS_optimized_model;
predictor.SaveModel(FLAGS_optimized_model,
......@@ -59,11 +54,11 @@ TEST(CXXApi, save_model) {
}
/*TEST(CXXTrainer, train) {
Place prefer_place({TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)});
std::vector<Place> valid_places({prefer_place});
Place place({TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)});
std::vector<Place> valid_places({place});
auto scope = std::make_shared<lite::Scope>();
CXXTrainer trainer(scope, prefer_place, valid_places);
CXXTrainer trainer(scope, valid_places);
std::string main_program_pb, startup_program_pb;
ReadBinaryFile(FLAGS_main_program_path, &main_program_pb);
......@@ -94,13 +89,8 @@ TEST(CXXApi, save_model) {
#ifdef LITE_WITH_ARM
TEST(CXXApi, save_model) {
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)}});
predictor.Build(FLAGS_model_dir,
"",
"",
Place{TARGET(kARM), PRECISION(kFloat)},
valid_places);
std::vector<Place> valid_places({Place{TARGET(kARM), PRECISION(kFloat)}});
predictor.Build(FLAGS_model_dir, "", "", valid_places);
LOG(INFO) << "Save optimized model to " << FLAGS_optimized_model;
predictor.SaveModel(FLAGS_optimized_model);
......@@ -110,12 +100,10 @@ TEST(CXXApi, save_model) {
TEST(CXXApi, load_model_naive) {
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)}});
std::vector<Place> valid_places({Place{TARGET(kARM), PRECISION(kFloat)}});
predictor.Build(FLAGS_optimized_model + ".naive",
"",
"",
Place{TARGET(kARM), PRECISION(kFloat)},
valid_places,
{},
lite_api::LiteModelType::kNaiveBuffer);
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <fstream>
#include <vector>
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/test_helper.h"
#include "lite/core/op_registry.h"
DEFINE_bool(is_run_model_optimize,
false,
"apply model_optimize_tool to model, use optimized model to test");
namespace paddle {
namespace lite_api {
void OutputOptModel(const std::string& load_model_dir,
const std::string& save_optimized_model_dir) {
lite_api::CxxConfig config;
config.set_model_dir(load_model_dir);
config.set_valid_places({
Place{TARGET(kX86), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
});
auto predictor = lite_api::CreatePaddlePredictor(config);
int ret = system(
paddle::lite::string_format("rm -rf %s", save_optimized_model_dir.c_str())
.c_str());
if (ret == 0) {
LOG(INFO) << "delete old optimized model " << save_optimized_model_dir;
}
predictor->SaveOptimizedModel(save_optimized_model_dir,
LiteModelType::kNaiveBuffer);
LOG(INFO) << "Load model from " << load_model_dir;
LOG(INFO) << "Save optimized model to " << save_optimized_model_dir;
}
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
void Run(const std::string& model_dir,
const int repeat,
const int warmup_times,
const int thread_num) {
// set config and create predictor
lite_api::MobileConfig config;
config.set_model_dir(model_dir);
config.set_threads(thread_num);
if (thread_num == 1) {
config.set_power_mode(LITE_POWER_HIGH);
} else {
config.set_power_mode(LITE_POWER_NO_BIND);
}
auto predictor = lite_api::CreatePaddlePredictor(config);
// set input
auto input_image = predictor->GetInput(0);
input_image->Resize({1, 3, 300, 300});
auto input_image_data = input_image->mutable_data<float>();
std::ifstream read_file("/data/local/tmp/pjc/ssd_img.txt");
if (!read_file.is_open()) {
LOG(INFO) << "read image file fail";
return;
}
auto input_shape = input_image->shape();
int64_t input_image_size = 1;
for (auto t : input_shape) {
input_image_size *= t;
}
for (int i = 0; i < input_image_size; i++) {
read_file >> input_image_data[i];
}
// warmup and run
for (int i = 0; i < warmup_times; ++i) {
predictor->Run();
}
auto start = lite::GetCurrentUS();
for (int i = 0; i < repeat; ++i) {
predictor->Run();
}
// show result
auto end = lite::GetCurrentUS();
LOG(INFO) << "================== Speed Report ===================";
LOG(INFO) << "Model: " << FLAGS_model_dir << ", threads num " << FLAGS_threads
<< ", warmup: " << FLAGS_warmup << ", repeats: " << FLAGS_repeats
<< ", spend " << (end - start) / FLAGS_repeats / 1000.0
<< " ms in average.";
auto out = predictor->GetOutput(0);
auto out_data = out->data<float>();
LOG(INFO) << "output shape:";
auto out_shape = out->shape();
for (auto t : out_shape) {
LOG(INFO) << t;
}
LOG(INFO) << "output data:";
int output_len = 20;
for (int i = 0; i < output_len; i++) {
LOG(INFO) << out_data[i];
}
}
#endif
} // namespace lite_api
} // namespace paddle
TEST(Faster_RCNN, test_arm) {
std::string save_optimized_model_dir;
if (FLAGS_is_run_model_optimize) {
save_optimized_model_dir = FLAGS_model_dir + "opt";
paddle::lite_api::OutputOptModel(FLAGS_model_dir, save_optimized_model_dir);
}
std::string run_model_dir =
FLAGS_is_run_model_optimize ? save_optimized_model_dir : FLAGS_model_dir;
paddle::lite_api::Run(
run_model_dir, FLAGS_repeats, FLAGS_warmup, FLAGS_threads);
}
......@@ -25,13 +25,12 @@
namespace paddle {
namespace lite {
void TestModel(const std::vector<Place> &valid_places,
const Place &preferred_place) {
void TestModel(const std::vector<Place> &valid_places) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places);
predictor.Build(FLAGS_model_dir, "", "", valid_places);
auto *input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
......@@ -80,22 +79,20 @@ void TestModel(const std::vector<Place> &valid_places,
TEST(EfficientNetB0, test_arm) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
// Place{TARGET(kOpenCL), PRECISION(kFloat)},
});
TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)}));
TestModel(valid_places);
}
TEST(EfficientNetB0, test_opencl) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
Place{TARGET(kOpenCL), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
});
TestModel(valid_places, Place({TARGET(kOpenCL), PRECISION(kFloat)}));
TestModel(valid_places);
}
} // namespace lite
......
......@@ -30,14 +30,9 @@ TEST(InceptionV4, test) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)}});
std::vector<Place> valid_places({Place{TARGET(kARM), PRECISION(kFloat)}});
predictor.Build(FLAGS_model_dir,
"",
"",
Place{TARGET(kARM), PRECISION(kFloat)},
valid_places);
predictor.Build(FLAGS_model_dir, "", "", valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include "lite/api/light_api.h"
#include <algorithm>
namespace paddle {
namespace lite {
......@@ -22,44 +23,94 @@ void LightPredictor::Build(const std::string& model_dir,
const std::string& param_buffer,
lite_api::LiteModelType model_type,
bool model_from_memory) {
cpp::ProgramDesc desc;
switch (model_type) {
#ifndef LITE_ON_TINY_PUBLISH
case lite_api::LiteModelType::kProtobuf:
LoadModelPb(model_dir, "", "", scope_.get(), &desc);
LoadModelPb(model_dir, "", "", scope_.get(), &cpp_program_desc_);
break;
#endif
case lite_api::LiteModelType::kNaiveBuffer: {
if (model_from_memory) {
LoadModelNaiveFromMemory(
model_buffer, param_buffer, scope_.get(), &desc);
model_buffer, param_buffer, scope_.get(), &cpp_program_desc_);
} else {
LoadModelNaive(model_dir, scope_.get(), &desc);
LoadModelNaive(model_dir, scope_.get(), &cpp_program_desc_);
}
break;
}
default:
LOG(FATAL) << "Unknown model type";
}
BuildRuntimeProgram(desc);
BuildRuntimeProgram(cpp_program_desc_);
PrepareFeedFetch();
}
Tensor* LightPredictor::GetInput(size_t offset) {
auto* _feed_list = program_->exec_scope()->FindVar("feed");
CHECK(_feed_list) << "no feed variable in exec_scope";
auto* feed_list = _feed_list->GetMutable<std::vector<Tensor>>();
if (offset >= feed_list->size()) {
feed_list->resize(offset + 1);
CHECK(input_names_.size() > offset)
<< "The network has " << input_names_.size() << " inputs"
<< ", the offset should be less than this.";
auto* in_var = program_->exec_scope()->FindVar(input_names_[offset]);
CHECK(in_var) << "no fatch variable " << input_names_[offset]
<< " in exec_scope";
return in_var->GetMutable<lite::Tensor>();
}
// get input by name
Tensor* LightPredictor::GetInputByName(const std::string& name) {
auto element = std::find(input_names_.begin(), input_names_.end(), name);
if (element == input_names_.end()) {
LOG(ERROR) << "Model do not have input named with: [" << name
<< "], model's inputs include:";
for (int i = 0; i < input_names_.size(); i++) {
LOG(ERROR) << "[" << input_names_[i] << "]";
}
return nullptr;
} else {
int position = std::distance(input_names_.begin(), element);
return GetInput(position);
}
return &feed_list->at(offset);
}
const Tensor* LightPredictor::GetOutput(size_t offset) {
auto* _fetch_list = program_->exec_scope()->FindVar("fetch");
CHECK(_fetch_list) << "no fatch variable in exec_scope";
auto& fetch_list = *_fetch_list->GetMutable<std::vector<lite::Tensor>>();
CHECK_LT(offset, fetch_list.size()) << "offset " << offset << " overflow";
return &fetch_list.at(offset);
CHECK(output_names_.size() > offset)
<< "The network has " << output_names_.size() << " outputs"
<< ", the offset should be less than this.";
auto* out_var = program_->exec_scope()->FindVar(output_names_.at(offset));
CHECK(out_var) << "no fatch variable " << output_names_.at(offset)
<< " in exec_scope";
return out_var->GetMutable<lite::Tensor>();
}
// get input names
std::vector<std::string> LightPredictor::GetInputNames() {
return input_names_;
}
// get output names
std::vector<std::string> LightPredictor::GetOutputNames() {
return output_names_;
}
// append the names of inputs and outputs into input_names_ and output_names_
void LightPredictor::PrepareFeedFetch() {
auto current_block = cpp_program_desc_.GetBlock<cpp::BlockDesc>(0);
std::vector<cpp::OpDesc*> feeds;
std::vector<cpp::OpDesc*> fetchs;
for (int i = 0; i < current_block->OpsSize(); i++) {
auto op = current_block->GetOp<cpp::OpDesc>(i);
if (op->Type() == "feed") {
feeds.push_back(op);
} else if (op->Type() == "fetch") {
fetchs.push_back(op);
}
}
input_names_.resize(feeds.size());
output_names_.resize(fetchs.size());
for (int i = 0; i < feeds.size(); i++) {
input_names_[feeds[i]->GetAttr<int>("col")] =
feeds[i]->Output("Out").front();
}
for (int i = 0; i < fetchs.size(); i++) {
output_names_[fetchs[i]->GetAttr<int>("col")] =
fetchs[i]->Input("X").front();
}
}
void LightPredictor::BuildRuntimeProgram(const cpp::ProgramDesc& prog) {
......@@ -84,9 +135,11 @@ void LightPredictor::BuildRuntimeProgram(const cpp::ProgramDesc& prog) {
});
CHECK(it != kernels.end());
(*it)->SetContext(ContextScheduler::Global().NewContext((*it)->target()));
insts.emplace_back(op, std::move(*it));
}
program_.reset(new RuntimeProgram(std::move(insts)));
CHECK(program.exec_scope());
program_->set_exec_scope(program.exec_scope());
}
......
......@@ -18,6 +18,7 @@
*/
#pragma once
#include <map>
#include <memory>
#include <string>
#include <utility>
......@@ -52,7 +53,8 @@ class LITE_API LightPredictor {
// Get offset-th col of feed inputs.
Tensor* GetInput(size_t offset);
// get input by name.
Tensor* GetInputByName(const std::string& name);
// Get offset-th col of fetch outputs.
const Tensor* GetOutput(size_t offset);
......@@ -61,6 +63,11 @@ class LITE_API LightPredictor {
return &var->Get<lite::Tensor>();
}
// get input names and output names.
std::vector<std::string> GetInputNames();
std::vector<std::string> GetOutputNames();
void PrepareFeedFetch();
private:
void Build(
const std::string& model_dir,
......@@ -74,6 +81,37 @@ class LITE_API LightPredictor {
private:
std::shared_ptr<Scope> scope_;
std::unique_ptr<RuntimeProgram> program_;
cpp::ProgramDesc cpp_program_desc_;
std::vector<std::string> input_names_;
std::vector<std::string> output_names_;
};
class LightPredictorImpl : public lite_api::PaddlePredictor {
public:
LightPredictorImpl() = default;
std::unique_ptr<lite_api::Tensor> GetInput(int i) override;
std::unique_ptr<const lite_api::Tensor> GetOutput(int i) const override;
void Run() override;
std::shared_ptr<lite_api::PaddlePredictor> Clone() override;
std::string GetVersion() const override;
std::vector<std::string> GetInputNames() override;
std::vector<std::string> GetOutputNames() override;
std::unique_ptr<const lite_api::Tensor> GetTensor(
const std::string& name) const override;
// Get input tensor by name
std::unique_ptr<lite_api::Tensor> GetInputByName(
const std::string& name) override;
void Init(const lite_api::MobileConfig& config);
private:
std::unique_ptr<lite::LightPredictor> raw_predictor_;
};
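A hedged end-to-end sketch of the light path declared above; the power-mode and thread settings follow the benchmark code earlier in this diff, and the model directory is hypothetical:

paddle::lite_api::MobileConfig m_config;
m_config.set_model_dir("./model_opt.naive");  // hypothetical naive-buffer model dir
m_config.set_threads(2);
m_config.set_power_mode(paddle::lite_api::LITE_POWER_NO_BIND);
auto light = paddle::lite_api::CreatePaddlePredictor<paddle::lite_api::MobileConfig>(m_config);
light->Run();
auto out = light->GetOutput(0);  // first fetch result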
} // namespace lite
......
......@@ -13,64 +13,78 @@
// limitations under the License.
#include "lite/api/light_api.h"
#include <string>
#include "lite/api/paddle_api.h"
#include "lite/core/version.h"
#include "lite/model_parser/model_parser.h"
namespace paddle {
namespace lite_api {
class LightPredictorImpl : public PaddlePredictor {
public:
LightPredictorImpl() = default;
std::unique_ptr<Tensor> GetInput(int i) override;
std::unique_ptr<const Tensor> GetOutput(int i) const override;
void Run() override;
std::unique_ptr<const Tensor> GetTensor(
const std::string& name) const override;
namespace lite {
void LightPredictorImpl::Init(const lite_api::MobileConfig& config) {
// LightPredictor only supports the NaiveBuffer backend in the publish lib
raw_predictor_.reset(
new LightPredictor(config.model_dir(),
config.model_buffer(),
config.param_buffer(),
config.model_from_memory(),
lite_api::LiteModelType::kNaiveBuffer));
mode_ = config.power_mode();
threads_ = config.threads();
}
void Init(const MobileConfig& config);
std::unique_ptr<lite_api::Tensor> LightPredictorImpl::GetInput(int i) {
return std::unique_ptr<lite_api::Tensor>(
new lite_api::Tensor(raw_predictor_->GetInput(i)));
}
private:
std::unique_ptr<lite::LightPredictor> raw_predictor_;
};
std::unique_ptr<const lite_api::Tensor> LightPredictorImpl::GetOutput(
int i) const {
return std::unique_ptr<lite_api::Tensor>(
new lite_api::Tensor(raw_predictor_->GetOutput(i)));
}
void LightPredictorImpl::Init(const MobileConfig& config) {
// LightPredictor only supports the NaiveBuffer backend in the publish lib
void LightPredictorImpl::Run() {
#ifdef LITE_WITH_ARM
lite::DeviceInfo::Init();
lite::DeviceInfo::Global().SetRunMode(config.power_mode(), config.threads());
lite::DeviceInfo::Global().SetRunMode(mode_, threads_);
#endif
raw_predictor_.reset(new lite::LightPredictor(config.model_dir(),
config.model_buffer(),
config.param_buffer(),
config.model_from_memory(),
LiteModelType::kNaiveBuffer));
raw_predictor_->Run();
}
std::unique_ptr<Tensor> LightPredictorImpl::GetInput(int i) {
return std::unique_ptr<Tensor>(new Tensor(raw_predictor_->GetInput(i)));
std::shared_ptr<lite_api::PaddlePredictor> LightPredictorImpl::Clone() {
LOG(FATAL) << "The Clone API is not supported in LigthPredictor";
}
std::unique_ptr<const Tensor> LightPredictorImpl::GetOutput(int i) const {
return std::unique_ptr<Tensor>(new Tensor(raw_predictor_->GetOutput(i)));
std::string LightPredictorImpl::GetVersion() const { return lite::version(); }
std::unique_ptr<const lite_api::Tensor> LightPredictorImpl::GetTensor(
const std::string& name) const {
return std::unique_ptr<const lite_api::Tensor>(
new lite_api::Tensor(raw_predictor_->GetTensor(name)));
}
std::unique_ptr<lite_api::Tensor> LightPredictorImpl::GetInputByName(
const std::string& name) {
return std::unique_ptr<lite_api::Tensor>(
new lite_api::Tensor(raw_predictor_->GetInputByName(name)));
}
void LightPredictorImpl::Run() { raw_predictor_->Run(); }
std::vector<std::string> LightPredictorImpl::GetInputNames() {
return raw_predictor_->GetInputNames();
}
std::unique_ptr<const Tensor> LightPredictorImpl::GetTensor(
const std::string& name) const {
return std::unique_ptr<const Tensor>(
new Tensor(raw_predictor_->GetTensor(name)));
std::vector<std::string> LightPredictorImpl::GetOutputNames() {
return raw_predictor_->GetOutputNames();
}
} // namespace lite
namespace lite_api {
template <>
std::shared_ptr<PaddlePredictor> CreatePaddlePredictor(
const MobileConfig& config) {
auto x = std::make_shared<LightPredictorImpl>();
auto x = std::make_shared<lite::LightPredictorImpl>();
x->Init(config);
return x;
}
......
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#ifndef LITE_ON_TINY_PUBLISH
#include "lite/api/paddle_use_passes.h"
#endif
namespace paddle {
namespace lite_api {
void RunModel() {
// 1. Set MobileConfig
MobileConfig mobile_config;
// 2. Create PaddlePredictor by MobileConfig
std::shared_ptr<PaddlePredictor> mobile_predictor =
CreatePaddlePredictor<MobileConfig>(mobile_config);
}
} // namespace lite_api
} // namespace paddle
......@@ -36,6 +36,18 @@ TEST(LightAPI, load) {
data[i] = i;
}
predictor.PrepareFeedFetch();
const std::vector<std::string> inputs = predictor.GetInputNames();
LOG(INFO) << "input size: " << inputs.size();
for (int i = 0; i < inputs.size(); i++) {
LOG(INFO) << "inputnames: " << inputs[i];
}
const std::vector<std::string> outputs = predictor.GetOutputNames();
for (int i = 0; i < outputs.size(); i++) {
LOG(INFO) << "outputnames: " << outputs[i];
}
predictor.Run();
const auto* output = predictor.GetOutput(0);
......
......@@ -24,24 +24,16 @@ namespace lite {
const lite::Tensor* RunHvyModel() {
lite::Predictor predictor;
#ifndef LITE_WITH_CUDA
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kX86), PRECISION(kFloat)}});
std::vector<Place> valid_places({Place{TARGET(kX86), PRECISION(kFloat)}});
#else
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)},
Place{TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW)},
Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kNCHW)},
Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kNCHW)},
Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kAny)},
Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)},
});
#endif
predictor.Build(FLAGS_model_dir,
"",
"",
Place{TARGET(kX86), PRECISION(kFloat)}, // origin cuda
valid_places);
predictor.Build(FLAGS_model_dir, "", "", valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({100, 100})));
......
......@@ -14,6 +14,7 @@
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <fstream>
#include <vector>
#include "lite/api/cxx_api.h"
#include "lite/api/paddle_use_kernels.h"
......@@ -22,23 +23,36 @@
#include "lite/api/test_helper.h"
#include "lite/core/op_registry.h"
DEFINE_string(input_img_txt_path,
"",
"if set input_img_txt_path, read the img filename as input.");
namespace paddle {
namespace lite {
void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place) {
void TestModel(const std::vector<Place>& valid_places) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places);
predictor.Build(FLAGS_model_dir, "", "", valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
auto* data = input_tensor->mutable_data<float>();
auto item_size = input_tensor->dims().production();
for (int i = 0; i < item_size; i++) {
data[i] = 1;
if (FLAGS_input_img_txt_path.empty()) {
for (int i = 0; i < item_size; i++) {
data[i] = 1;
}
} else {
std::fstream fs(FLAGS_input_img_txt_path, std::ios::in);
if (!fs.is_open()) {
LOG(FATAL) << "open input_img_txt error.";
}
for (int i = 0; i < item_size; i++) {
fs >> data[i];
}
}
for (int i = 0; i < FLAGS_warmup; ++i) {
......@@ -58,8 +72,9 @@ void TestModel(const std::vector<Place>& valid_places,
std::vector<std::vector<float>> results;
// i = 1
// ground truth result from fluid
results.emplace_back(std::vector<float>(
{0.000227548, 0.000262385, 0.000260347, 0.000293865, 0.00025008}));
{0.0002451055, 0.0002585023, 0.0002659616, 0.0002823}));
auto* out = predictor.GetOutput(0);
ASSERT_EQ(out->dims().size(), 2);
ASSERT_EQ(out->dims()[0], 1);
......@@ -73,16 +88,30 @@ void TestModel(const std::vector<Place>& valid_places,
1e-6);
}
}
auto* out_data = out->data<float>();
LOG(INFO) << "output data:";
for (int i = 0; i < out->numel(); i += step) {
LOG(INFO) << out_data[i];
}
float max_val = out_data[0];
int max_val_arg = 0;
for (int i = 1; i < out->numel(); i++) {
if (max_val < out_data[i]) {
max_val = out_data[i];
max_val_arg = i;
}
}
LOG(INFO) << "max val:" << max_val << ", max_val_arg:" << max_val_arg;
}
TEST(MobileNetV1, test_arm) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kInt8)},
Place{TARGET(kARM), PRECISION(kFloat)},
});
TestModel(valid_places, Place({TARGET(kARM), PRECISION(kInt8)}));
TestModel(valid_places);
}
} // namespace lite
......
......@@ -26,13 +26,12 @@ namespace paddle {
namespace lite {
#ifdef LITE_WITH_ARM
void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place) {
void TestModel(const std::vector<Place>& valid_places) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places);
predictor.Build(FLAGS_model_dir, "", "", valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 300, 300})));
......@@ -99,7 +98,6 @@ void TestModel(const std::vector<Place>& valid_places,
TEST(MobileNetV1_SSD, test_arm) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
});
......
......@@ -28,14 +28,13 @@ namespace paddle {
namespace lite {
void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place,
const std::string& model_dir = FLAGS_model_dir,
bool save_model = false) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(model_dir, "", "", preferred_place, valid_places);
predictor.Build(model_dir, "", "", valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
......@@ -103,41 +102,32 @@ void TestModel(const std::vector<Place>& valid_places,
#ifdef LITE_WITH_NPU
TEST(MobileNetV1, test_npu) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
Place{TARGET(kNPU), PRECISION(kFloat)},
});
TestModel(valid_places,
Place({TARGET(kARM), PRECISION(kFloat)}),
FLAGS_model_dir,
true /* save_model*/);
TestModel(valid_places, FLAGS_model_dir, true /* save_model*/);
TestModel(valid_places,
Place({TARGET(kARM), PRECISION(kFloat)}),
FLAGS_optimized_model,
false /* save model */);
TestModel(valid_places, FLAGS_optimized_model, false /* save model */);
}
#endif // LITE_WITH_NPU
TEST(MobileNetV1, test_arm) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
});
TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)}));
TestModel(valid_places);
}
#ifdef LITE_WITH_OPENCL
TEST(MobileNetV1, test_opencl) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
Place{TARGET(kOpenCL), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
});
TestModel(valid_places, Place({TARGET(kOpenCL), PRECISION(kFloat)}));
TestModel(valid_places);
}
#endif // LITE_WITH_OPENCL
......
......@@ -26,13 +26,12 @@ namespace paddle {
namespace lite {
#ifdef LITE_WITH_ARM
void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place) {
void TestModel(const std::vector<Place>& valid_places) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places);
predictor.Build(FLAGS_model_dir, "", "", valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 608, 608})));
......@@ -106,11 +105,10 @@ void TestModel(const std::vector<Place>& valid_places,
TEST(MobileNetV1_YoloV3, test_arm) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
});
TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)}));
TestModel(valid_places);
}
#endif // LITE_WITH_ARM
......
......@@ -29,14 +29,13 @@ namespace lite {
#ifdef LITE_WITH_ARM
void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place,
const std::string& model_dir = FLAGS_model_dir,
bool save_model = false) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(model_dir, "", "", preferred_place, valid_places);
predictor.Build(model_dir, "", "", valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
......@@ -103,41 +102,32 @@ void TestModel(const std::vector<Place>& valid_places,
#ifdef LITE_WITH_NPU
TEST(MobileNetV2, test_npu) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
Place{TARGET(kNPU), PRECISION(kFloat)},
});
TestModel(valid_places,
Place({TARGET(kARM), PRECISION(kFloat)}),
FLAGS_model_dir,
true /* save_model*/);
TestModel(valid_places, FLAGS_model_dir, true /* save_model*/);
TestModel(valid_places,
Place({TARGET(kARM), PRECISION(kFloat)}),
FLAGS_optimized_model,
false /* save model */);
TestModel(valid_places, FLAGS_optimized_model, false /* save model */);
}
#endif // LITE_WITH_NPU
TEST(MobileNetV2, test_arm) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
});
TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)}));
TestModel(valid_places);
}
#ifdef LITE_WITH_OPENCL
TEST(MobileNetV2, test_opencl) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
Place{TARGET(kOpenCL), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
});
TestModel(valid_places, Place({TARGET(kOpenCL), PRECISION(kFloat)}));
TestModel(valid_places);
}
#endif // LITE_WITH_OPENCL
......
......@@ -16,10 +16,14 @@
#ifdef PADDLE_WITH_TESTING
#include <gtest/gtest.h>
#endif
// "all_kernel_faked.cc" and "kernel_src_map.h" are created automatically during
// model_optimize_tool's compiling period
#include "all_kernel_faked.cc" // NOLINT
#include "kernel_src_map.h" // NOLINT
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/core/op_registry.h"
#include "lite/utils/cp_logging.h"
#include "lite/utils/string.h"
......@@ -33,6 +37,12 @@ DEFINE_string(
optimize_out_type,
"protobuf",
"store type of the output optimized model. protobuf/naive_buffer");
DEFINE_bool(display_kernels, false, "Display kernel information");
DEFINE_bool(record_tailoring_info,
false,
"Record kernels and operators information of the optimized model "
"for tailoring compiling, information are stored into optimized "
"model path as hidden files");
DEFINE_string(optimize_out, "", "path of the output optimized model");
DEFINE_string(valid_targets,
"arm",
......@@ -43,12 +53,22 @@ DEFINE_bool(prefer_int8_kernel, false, "Prefer to run model with int8 kernels");
namespace paddle {
namespace lite_api {
//! Display the kernel information.
void DisplayKernels() {
LOG(INFO) << ::paddle::lite::KernelRegistry::Global().DebugString();
}
void Main() {
if (!FLAGS_model_file.empty() && !FLAGS_param_file.empty()) {
LOG(WARNING)
<< "Load combined-param model. Option model_dir will be ignored";
}
if (FLAGS_display_kernels) {
DisplayKernels();
exit(0);
}
lite_api::CxxConfig config;
config.set_model_dir(FLAGS_model_dir);
config.set_model_file(FLAGS_model_file);
......@@ -74,10 +94,11 @@ void Main() {
CHECK(!valid_places.empty())
<< "At least one target should be set, should set the "
"command argument 'valid_targets'";
if (FLAGS_prefer_int8_kernel) {
LOG(WARNING) << "Int8 mode is only support by ARM target";
valid_places.push_back(Place{TARGET(kARM), PRECISION(kInt8)});
config.set_preferred_place(Place{TARGET(kARM), PRECISION(kInt8)});
valid_places.insert(valid_places.begin(),
Place{TARGET(kARM), PRECISION(kInt8)});
}
config.set_valid_places(valid_places);
......@@ -91,8 +112,14 @@ void Main() {
} else {
LOG(FATAL) << "Unsupported Model type :" << FLAGS_optimize_out_type;
}
OpKernelInfoCollector::Global().SetKernel2path(kernel2path_map);
predictor->SaveOptimizedModel(FLAGS_optimize_out, model_type);
predictor->SaveOptimizedModel(
FLAGS_optimize_out, model_type, FLAGS_record_tailoring_info);
if (FLAGS_record_tailoring_info) {
LOG(INFO) << "Record the information of tailored model into :"
<< FLAGS_optimize_out;
}
}
} // namespace lite_api
......
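Condensed, the optimize flow above amounts to roughly the sketch below; the model paths and the single ARM place are hypothetical choices rather than the tool's defaults.
void OptimizeSketch() {
  paddle::lite_api::CxxConfig config;
  config.set_model_dir("./mobilenet_v1");  // placeholder input model
  config.set_valid_places({
      paddle::lite_api::Place{TARGET(kARM), PRECISION(kFloat)},
  });
  auto predictor = paddle::lite_api::CreatePaddlePredictor(config);
  // Persist a naive-buffer model and, with the new third argument, also record
  // the kernel/operator information used for tailored compiling.
  predictor->SaveOptimizedModel("./mobilenet_v1_opt",
                                paddle::lite_api::LiteModelType::kNaiveBuffer,
                                true /* record_info */);
}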
......@@ -28,18 +28,16 @@ namespace lite {
TEST(model, test) {
#ifdef LITE_WITH_ARM
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads);
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
std::vector<Place> valid_places({Place{TARGET(kARM), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kInt8)}});
auto precision = PRECISION(kFloat);
if (FLAGS_int8) {
precision = PRECISION(kInt8);
}
predictor.Build(
FLAGS_model_dir, "", "", Place{TARGET(kARM), precision}, valid_places);
predictor.Build(FLAGS_model_dir, "", "", valid_places);
int im_width = FLAGS_im_width;
int im_height = FLAGS_im_height;
auto* input_tensor = predictor.GetInput(0);
......@@ -60,11 +58,11 @@ TEST(model, test) {
for (int i = 0; i < FLAGS_repeats; ++i) {
predictor.Run();
}
auto* output_tensors = predictor.GetOutputs();
auto output_tensors = predictor.GetOutputs();
LOG(INFO) << "======output:========";
for (auto t : *output_tensors) {
LOG(INFO) << t;
for (auto* t : output_tensors) {
LOG(INFO) << *t;
}
LOG(INFO)
<< "=====RUN_finished!!============= Speed Report ===================";
......
......@@ -21,13 +21,23 @@
#include "lite/api/paddle_use_passes.h"
#include "lite/api/test_helper.h"
#include "lite/core/device_info.h"
#include "lite/tests/utils/timer.h"
#include "lite/utils/cp_logging.h"
#include "lite/utils/string.h"
#ifdef LITE_WITH_PROFILE
#include "lite/core/profile/basic_profiler.h"
#endif // LITE_WITH_PROFILE
using paddle::lite::Timer;
DEFINE_string(input_shape,
"1,3,224,224",
"input shapes, separated by colon and comma");
DEFINE_bool(use_optimize_nb,
false,
"optimized & naive buffer model for mobile devices");
namespace paddle {
namespace lite_api {
......@@ -36,7 +46,6 @@ void OutputOptModel(const std::string& load_model_dir,
const std::vector<std::vector<int64_t>>& input_shapes) {
lite_api::CxxConfig config;
config.set_model_dir(load_model_dir);
config.set_preferred_place(Place{TARGET(kX86), PRECISION(kFloat)});
config.set_valid_places({
Place{TARGET(kX86), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
......@@ -59,15 +68,18 @@ void OutputOptModel(const std::string& load_model_dir,
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
void Run(const std::vector<std::vector<int64_t>>& input_shapes,
const std::string& model_dir,
const int repeat,
const PowerMode power_mode,
const int thread_num,
const int repeat,
const int warmup_times = 0) {
#ifdef LITE_WITH_ARM
lite::DeviceInfo::Init();
lite::DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, thread_num);
#ifdef LITE_WITH_PROFILE
lite::profile::BasicProfiler<lite::profile::BasicTimer>::Global().SetWarmup(
warmup_times);
#endif
lite_api::MobileConfig config;
config.set_model_dir(model_dir);
config.set_power_mode(power_mode);
config.set_threads(thread_num);
auto predictor = lite_api::CreatePaddlePredictor(config);
......@@ -88,17 +100,22 @@ void Run(const std::vector<std::vector<int64_t>>& input_shapes,
predictor->Run();
}
auto start = lite::GetCurrentUS();
for (int i = 0; i < repeat; ++i) {
Timer ti;
for (int j = 0; j < repeat; ++j) {
ti.start();
predictor->Run();
ti.end();
LOG(INFO) << "iter: " << j << ", time: " << ti.latest_time() << " ms";
}
auto end = lite::GetCurrentUS();
LOG(INFO) << "================== Speed Report ===================";
LOG(INFO) << "Model: " << model_dir << ", threads num " << thread_num
<< ", warmup: " << warmup_times << ", repeats: " << repeat
<< ", spend " << (end - start) / repeat / 1000.0
<< " ms in average.";
LOG(INFO) << "Model: " << model_dir
<< ", power_mode: " << static_cast<int>(power_mode)
<< ", threads num " << thread_num << ", warmup: " << warmup_times
<< ", repeats: " << repeat << ", avg time: " << ti.get_average_ms()
<< " ms"
<< ", min time: " << ti.get_min_time() << " ms"
<< ", max time: " << ti.get_max_time() << " ms.";
auto output = predictor->GetOutput(0);
auto out = output->data<float>();
......@@ -123,7 +140,12 @@ int main(int argc, char** argv) {
<< "--model_dir /path/to/your/model";
exit(0);
}
std::string save_optimized_model_dir = FLAGS_model_dir + "opt2";
std::string save_optimized_model_dir = "";
if (FLAGS_use_optimize_nb) {
save_optimized_model_dir = FLAGS_model_dir;
} else {
save_optimized_model_dir = FLAGS_model_dir + "opt2";
}
auto split_string =
[](const std::string& str_in) -> std::vector<std::string> {
......@@ -165,17 +187,21 @@ int main(int argc, char** argv) {
input_shapes.push_back(get_shape(str_input_shapes[i]));
}
// Output optimized model
paddle::lite_api::OutputOptModel(
FLAGS_model_dir, save_optimized_model_dir, input_shapes);
if (!FLAGS_use_optimize_nb) {
// Output optimized model
paddle::lite_api::OutputOptModel(
FLAGS_model_dir, save_optimized_model_dir, input_shapes);
}
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
// Run inference using optimized model
paddle::lite_api::Run(input_shapes,
save_optimized_model_dir,
FLAGS_repeats,
FLAGS_threads,
FLAGS_warmup);
paddle::lite_api::Run(
input_shapes,
save_optimized_model_dir,
static_cast<paddle::lite_api::PowerMode>(FLAGS_power_mode),
FLAGS_threads,
FLAGS_repeats,
FLAGS_warmup);
#endif
return 0;
}
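The split_string and get_shape helpers are elided from this diff. A minimal stand-in that parses the --input_shape format (dimensions separated by commas, shapes separated by colons) could look like the sketch below; it is illustrative only and does not claim to match the elided implementation.
#include <cstdint>
#include <sstream>
#include <string>
#include <vector>

std::vector<std::vector<int64_t>> ParseInputShapes(const std::string& flag) {
  std::vector<std::vector<int64_t>> shapes;
  std::stringstream shape_ss(flag);
  std::string one_shape;
  while (std::getline(shape_ss, one_shape, ':')) {  // shapes separated by ':'
    std::vector<int64_t> dims;
    std::stringstream dim_ss(one_shape);
    std::string dim;
    while (std::getline(dim_ss, dim, ',')) {  // dims separated by ','
      dims.push_back(std::stoll(dim));
    }
    shapes.push_back(dims);
  }
  return shapes;
}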
......@@ -25,14 +25,12 @@
namespace paddle {
namespace lite {
void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place,
bool use_npu = false) {
void TestModel(const std::vector<Place>& valid_places, bool use_npu = false) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places);
predictor.Build(FLAGS_model_dir, "", "", valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 1, 48, 512})));
......@@ -104,11 +102,10 @@ void TestModel(const std::vector<Place>& valid_places,
TEST(OcrAttention, test_arm) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
});
TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)}));
TestModel(valid_places);
}
} // namespace lite
......
......@@ -13,8 +13,14 @@
// limitations under the License.
#include "lite/api/paddle_api.h"
#include "lite/core/device_info.h"
#include "lite/core/target_wrapper.h"
#include "lite/core/tensor.h"
#ifdef LITE_WITH_CUDA
#include "lite/backends/cuda/target_wrapper.h"
#endif
namespace paddle {
namespace lite_api {
......@@ -40,26 +46,115 @@ template <>
const int8_t *Tensor::data() const {
return ctensor(raw_tensor_)->data<int8_t>();
}
template <>
const int64_t *Tensor::data() const {
return ctensor(raw_tensor_)->data<int64_t>();
}
template <>
float *Tensor::mutable_data() const {
return tensor(raw_tensor_)->mutable_data<float>();
const int32_t *Tensor::data() const {
return ctensor(raw_tensor_)->data<int32_t>();
}
template <>
int8_t *Tensor::mutable_data() const {
return tensor(raw_tensor_)->mutable_data<int8_t>();
int *Tensor::mutable_data(TargetType type) const {
return tensor(raw_tensor_)->mutable_data<int>(type);
}
template <>
float *Tensor::mutable_data(TargetType type) const {
return tensor(raw_tensor_)->mutable_data<float>(type);
}
template <>
int8_t *Tensor::mutable_data(TargetType type) const {
return tensor(raw_tensor_)->mutable_data<int8_t>(type);
}
template <>
int64_t *Tensor::mutable_data(TargetType type) const {
return tensor(raw_tensor_)->mutable_data<int64_t>(type);
}
template <typename T, TargetType type>
void Tensor::CopyFromCpu(const T *src_data) {
T *data = tensor(raw_tensor_)->mutable_data<T>(type);
int64_t num = tensor(raw_tensor_)->numel();
CHECK(num > 0) << "You should call the Resize interface first";
if (type == TargetType::kHost || type == TargetType::kARM) {
lite::TargetWrapperHost::MemcpySync(
data, src_data, num * sizeof(T), lite::IoDirection::HtoH);
} else if (type == TargetType::kCUDA) {
#ifdef LITE_WITH_CUDA
lite::TargetWrapperCuda::MemcpySync(
data, src_data, num * sizeof(T), lite::IoDirection::HtoD);
#else
LOG(FATAL) << "Please compile the lib with CUDA.";
#endif
} else {
LOG(FATAL) << "The CopyFromCpu interface just support kHost, kARM, kCUDA";
}
}
template <typename T>
void Tensor::CopyToCpu(T *data) {
const T *src_data = tensor(raw_tensor_)->data<T>();
int64_t num = tensor(raw_tensor_)->numel();
CHECK(num > 0) << "You should call the Resize interface first";
auto type = tensor(raw_tensor_)->target();
if (type == TargetType::kHost || type == TargetType::kARM) {
lite::TargetWrapperHost::MemcpySync(
data, src_data, num * sizeof(T), lite::IoDirection::HtoH);
} else if (type == TargetType::kCUDA) {
#ifdef LITE_WITH_CUDA
lite::TargetWrapperCuda::MemcpySync(
data, src_data, num * sizeof(T), lite::IoDirection::DtoH);
#else
LOG(FATAL) << "Please compile the lib with CUDA.";
#endif
} else {
LOG(FATAL) << "The CopyToCpu interface just support kHost, kARM, kCUDA";
}
}
template void Tensor::CopyFromCpu<int, TargetType::kHost>(const int *);
template void Tensor::CopyFromCpu<float, TargetType::kHost>(const float *);
template void Tensor::CopyFromCpu<int8_t, TargetType::kHost>(const int8_t *);
template void Tensor::CopyFromCpu<int, TargetType::kARM>(const int *);
template void Tensor::CopyFromCpu<float, TargetType::kARM>(const float *);
template void Tensor::CopyFromCpu<int8_t, TargetType::kARM>(const int8_t *);
template void Tensor::CopyFromCpu<int, TargetType::kCUDA>(const int *);
template void Tensor::CopyFromCpu<float, TargetType::kCUDA>(const float *);
template void Tensor::CopyFromCpu<int8_t, TargetType::kCUDA>(const int8_t *);
template void Tensor::CopyToCpu(int8_t *);
template void Tensor::CopyToCpu(float *);
template void Tensor::CopyToCpu(int *);
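As a usage note, the new copy interfaces are meant to be called right after Resize; the explicit target parameter selects a host-to-host or host-to-device copy. The snippet below is hypothetical caller code, not part of this file.
void FeedInputSketch(paddle::lite_api::PaddlePredictor* predictor) {
  using paddle::lite_api::TargetType;
  auto input = predictor->GetInput(0);
  // Resize first: CopyFromCpu CHECKs that numel() is positive.
  input->Resize(std::vector<int64_t>({1, 3, 224, 224}));
  std::vector<float> host_data(1 * 3 * 224 * 224, 1.f);
  // kHost performs a host-to-host memcpy; on a CUDA build, passing kCUDA
  // issues a host-to-device copy instead.
  input->CopyFromCpu<float, TargetType::kHost>(host_data.data());
  predictor->Run();
}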
shape_t Tensor::shape() const {
return ctensor(raw_tensor_)->dims().Vectorize();
}
TargetType Tensor::target() const {
auto type = ctensor(raw_tensor_)->target();
if (type == TargetType::kUnk) {
CHECK(false) << "This tensor was not initialized.";
}
return type;
}
PrecisionType Tensor::precision() const {
auto precision = ctensor(raw_tensor_)->precision();
if (precision == PrecisionType::kUnk) {
CHECK(false) << "This tensor was not initialized.";
}
return precision;
}
lod_t Tensor::lod() const { return ctensor(raw_tensor_)->lod(); }
void Tensor::SetLoD(const lod_t &lod) { tensor(raw_tensor_)->set_lod(lod); }
void PaddlePredictor::SaveOptimizedModel(const std::string &model_dir,
LiteModelType model_type) {
LiteModelType model_type,
bool record_info) {
LOG(FATAL)
<< "The SaveOptimizedModel API is only supported by CxxConfig predictor.";
}
......@@ -69,5 +164,30 @@ std::shared_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT &) {
return std::shared_ptr<PaddlePredictor>();
}
ConfigBase::ConfigBase(PowerMode mode, int threads) {
#ifdef LITE_WITH_ARM
lite::DeviceInfo::Init();
lite::DeviceInfo::Global().SetRunMode(mode, threads);
mode_ = lite::DeviceInfo::Global().mode();
threads_ = lite::DeviceInfo::Global().threads();
#endif
}
void ConfigBase::set_power_mode(paddle::lite_api::PowerMode mode) {
#ifdef LITE_WITH_ARM
lite::DeviceInfo::Global().SetRunMode(mode, threads_);
mode_ = lite::DeviceInfo::Global().mode();
threads_ = lite::DeviceInfo::Global().threads();
#endif
}
void ConfigBase::set_threads(int threads) {
#ifdef LITE_WITH_ARM
lite::DeviceInfo::Global().SetRunMode(mode_, threads);
mode_ = lite::DeviceInfo::Global().mode();
threads_ = lite::DeviceInfo::Global().threads();
#endif
}
} // namespace lite_api
} // namespace paddle
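Since power mode and thread count now live in ConfigBase (see the header changes below), CxxConfig and MobileConfig share the same setters. A hedged usage sketch, with a placeholder model path:
void ConfigureSketch() {
  paddle::lite_api::MobileConfig config;
  config.set_model_dir("./optimized_model_dir");  // placeholder path
  // On ARM builds these setters route through DeviceInfo, so power_mode() and
  // threads() report the values the runtime will actually use.
  config.set_power_mode(paddle::lite_api::PowerMode::LITE_POWER_NO_BIND);
  config.set_threads(2);
  auto predictor = paddle::lite_api::CreatePaddlePredictor(config);
  // ... feed inputs and call predictor->Run() as usual.
}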
......@@ -43,10 +43,17 @@ struct LITE_API Tensor {
const T* data() const;
template <typename T>
T* mutable_data() const;
T* mutable_data(TargetType type = TargetType::kHost) const;
template <typename T, TargetType type = TargetType::kHost>
void CopyFromCpu(const T* data);
template <typename T>
void CopyToCpu(T* data);
/// Shape of the tensor.
shape_t shape() const;
TargetType target() const;
PrecisionType precision() const;
// LoD of the tensor
lod_t lod() const;
......@@ -71,6 +78,17 @@ class LITE_API PaddlePredictor {
virtual std::unique_ptr<const Tensor> GetOutput(int i) const = 0;
virtual void Run() = 0;
virtual std::shared_ptr<PaddlePredictor> Clone() = 0;
virtual std::string GetVersion() const = 0;
// Get input names
virtual std::vector<std::string> GetInputNames() = 0;
// Get output names
virtual std::vector<std::string> GetOutputNames() = 0;
// Get Input by name
virtual std::unique_ptr<Tensor> GetInputByName(const std::string& name) = 0;
/// Get a read-only tensor; return null if no tensor named `name` exists.
virtual std::unique_ptr<const Tensor> GetTensor(
......@@ -80,31 +98,43 @@ class LITE_API PaddlePredictor {
/// CxxConfig, and the persisted model can be reused for MobileConfig.
virtual void SaveOptimizedModel(
const std::string& model_dir,
LiteModelType model_type = LiteModelType::kProtobuf);
LiteModelType model_type = LiteModelType::kProtobuf,
bool record_info = false);
virtual ~PaddlePredictor() = default;
protected:
int threads_{1};
lite_api::PowerMode mode_{lite_api::LITE_POWER_NO_BIND};
};
/// Base class for all the configs.
class LITE_API ConfigBase {
std::string model_dir_;
int threads_{1};
PowerMode mode_{LITE_POWER_NO_BIND};
public:
explicit ConfigBase(PowerMode mode = LITE_POWER_NO_BIND, int threads = 1);
// set Model_dir
void set_model_dir(const std::string& x) { model_dir_ = x; }
const std::string& model_dir() const { return model_dir_; }
// set Power_mode
void set_power_mode(PowerMode mode);
PowerMode power_mode() const { return mode_; }
// set Thread
void set_threads(int threads);
int threads() const { return threads_; }
};
/// CxxConfig is the config for the Full feature predictor.
class LITE_API CxxConfig : public ConfigBase {
Place preferred_place_;
std::vector<Place> valid_places_;
std::string model_file_;
std::string param_file_;
bool model_from_memory_{false};
public:
void set_preferred_place(const Place& x) { preferred_place_ = x; }
void set_valid_places(const std::vector<Place>& x) { valid_places_ = x; }
void set_model_file(const std::string& path) { model_file_ = path; }
void set_param_file(const std::string& path) { param_file_ = path; }
......@@ -117,7 +147,6 @@ class LITE_API CxxConfig : public ConfigBase {
model_from_memory_ = true;
}
const Place& preferred_place() const { return preferred_place_; }
const std::vector<Place>& valid_places() const { return valid_places_; }
std::string model_file() const { return model_file_; }
std::string param_file() const { return param_file_; }
......@@ -127,21 +156,11 @@ class LITE_API CxxConfig : public ConfigBase {
/// MobileConfig is the config for the light-weight predictor; it skips
/// IR optimization and other stages that are unnecessary at runtime.
class LITE_API MobileConfig : public ConfigBase {
PowerMode mode_{LITE_POWER_HIGH};
int threads_{1};
std::string model_buffer_;
std::string param_buffer_;
bool model_from_memory_{false};
public:
MobileConfig(Place preferred_place = Place(TARGET(kARM),
PRECISION(kFloat),
DATALAYOUT(kNCHW)),
PowerMode mode = LITE_POWER_HIGH,
int threads = 1)
: mode_(mode), threads_(threads) {}
void set_power_mode(PowerMode mode) { mode_ = mode; }
void set_threads(int threads) { threads_ = threads; }
void set_model_buffer(const char* model_buffer,
size_t model_buffer_size,
const char* param_buffer,
......@@ -151,8 +170,6 @@ class LITE_API MobileConfig : public ConfigBase {
model_from_memory_ = true;
}
PowerMode power_mode() const { return mode_; }
int threads() const { return threads_; }
bool model_from_memory() const { return model_from_memory_; }
const std::string& model_buffer() const { return model_buffer_; }
const std::string& param_buffer() const { return param_buffer_; }
......
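MobileConfig::set_model_buffer keeps its behaviour while the shared members move into ConfigBase. A minimal load-from-memory sketch follows; the file names are placeholders and it assumes the elided fourth parameter of set_model_buffer is the parameter-buffer size.
#include <fstream>
#include <iterator>
#include <string>

static std::string ReadWholeFile(const std::string& path) {
  std::ifstream ifs(path, std::ios::in | std::ios::binary);
  return std::string((std::istreambuf_iterator<char>(ifs)),
                     std::istreambuf_iterator<char>());
}

void BuildFromMemorySketch() {
  std::string model_buf = ReadWholeFile("./opt_model/model.nb");  // placeholder
  std::string param_buf = ReadWholeFile("./opt_model/param.nb");  // placeholder
  paddle::lite_api::MobileConfig config;
  config.set_model_buffer(model_buf.c_str(), model_buf.size(),
                          param_buf.c_str(), param_buf.size());
  // model_from_memory() now returns true; no file access is needed at
  // prediction time.
  auto predictor = paddle::lite_api::CreatePaddlePredictor(config);
}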
......@@ -28,7 +28,6 @@ namespace lite_api {
TEST(CxxApi, run) {
lite_api::CxxConfig config;
config.set_model_dir(FLAGS_model_dir);
config.set_preferred_place(Place{TARGET(kX86), PRECISION(kFloat)});
config.set_valid_places({
Place{TARGET(kX86), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
......@@ -36,7 +35,18 @@ TEST(CxxApi, run) {
auto predictor = lite_api::CreatePaddlePredictor(config);
auto input_tensor = predictor->GetInput(0);
LOG(INFO) << "Version: " << predictor->GetVersion();
auto inputs = predictor->GetInputNames();
LOG(INFO) << "input size: " << inputs.size();
for (int i = 0; i < inputs.size(); i++) {
LOG(INFO) << "inputnames: " << inputs[i];
}
auto outputs = predictor->GetOutputNames();
for (int i = 0; i < outputs.size(); i++) {
LOG(INFO) << "outputnames: " << outputs[i];
}
auto input_tensor = predictor->GetInputByName(inputs[0]);
input_tensor->Resize(std::vector<int64_t>({100, 100}));
auto* data = input_tensor->mutable_data<float>();
for (int i = 0; i < 100 * 100; i++) {
......@@ -45,7 +55,7 @@ TEST(CxxApi, run) {
predictor->Run();
auto output = predictor->GetOutput(0);
auto output = predictor->GetTensor(outputs[0]);
auto* out = output->data<float>();
LOG(INFO) << out[0];
LOG(INFO) << out[1];
......@@ -54,8 +64,8 @@ TEST(CxxApi, run) {
EXPECT_NEAR(out[1], -28.8729, 1e-3);
predictor->SaveOptimizedModel(FLAGS_model_dir + ".opt2");
predictor->SaveOptimizedModel(FLAGS_model_dir + ".opt2.naive",
LiteModelType::kNaiveBuffer);
predictor->SaveOptimizedModel(
FLAGS_model_dir + ".opt2.naive", LiteModelType::kNaiveBuffer, true);
}
// Demo1 for Mobile Devices :Load model from file and run
......@@ -66,6 +76,18 @@ TEST(LightApi, run) {
auto predictor = lite_api::CreatePaddlePredictor(config);
auto inputs = predictor->GetInputNames();
LOG(INFO) << "input size: " << inputs.size();
for (int i = 0; i < inputs.size(); i++) {
LOG(INFO) << "inputnames: " << inputs.at(i);
}
auto outputs = predictor->GetOutputNames();
for (int i = 0; i < outputs.size(); i++) {
LOG(INFO) << "outputnames: " << outputs.at(i);
}
LOG(INFO) << "Version: " << predictor->GetVersion();
auto input_tensor = predictor->GetInput(0);
input_tensor->Resize(std::vector<int64_t>({100, 100}));
auto* data = input_tensor->mutable_data<float>();
......
......@@ -25,7 +25,7 @@
#define USE_LITE_KERNEL(op_type__, target__, precision__, layout__, alias__) \
extern int touch_##op_type__##target__##precision__##layout__##alias__(); \
int op_type__##target__##precision__##layout__##alias__ \
int op_type__##target__##precision__##layout__##alias__##__use_lite_kernel \
__attribute__((unused)) = \
touch_##op_type__##target__##precision__##layout__##alias__();
......
......@@ -46,8 +46,16 @@ std::string Place::DebugString() const {
}
const std::string& TargetToStr(TargetType target) {
static const std::string target2string[] = {
"unk", "host", "x86", "cuda", "arm", "opencl", "any", "fpga", "npu"};
static const std::string target2string[] = {"unk",
"host",
"x86",
"cuda",
"arm",
"opencl",
"any",
"fpga",
"npu",
"xpu"};
auto x = static_cast<int>(target);
CHECK_LT(x, static_cast<int>(TARGET(NUM)));
return target2string[x];
......@@ -84,7 +92,8 @@ const std::string& TargetRepr(TargetType target) {
"kOpenCL",
"kAny",
"kFPGA",
"kNPU"};
"kNPU",
"kXPU"};
auto x = static_cast<int>(target);
CHECK_LT(x, static_cast<int>(TARGET(NUM)));
return target2string[x];
......@@ -113,5 +122,37 @@ const std::string& DataLayoutRepr(DataLayoutType layout) {
return datalayout2string[x];
}
std::set<TargetType> ExpandValidTargets(TargetType target) {
static const std::set<TargetType> valid_set({TARGET(kX86),
TARGET(kCUDA),
TARGET(kARM),
TARGET(kOpenCL),
TARGET(kNPU),
TARGET(kXPU),
TARGET(kFPGA)});
if (target == TARGET(kAny)) {
return valid_set;
}
return std::set<TargetType>({target});
}
std::set<PrecisionType> ExpandValidPrecisions(PrecisionType precision) {
static const std::set<PrecisionType> valid_set(
{PRECISION(kFloat), PRECISION(kInt8), PRECISION(kFP16), PRECISION(kAny)});
if (precision == PRECISION(kAny)) {
return valid_set;
}
return std::set<PrecisionType>({precision});
}
std::set<DataLayoutType> ExpandValidLayouts(DataLayoutType layout) {
static const std::set<DataLayoutType> valid_set(
{DATALAYOUT(kNCHW), DATALAYOUT(kAny), DATALAYOUT(kNHWC)});
if (layout == DATALAYOUT(kAny)) {
return valid_set;
}
return std::set<DataLayoutType>({layout});
}
} // namespace lite_api
} // namespace paddle
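All three Expand helpers follow the same rule: an explicit value maps to a singleton set, while the Any value expands to every supported entry. An illustrative sketch:
bool ExpandSketch() {
  using namespace paddle::lite_api;
  // kAny expands to all seven concrete targets registered above, while a
  // concrete target expands to a singleton set containing only itself.
  std::set<TargetType> all_targets = ExpandValidTargets(TARGET(kAny));
  std::set<TargetType> arm_only = ExpandValidTargets(TARGET(kARM));
  return all_targets.size() == 7 && arm_only.size() == 1;  // expected: true
}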
......@@ -13,6 +13,7 @@
// limitations under the License.
#pragma once
#include <set>
#include <string>
// Generic helper definitions for shared library support
......@@ -50,8 +51,9 @@ enum class TargetType : int {
kOpenCL = 5,
kFPGA = 7,
kNPU = 8,
kXPU = 9,
kAny = 6, // any target
NUM = 9, // number of fields.
NUM = 10, // number of fields.
};
enum class PrecisionType : int {
kUnk = 0,
......@@ -101,6 +103,8 @@ static size_t PrecisionTypeLength(PrecisionType type) {
return 1;
case PrecisionType::kInt32:
return 4;
case PrecisionType::kInt64:
return 8;
case PrecisionType::kFP16:
return 2;
default:
......@@ -124,6 +128,17 @@ const std::string& PrecisionRepr(PrecisionType precision);
const std::string& DataLayoutRepr(DataLayoutType layout);
// Get a set of all the elements represented by the target.
std::set<TargetType> ExpandValidTargets(TargetType target = TARGET(kAny));
// Get a set of all the elements represented by the precision.
std::set<PrecisionType> ExpandValidPrecisions(
PrecisionType precision = PRECISION(kAny));
// Get a set of all the elements represented by the layout.
std::set<DataLayoutType> ExpandValidLayouts(
DataLayoutType layout = DATALAYOUT(kAny));
/*
* Place specifies the execution context of a Kernel or input/output for a
* kernel. It is used to make the analysis of the MIR more clear and accurate.
......
......@@ -31,6 +31,7 @@ USE_MIR_PASS(lite_conv_bn_fuse_pass);
USE_MIR_PASS(lite_fc_fuse_pass);
USE_MIR_PASS(lite_shuffle_channel_fuse_pass);
USE_MIR_PASS(lite_transpose_softmax_transpose_fuse_pass);
USE_MIR_PASS(lite_interpolate_fuse_pass);
USE_MIR_PASS(identity_scale_eliminate_pass);
USE_MIR_PASS(lite_conv_elementwise_fuse_pass);
USE_MIR_PASS(lite_conv_activation_fuse_pass);
......@@ -38,3 +39,4 @@ USE_MIR_PASS(lite_elementwise_add_activation_fuse_pass);
USE_MIR_PASS(lite_quant_dequant_fuse_pass);
USE_MIR_PASS(type_precision_cast_pass);
USE_MIR_PASS(type_layout_cast_pass);
USE_MIR_PASS(memory_optimize_pass);
if (NOT LITE_WITH_PYTHON)
return()
endif()
add_subdirectory(pybind)
#add_subdirectory(interface)
set(PYBIND_DEPS pybind python paddle_api_light paddle_api)
if (NOT LITE_ON_TINY_PUBLISH)
set(PYBIND_DEPS ${PYBIND_DEPS} paddle_api_full)
endif()
lite_cc_library(lite_pybind SHARED SRCS pybind.cc DEPS ${PYBIND_DEPS})
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/api/python/pybind/pybind.h"
#include <pybind11/numpy.h>
#include <pybind11/stl.h>
#include <cstring>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>
#ifndef LITE_ON_TINY_PUBLISH
#include "lite/api/cxx_api.h"
#include "lite/api/paddle_use_passes.h"
#endif
#include "lite/api/light_api.h"
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/core/tensor.h"
namespace py = pybind11;
namespace paddle {
namespace lite {
namespace pybind {
using lite_api::Tensor;
using lite_api::CxxConfig;
using lite_api::MobileConfig;
using lite_api::PowerMode;
using lite_api::TargetType;
using lite_api::PrecisionType;
using lite_api::DataLayoutType;
using lite_api::Place;
using lite::LightPredictorImpl;
#ifndef LITE_ON_TINY_PUBLISH
using lite::CxxPaddleApiImpl;
static void BindLiteCxxPredictor(py::module *m);
#endif
static void BindLiteLightPredictor(py::module *m);
static void BindLiteCxxConfig(py::module *m);
static void BindLiteMobileConfig(py::module *m);
static void BindLitePowerMode(py::module *m);
static void BindLitePlace(py::module *m);
static void BindLiteTensor(py::module *m);
void BindLiteApi(py::module *m) {
BindLiteCxxConfig(m);
BindLiteMobileConfig(m);
BindLitePowerMode(m);
BindLitePlace(m);
BindLiteTensor(m);
#ifndef LITE_ON_TINY_PUBLISH
BindLiteCxxPredictor(m);
#endif
BindLiteLightPredictor(m);
// Global helper methods
#ifndef LITE_ON_TINY_PUBLISH
m->def("create_paddle_predictor",
[](const CxxConfig &config) -> std::unique_ptr<CxxPaddleApiImpl> {
auto x = std::unique_ptr<CxxPaddleApiImpl>(new CxxPaddleApiImpl());
x->Init(config);
return std::move(x);
});
#endif
m->def("create_paddle_predictor",
[](const MobileConfig &config) -> std::unique_ptr<LightPredictorImpl> {
auto x =
std::unique_ptr<LightPredictorImpl>(new LightPredictorImpl());
x->Init(config);
return std::move(x);
});
}
void BindLiteCxxConfig(py::module *m) {
py::class_<CxxConfig> cxx_config(*m, "CxxConfig");
cxx_config.def(py::init<>())
.def("set_model_dir", &CxxConfig::set_model_dir)
.def("model_dir", &CxxConfig::model_dir)
.def("set_model_file", &CxxConfig::set_model_file)
.def("model_file", &CxxConfig::model_file)
.def("set_param_file", &CxxConfig::set_param_file)
.def("param_file", &CxxConfig::param_file)
.def("set_valid_places", &CxxConfig::set_valid_places)
.def("set_model_buffer", &CxxConfig::set_model_buffer)
.def("model_from_memory", &CxxConfig::model_from_memory);
#ifdef LITE_WITH_ARM
cxx_config.def("set_threads", &CxxConfig::set_threads)
.def("threads", &CxxConfig::threads)
.def("set_power_mode", &CxxConfig::set_power_mode)
.def("power_mode", &CxxConfig::power_mode);
#endif
}
// TODO(sangoly): Should MobileConfig be renamed to LightConfig ??
void BindLiteMobileConfig(py::module *m) {
py::class_<MobileConfig> mobile_config(*m, "MobileConfig");
mobile_config.def(py::init<>())
.def("set_model_dir", &MobileConfig::set_model_dir)
.def("model_dir", &MobileConfig::model_dir)
.def("set_model_buffer", &MobileConfig::set_model_buffer)
.def("model_from_memory", &MobileConfig::model_from_memory);
#ifdef LITE_WITH_ARM
mobile_config.def("set_threads", &MobileConfig::set_threads)
.def("threads", &MobileConfig::threads)
.def("set_power_mode", &MobileConfig::set_power_mode)
.def("power_mode", &MobileConfig::power_mode);
#endif
}
void BindLitePowerMode(py::module *m) {
py::enum_<PowerMode>(*m, "PowerMode")
.value("LITE_POWER_HIGH", PowerMode::LITE_POWER_HIGH)
.value("LITE_POWER_LOW", PowerMode::LITE_POWER_LOW)
.value("LITE_POWER_FULL", PowerMode::LITE_POWER_FULL)
.value("LITE_POWER_NO_BIND", PowerMode::LITE_POWER_NO_BIND)
.value("LITE_POWER_RAND_HIGH", PowerMode::LITE_POWER_RAND_HIGH)
.value("LITE_POWER_RAND_LOW", PowerMode::LITE_POWER_RAND_LOW);
}
void BindLitePlace(py::module *m) {
// TargetType
py::enum_<TargetType>(*m, "TargetType")
.value("Host", TargetType::kHost)
.value("X86", TargetType::kX86)
.value("CUDA", TargetType::kCUDA)
.value("ARM", TargetType::kARM)
.value("OpenCL", TargetType::kOpenCL)
.value("FPGA", TargetType::kFPGA)
.value("NPU", TargetType::kNPU)
.value("Any", TargetType::kAny);
// PrecisionType
py::enum_<PrecisionType>(*m, "PrecisionType")
.value("FP16", PrecisionType::kFP16)
.value("FP32", PrecisionType::kFloat)
.value("INT8", PrecisionType::kInt8)
.value("INT16", PrecisionType::kInt16)
.value("INT32", PrecisionType::kInt32)
.value("INT64", PrecisionType::kInt64)
.value("BOOL", PrecisionType::kBool)
.value("Any", PrecisionType::kAny);
// DataLayoutType
py::enum_<DataLayoutType>(*m, "DataLayoutType")
.value("NCHW", DataLayoutType::kNCHW)
.value("NHWC", DataLayoutType::kNHWC)
.value("Any", DataLayoutType::kAny);
// Place
py::class_<Place>(*m, "Place")
.def(py::init<TargetType, PrecisionType, DataLayoutType, int16_t>(),
py::arg("target"),
py::arg("percision") = PrecisionType::kFloat,
py::arg("layout") = DataLayoutType::kNCHW,
py::arg("device") = 0)
.def("is_valid", &Place::is_valid);
}
void BindLiteTensor(py::module *m) {
auto data_size_func = [](const std::vector<int64_t> &shape) -> int64_t {
int64_t res = 1;
for (size_t i = 0; i < shape.size(); i++) {
res *= shape[i];
}
return res;
};
py::class_<Tensor> tensor(*m, "Tensor");
tensor.def("resize", &Tensor::Resize)
.def("shape", &Tensor::shape)
.def("target", &Tensor::target)
.def("precision", &Tensor::precision)
.def("lod", &Tensor::lod)
.def("set_lod", &Tensor::SetLoD);
#define DO_GETTER_ONCE(data_type__, name__) \
tensor.def(#name__, [=](Tensor &self) -> std::vector<data_type__> { \
std::vector<data_type__> data; \
auto shape = self.shape(); \
int64_t num = data_size_func(shape); \
data.resize(num); \
self.CopyToCpu<data_type__>(data.data()); \
return data; \
});
#define DO_SETTER_ONCE(data_type__, name__) \
tensor.def( \
#name__, \
[](Tensor &self, \
const std::vector<data_type__> &data, \
TargetType type = TargetType::kHost) { \
if (type == TargetType::kHost || type == TargetType::kARM) { \
self.CopyFromCpu<data_type__, TargetType::kHost>(data.data()); \
} else if (type == TargetType::kCUDA) { \
self.CopyFromCpu<data_type__, TargetType::kCUDA>(data.data()); \
} \
}, \
py::arg("data"), \
py::arg("type") = TargetType::kHost);
#define DATA_GETTER_SETTER_ONCE(data_type__, name__) \
DO_SETTER_ONCE(data_type__, set_##name__##_data) \
DO_GETTER_ONCE(data_type__, name__##_data)
DATA_GETTER_SETTER_ONCE(int8_t, int8);
DATA_GETTER_SETTER_ONCE(int32_t, int32);
DATA_GETTER_SETTER_ONCE(float, float);
#undef DO_GETTER_ONCE
#undef DO_SETTER_ONCE
#undef DATA_GETTER_SETTER_ONCE
}
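For readers unfamiliar with the macro trio above: DATA_GETTER_SETTER_ONCE(float, float) registers set_float_data(data, type=TargetType.Host) and float_data() on the Python Tensor class. Hand-written, the float pair would look roughly like the sketch below (illustrative only, not the literal preprocessor output).
static void BindFloatDataSketch(py::class_<Tensor> &tensor) {
  // Setter: copy a Python sequence of floats into the tensor, host or CUDA.
  tensor.def("set_float_data",
             [](Tensor &self,
                const std::vector<float> &data,
                TargetType type) {
               if (type == TargetType::kHost || type == TargetType::kARM) {
                 self.CopyFromCpu<float, TargetType::kHost>(data.data());
               } else if (type == TargetType::kCUDA) {
                 self.CopyFromCpu<float, TargetType::kCUDA>(data.data());
               }
             },
             py::arg("data"),
             py::arg("type") = TargetType::kHost);
  // Getter: size a std::vector from the tensor shape and copy back to CPU.
  tensor.def("float_data", [](Tensor &self) -> std::vector<float> {
    int64_t num = 1;
    for (auto d : self.shape()) num *= d;
    std::vector<float> data(num);
    self.CopyToCpu<float>(data.data());
    return data;
  });
}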
#ifndef LITE_ON_TINY_PUBLISH
void BindLiteCxxPredictor(py::module *m) {
py::class_<CxxPaddleApiImpl>(*m, "CxxPredictor")
.def(py::init<>())
.def("get_input", &CxxPaddleApiImpl::GetInput)
.def("get_output", &CxxPaddleApiImpl::GetOutput)
.def("run", &CxxPaddleApiImpl::Run)
.def("get_version", &CxxPaddleApiImpl::GetVersion)
.def("save_optimized_model",
[](CxxPaddleApiImpl &self, const std::string &output_dir) {
self.SaveOptimizedModel(output_dir,
lite_api::LiteModelType::kNaiveBuffer);
});
}
#endif
void BindLiteLightPredictor(py::module *m) {
py::class_<LightPredictorImpl>(*m, "LightPredictor")
.def(py::init<>())
.def("get_input", &LightPredictorImpl::GetInput)
.def("get_output", &LightPredictorImpl::GetOutput)
.def("run", &LightPredictorImpl::Run)
.def("get_version", &LightPredictorImpl::GetVersion);
}
} // namespace pybind
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <Python.h>
#include <pybind11/pybind11.h>
namespace paddle {
namespace lite {
namespace pybind {
void BindLiteApi(pybind11::module *m);
PYBIND11_MODULE(lite_core, m) {
m.doc() = "C++ core of Paddle-Lite";
BindLiteApi(&m);
}
} // namespace pybind
} // namespace lite
} // namespace paddle
......@@ -28,14 +28,9 @@ namespace lite {
#ifdef LITE_WITH_ARM
TEST(ResNet18, test) {
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)}});
std::vector<Place> valid_places({Place{TARGET(kARM), PRECISION(kFloat)}});
predictor.Build(FLAGS_model_dir,
"",
"",
Place{TARGET(kARM), PRECISION(kFloat)},
valid_places);
predictor.Build(FLAGS_model_dir, "", "", valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
......
......@@ -26,13 +26,12 @@ namespace paddle {
namespace lite {
#ifdef LITE_WITH_ARM
void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place) {
void TestModel(const std::vector<Place>& valid_places) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places);
predictor.Build(FLAGS_model_dir, "", "", valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
......@@ -82,22 +81,20 @@ void TestModel(const std::vector<Place>& valid_places,
TEST(ResNet50, test_arm) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
});
TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)}));
TestModel(valid_places);
}
#ifdef LITE_WITH_OPENCL
TEST(ResNet50, test_opencl) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
Place{TARGET(kOpenCL), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
});
TestModel(valid_places, Place({TARGET(kOpenCL), PRECISION(kFloat)}));
TestModel(valid_places);
}
#endif // LITE_WITH_OPENCL
......
......@@ -29,8 +29,7 @@ namespace lite {
TEST(ResNet50, test) {
lite::Predictor predictor;
std::vector<Place> valid_places(
{Place{TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)},
Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNHWC)}});
{Place{TARGET(kFPGA), PRECISION(kFP16), DATALAYOUT(kNHWC)}});
predictor.Build(FLAGS_model_dir,
"",
......
......@@ -25,13 +25,12 @@
namespace paddle {
namespace lite {
void TestModel(const std::vector<Place>& valid_places,
const Place& preferred_place) {
void TestModel(const std::vector<Place>& valid_places) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
predictor.Build(FLAGS_model_dir, "", "", preferred_place, valid_places);
predictor.Build(FLAGS_model_dir, "", "", valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim((std::vector<DDim::value_type>({1, 3, 224, 224}))));
......@@ -80,12 +79,11 @@ void TestModel(const std::vector<Place>& valid_places,
TEST(ShuffleNetV2, test_arm) {
std::vector<Place> valid_places({
Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kARM), PRECISION(kFloat)},
// Place{TARGET(kOpenCL), PRECISION(kFloat)},
});
TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)}));
TestModel(valid_places);
}
} // namespace lite
......
......@@ -12,56 +12,54 @@
// See the License for the specific language governing permissions and
// limitations under the License.
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <vector>
#include "lite/api/cxx_api.h"
#include "lite/api/lite_api_test_helper.h"
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/core/op_registry.h"
#include "lite/core/tensor.h"
// for googlenet
DEFINE_string(model_dir, "", "");
#include "lite/api/test_helper.h"
#include "lite/utils/cp_logging.h"
namespace paddle {
namespace lite {
#ifdef LITE_WITH_X86
TEST(CXXApi, test_lite_googlenet) {
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kX86), PRECISION(kFloat)}});
// LOG(INFO)<<"FLAGS_eval_googlenet_dir:"<<FLAGS_test_lite_googlenet_dir;
std::string model_dir = FLAGS_model_dir;
predictor.Build(
model_dir, "", "", Place{TARGET(kX86), PRECISION(kFloat)}, valid_places);
lite_api::CxxConfig config;
config.set_model_dir(FLAGS_model_dir);
config.set_valid_places({lite_api::Place{TARGET(kX86), PRECISION(kFloat)},
lite_api::Place{TARGET(kHost), PRECISION(kFloat)}});
auto predictor = lite_api::CreatePaddlePredictor(config);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
auto input_tensor = predictor->GetInput(0);
std::vector<int64_t> input_shape{1, 3, 224, 224};
input_tensor->Resize(input_shape);
auto* data = input_tensor->mutable_data<float>();
for (int i = 0; i < input_tensor->dims().production(); i++) {
int input_num = 1;
for (int i = 0; i < input_shape.size(); ++i) {
input_num *= input_shape[i];
}
for (int i = 0; i < input_num; i++) {
data[i] = 1;
}
predictor.Run();
auto* out = predictor.GetOutput(0);
for (int i = 0; i < FLAGS_warmup; ++i) {
predictor->Run();
}
auto start = GetCurrentUS();
for (int i = 0; i < FLAGS_repeats; ++i) {
predictor->Run();
}
LOG(INFO) << "================== Speed Report ===================";
LOG(INFO) << "Model: " << FLAGS_model_dir << ", threads num " << FLAGS_threads
<< ", warmup: " << FLAGS_warmup << ", repeats: " << FLAGS_repeats
<< ", spend " << (GetCurrentUS() - start) / FLAGS_repeats / 1000.0
<< " ms in average.";
auto out = predictor->GetOutput(0);
std::vector<float> results(
{0.00034298553, 0.0008200012, 0.0005046297, 0.000839279,
0.00052616704, 0.0003447803, 0.0010877076, 0.00081762316,
......@@ -71,9 +69,9 @@ TEST(CXXApi, test_lite_googlenet) {
for (size_t i = 0; i < results.size(); ++i) {
EXPECT_NEAR(out->data<float>()[i * 51], results[i], 1e-5);
}
ASSERT_EQ(out->dims().size(), 2);
ASSERT_EQ(out->dims()[0], 1);
ASSERT_EQ(out->dims()[1], 1000);
ASSERT_EQ(out->shape().size(), 2);
ASSERT_EQ(out->shape()[0], 1);
ASSERT_EQ(out->shape()[1], 1000);
}
#endif
} // namespace lite
......
......@@ -22,6 +22,13 @@
DEFINE_string(model_dir, "", "model dir");
DEFINE_int32(warmup, 0, "warmup times");
DEFINE_int32(repeats, 1, "repeats times");
DEFINE_int32(power_mode,
3,
"arm power mode: "
"0 for big cluster, "
"1 for little cluster, "
"2 for all cores, "
"3 for no bind");
DEFINE_int32(threads, 1, "threads num");
DEFINE_int32(im_width, 224, "image width");
DEFINE_int32(im_height, 224, "image height");
......
......@@ -12,70 +12,46 @@
// See the License for the specific language governing permissions and
// limitations under the License.
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <vector>
#include "lite/api/cxx_api.h"
#include "lite/api/lite_api_test_helper.h"
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/test_helper.h"
#include "lite/core/op_registry.h"
#include "lite/core/tensor.h"
#include "lite/utils/cp_logging.h"
namespace paddle {
namespace lite {
TEST(InceptionV4, test_inceptionv4_lite_x86) {
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kX86), PRECISION(kFloat)}});
// LOG(INFO)<<"FLAGS_eval_googlenet_dir:"<<FLAGS_test_lite_googlenet_dir;
std::string model_dir = FLAGS_model_dir;
std::vector<std::string> passes({"static_kernel_pick_pass",
"variable_place_inference_pass",
"type_target_cast_pass",
"variable_place_inference_pass",
"io_copy_kernel_pick_pass",
"variable_place_inference_pass",
"runtime_context_assign_pass"});
predictor.Build(model_dir,
"",
"",
Place{TARGET(kX86), PRECISION(kFloat)},
valid_places,
passes);
lite_api::CxxConfig config;
config.set_model_dir(FLAGS_model_dir);
config.set_valid_places({lite_api::Place{TARGET(kX86), PRECISION(kFloat)},
lite_api::Place{TARGET(kHost), PRECISION(kFloat)}});
auto predictor = lite_api::CreatePaddlePredictor(config);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
auto input_tensor = predictor->GetInput(0);
std::vector<int64_t> input_shape{1, 3, 224, 224};
input_tensor->Resize(input_shape);
auto* data = input_tensor->mutable_data<float>();
for (int i = 0; i < input_tensor->dims().production(); i++) {
int input_num = 1;
for (int i = 0; i < input_shape.size(); ++i) {
input_num *= input_shape[i];
}
for (int i = 0; i < input_num; i++) {
data[i] = 1;
}
for (int i = 0; i < FLAGS_warmup; ++i) {
predictor.Run();
predictor->Run();
}
auto start = GetCurrentUS();
for (int i = 0; i < FLAGS_repeats; ++i) {
predictor.Run();
predictor->Run();
}
LOG(INFO) << "================== Speed Report ===================";
......@@ -83,7 +59,6 @@ TEST(InceptionV4, test_inceptionv4_lite_x86) {
<< ", repeats: " << FLAGS_repeats << ", spend "
<< (GetCurrentUS() - start) / FLAGS_repeats / 1000.0
<< " ms in average.";
std::vector<std::vector<float>> results;
// i = 1
results.emplace_back(std::vector<float>(
......@@ -93,15 +68,15 @@ TEST(InceptionV4, test_inceptionv4_lite_x86) {
0.0009782845, 0.0009230255, 0.0010548076, 0.0010974824,
0.0010612885, 0.00089107914, 0.0010112736, 0.00097655767}));
auto* out = predictor.GetOutput(0);
ASSERT_EQ(out->dims().size(), 2);
ASSERT_EQ(out->dims()[0], 1);
ASSERT_EQ(out->dims()[1], 1000);
auto out = predictor->GetOutput(0);
ASSERT_EQ(out->shape().size(), 2);
ASSERT_EQ(out->shape()[0], 1);
ASSERT_EQ(out->shape()[1], 1000);
int step = 50;
for (int i = 0; i < results.size(); ++i) {
for (int j = 0; j < results[i].size(); ++j) {
EXPECT_NEAR(out->data<float>()[j * step + (out->dims()[1] * i)],
EXPECT_NEAR(out->data<float>()[j * step + (out->shape()[1] * i)],
results[i][j],
1e-6);
}
......
......@@ -12,68 +12,46 @@
// See the License for the specific language governing permissions and
// limitations under the License.
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <vector>
#include "lite/api/cxx_api.h"
#include "lite/api/lite_api_test_helper.h"
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/test_helper.h"
#include "lite/core/op_registry.h"
#include "lite/core/tensor.h"
#include "lite/utils/cp_logging.h"
namespace paddle {
namespace lite {
TEST(Mobilenet_v1, test_mobilenetv1_lite_x86) {
lite::Predictor predictor;
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
Place{TARGET(kX86), PRECISION(kFloat)}});
lite_api::CxxConfig config;
config.set_model_dir(FLAGS_model_dir);
config.set_valid_places({lite_api::Place{TARGET(kX86), PRECISION(kFloat)},
lite_api::Place{TARGET(kHost), PRECISION(kFloat)}});
auto predictor = lite_api::CreatePaddlePredictor(config);
std::string model_dir = FLAGS_model_dir;
std::vector<std::string> passes({"static_kernel_pick_pass",
"variable_place_inference_pass",
"type_target_cast_pass",
"variable_place_inference_pass",
"io_copy_kernel_pick_pass",
"variable_place_inference_pass",
"runtime_context_assign_pass"});
predictor.Build(model_dir,
"",
"",
Place{TARGET(kX86), PRECISION(kFloat)},
valid_places,
passes);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
auto input_tensor = predictor->GetInput(0);
std::vector<int64_t> input_shape{1, 3, 224, 224};
input_tensor->Resize(input_shape);
auto* data = input_tensor->mutable_data<float>();
for (int i = 0; i < input_tensor->dims().production(); i++) {
int input_num = 1;
for (int i = 0; i < input_shape.size(); ++i) {
input_num *= input_shape[i];
}
for (int i = 0; i < input_num; i++) {
data[i] = 1;
}
for (int i = 0; i < FLAGS_warmup; ++i) {
predictor.Run();
predictor->Run();
}
auto start = GetCurrentUS();
for (int i = 0; i < FLAGS_repeats; ++i) {
predictor.Run();
predictor->Run();
}
LOG(INFO) << "================== Speed Report ===================";
......@@ -81,7 +59,6 @@ TEST(Mobilenet_v1, test_mobilenetv1_lite_x86) {
<< ", repeats: " << FLAGS_repeats << ", spend "
<< (GetCurrentUS() - start) / FLAGS_repeats / 1000.0
<< " ms in average.";
std::vector<std::vector<float>> results;
// i = 1
results.emplace_back(std::vector<float>(
......@@ -90,15 +67,15 @@ TEST(Mobilenet_v1, test_mobilenetv1_lite_x86) {
0.0010323516, 0.00010079765, 0.00011006987, 0.0017364529,
0.0048292773, 0.0013995157, 0.0018453331, 0.0002428986,
0.00020211363, 0.00013668182, 0.0005855956, 0.00025901722}));
auto* out = predictor.GetOutput(0);
ASSERT_EQ(out->dims().size(), 2);
ASSERT_EQ(out->dims()[0], 1);
ASSERT_EQ(out->dims()[1], 1000);
auto out = predictor->GetOutput(0);
ASSERT_EQ(out->shape().size(), 2);
ASSERT_EQ(out->shape()[0], 1);
ASSERT_EQ(out->shape()[1], 1000);
int step = 50;
for (int i = 0; i < results.size(); ++i) {
for (int j = 0; j < results[i].size(); ++j) {
EXPECT_NEAR(out->data<float>()[j * step + (out->dims()[1] * i)],
EXPECT_NEAR(out->data<float>()[j * step + (out->shape()[1] * i)],
results[i][j],
1e-6);
}
......
......@@ -12,71 +12,47 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <vector>
#include "lite/api/cxx_api.h"
#include "lite/api/lite_api_test_helper.h"
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/test_helper.h"
#include "lite/core/op_registry.h"
#include "lite/core/tensor.h"
#include "lite/utils/cp_logging.h"
// for googlenet
namespace paddle {
namespace lite {
TEST(Mobilenet_v2, test_mobilenetv2_lite_x86) {
  lite_api::CxxConfig config;
  config.set_model_dir(FLAGS_model_dir);
  config.set_valid_places({lite_api::Place{TARGET(kX86), PRECISION(kFloat)},
                           lite_api::Place{TARGET(kHost), PRECISION(kFloat)}});
  auto predictor = lite_api::CreatePaddlePredictor(config);
  auto input_tensor = predictor->GetInput(0);
  std::vector<int64_t> input_shape{1, 3, 224, 224};
  input_tensor->Resize(input_shape);
  auto* data = input_tensor->mutable_data<float>();
  int input_num = 1;
  for (int i = 0; i < input_shape.size(); ++i) {
    input_num *= input_shape[i];
  }
  for (int i = 0; i < input_num; i++) {
    data[i] = 1;
  }
  for (int i = 0; i < FLAGS_warmup; ++i) {
    predictor->Run();
  }
  auto start = GetCurrentUS();
  for (int i = 0; i < FLAGS_repeats; ++i) {
    predictor->Run();
  }
LOG(INFO) << "================== Speed Report ===================";
......@@ -84,7 +60,6 @@ TEST(Mobilenet_v2, test_mobilenetv2_lite_x86) {
<< ", repeats: " << FLAGS_repeats << ", spend "
<< (GetCurrentUS() - start) / FLAGS_repeats / 1000.0
<< " ms in average.";
std::vector<std::vector<float>> results;
// i = 1
results.emplace_back(std::vector<float>(
......@@ -93,15 +68,15 @@ TEST(Mobilenet_v2, test_mobilenetv2_lite_x86) {
0.0009059976, 9.5378724e-05, 5.386537e-05, 0.0006427285,
0.0070957416, 0.0016094646, 0.0018807327, 0.00010506048,
6.823785e-05, 0.00012269315, 0.0007806194, 0.00022354358}));
  auto out = predictor->GetOutput(0);
  ASSERT_EQ(out->shape().size(), 2);
  ASSERT_EQ(out->shape()[0], 1);
  ASSERT_EQ(out->shape()[1], 1000);
int step = 50;
for (int i = 0; i < results.size(); ++i) {
for (int j = 0; j < results[i].size(); ++j) {
      EXPECT_NEAR(out->data<float>()[j * step + (out->shape()[1] * i)],
results[i][j],
1e-6);
}
......
......@@ -14,59 +14,44 @@
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <fstream>
#include <vector>
#include "lite/api/cxx_api.h"
#include "lite/api/lite_api_test_helper.h"
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/test_helper.h"
#include "lite/core/op_registry.h"
#include "lite/utils/cp_logging.h"
namespace paddle {
namespace lite {
TEST(Resnet50, test_resnet50_lite_x86) {
  lite_api::CxxConfig config;
  config.set_model_dir(FLAGS_model_dir);
  config.set_valid_places({lite_api::Place{TARGET(kX86), PRECISION(kFloat)},
                           lite_api::Place{TARGET(kHost), PRECISION(kFloat)}});
  auto predictor = lite_api::CreatePaddlePredictor(config);
  auto input_tensor = predictor->GetInput(0);
  std::vector<int64_t> input_shape{1, 3, 224, 224};
  input_tensor->Resize(input_shape);
  auto* data = input_tensor->mutable_data<float>();
  int input_num = 1;
  for (int i = 0; i < input_shape.size(); ++i) {
    input_num *= input_shape[i];
  }
  for (int i = 0; i < input_num; i++) {
    data[i] = 1;
  }
  for (int i = 0; i < FLAGS_warmup; ++i) {
    predictor->Run();
  }
  auto start = GetCurrentUS();
  for (int i = 0; i < FLAGS_repeats; ++i) {
    predictor->Run();
  }
  LOG(INFO) << "================== Speed Report ===================";
......@@ -75,25 +60,28 @@ void TestModel(const std::vector<Place>& valid_places,
<< ", spend " << (GetCurrentUS() - start) / FLAGS_repeats / 1000.0
<< " ms in average.";
  std::vector<std::vector<float>> results;
  // i = 1
results.emplace_back(std::vector<float>(
{0.00024139918, 0.00020566184, 0.00022418296, 0.00041731037,
0.0005366107, 0.00016948722, 0.00028638865, 0.0009257241,
0.00072681636, 8.531815e-05, 0.0002129998, 0.0021168243,
0.006387163, 0.0037145028, 0.0012812682, 0.00045948103,
0.00013535398, 0.0002483765, 0.00076759676, 0.0002773295}));
auto out = predictor->GetOutput(0);
ASSERT_EQ(out->shape().size(), 2);
ASSERT_EQ(out->shape()[0], 1);
ASSERT_EQ(out->shape()[1], 1000);
  int step = 50;
for (int i = 0; i < results.size(); ++i) {
for (int j = 0; j < results[i].size(); ++j) {
EXPECT_NEAR(out->data<float>()[j * step + (out->shape()[1] * i)],
results[i][j],
1e-6);
}
}
}
} // namespace lite
} // namespace paddle
......@@ -30,14 +30,9 @@ TEST(unet, test) {
DeviceInfo::Init();
DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_HIGH, FLAGS_threads);
lite::Predictor predictor;
  std::vector<Place> valid_places({Place{TARGET(kARM), PRECISION(kFloat)}});
  predictor.Build(FLAGS_model_dir, "", "", valid_places);
auto* input_tensor = predictor.GetInput(0);
input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 512, 512})));
......
add_subdirectory(arm)
add_subdirectory(x86)
add_subdirectory(cuda)
add_subdirectory(fpga)
add_subdirectory(host)
add_subdirectory(opencl)
add_subdirectory(npu)
add_subdirectory(xpu)
......@@ -58,6 +58,17 @@ void act_exp(const T* din, T* dout, int size, int threads);
template <typename T>
void act_floor(const T* din, T* dout, int size, int threads);
template <typename T>
void act_hard_sigmoid(const T* din,
T* dout,
const int64_t size,
const float slope,
const float offset,
int threads);
template <typename T>
void act_rsqrt(const T* din, T* dout, int size, int threads);
} // namespace math
} // namespace arm
} // namespace lite
......
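The hunk above adds act_hard_sigmoid and act_rsqrt declarations to the ARM math activation set. The real kernels are NEON-optimized and threaded; purely as a scalar reference for the conventional definitions these names suggest (an assumption based on the standard formulas, not taken from the library's implementation), they compute the following:
#include <algorithm>
#include <cmath>
#include <cstdint>

// Scalar reference for hard_sigmoid: out = clamp(slope * x + offset, 0, 1).
void hard_sigmoid_ref(const float* din, float* dout, int64_t size,
                      float slope, float offset) {
  for (int64_t i = 0; i < size; ++i) {
    dout[i] = std::min(1.f, std::max(0.f, slope * din[i] + offset));
  }
}

// Scalar reference for rsqrt: out = 1 / sqrt(x), defined for x > 0.
void rsqrt_ref(const float* din, float* dout, int size) {
  for (int i = 0; i < size; ++i) {
    dout[i] = 1.f / std::sqrt(din[i]);
  }
}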
......@@ -128,21 +128,21 @@ void conv_depthwise_3x3s2p0_bias_s_relu(float* dout,
const int w_out,
ARMContext* ctx);
void conv_depthwise_3x3p0_fp32(const float* din,
                               float* dout,
                               int num,
                               int ch_out,
                               int h_out,
                               int w_out,
                               int ch_in,
                               int h_in,
                               int w_in,
                               const float* weights,
                               const float* bias,
                               int stride,
                               bool flag_bias,
                               bool flag_relu,
                               ARMContext* ctx) {
if (stride == 1) {
if (flag_relu) {
if (w_in > 5) {
......