diff --git a/CMakeLists.txt b/CMakeLists.txt
index 59d6fcb07d27e1f3ab259e69d36708b775c1852a..f05e52ee447e06ba812ce5ac52e238dcebc9bbbc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -49,6 +49,9 @@ set(THIRD_PARTY_BUILD_TYPE Release)
 option(WITH_AVX "Compile Paddle Serving with AVX intrinsics" OFF)
 option(WITH_MKL "Compile Paddle Serving with MKL support." OFF)
 option(WITH_GPU "Compile Paddle Serving with NVIDIA GPU" OFF)
+option(WITH_LITE "Compile Paddle Serving with Paddle Lite Engine" OFF)
+option(WITH_XPU "Compile Paddle Serving with Baidu Kunlun" OFF)
+option(WITH_PYTHON "Compile Paddle Serving with Python" ON)
 option(CLIENT "Compile Paddle Serving Client" OFF)
 option(SERVER "Compile Paddle Serving Server" OFF)
 option(APP "Compile Paddle Serving App package" OFF)
@@ -66,40 +69,40 @@ if (NOT DEFINED WITH_MKLDNN)
 endif()
 endif()
 
-if (SERVER)
-include(external/jsoncpp)
-#include(external/rocksdb)
-endif()
 if (SERVER OR CLIENT)
-include(external/snappy)
-include(external/leveldb)
-include(external/zlib)
-include(external/boost)
-include(external/protobuf)
-include(external/brpc)
-include(external/gflags)
-include(external/glog)
-include(external/pybind11)
-include(external/python)
-include(generic)
-include(flags)
+  include(external/snappy)
+  include(external/leveldb)
+  include(external/zlib)
+  include(external/boost)
+  include(external/protobuf)
+  include(external/brpc)
+  include(external/gflags)
+  include(external/glog)
+  if (WITH_PYTHON)
+    include(external/pybind11)
+    include(external/python)
+  endif()
+  include(generic)
+  include(flags)
 endif()
 
 if (APP)
-include(external/zlib)
-include(external/boost)
-include(external/protobuf)
-include(external/gflags)
-include(external/glog)
-include(external/pybind11)
-include(external/python)
-include(generic)
+  include(external/zlib)
+  include(external/boost)
+  include(external/protobuf)
+  include(external/gflags)
+  include(external/glog)
+  include(external/pybind11)
+  include(external/python)
+  include(generic)
 endif()
 
 if (SERVER)
-include(external/cudnn)
-include(paddlepaddle)
+  include(external/jsoncpp)
+  #include(external/rocksdb)
+  include(external/cudnn)
+  include(paddlepaddle)
 endif()
 
 message("paddle serving source dir: " ${PADDLE_SERVING_SOURCE_DIR})
@@ -125,26 +128,24 @@ set(EXTERNAL_LIBS
 )
 
 if(SERVER)
-if(WITH_MKLML)
-  list(APPEND EXTERNAL_LIBS ${MKLML_IOMP_LIB})
-endif()
-endif()
-
+  if(WITH_MKLML)
+    list(APPEND EXTERNAL_LIBS ${MKLML_IOMP_LIB})
+  endif()
 
-if(SERVER)
-if(WITH_MKLDNN)
-  list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB})
-endif()
-endif()
+  if(WITH_MKLDNN)
+    list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB})
+  endif()
 
-if (SERVER)
 list(APPEND EXTERNAL_LIBS paddlepaddle)
 endif()
 
+
 add_subdirectory(core)
 
 if(SERVER)
-add_subdirectory(paddle_inference)
+  add_subdirectory(paddle_inference)
 endif()
-add_subdirectory(python)
+if (WITH_PYTHON)
+  add_subdirectory(python)
+endif()
diff --git a/cmake/external/boost.cmake b/cmake/external/boost.cmake
index 117b8727f68b90c60ece896d5890d41ba04aac8e..0ab248f8c8a0bca9fa6f97f4520a5a9781c9b239 100644
--- a/cmake/external/boost.cmake
+++ b/cmake/external/boost.cmake
@@ -22,6 +22,7 @@ set(BOOST_PROJECT "extern_boost")
 # version of boost, say, 1.66.0, doesn't build on CentOS 6. We
 # checked that the devtools package of CentOS 6 installs boost 1.41.0.
 # So we use 1.41.0 here.
+set(BOOST_VER "1.74.0")
 set(BOOST_TAR "boost_1_74_0" CACHE STRING "" FORCE)
 set(BOOST_URL "http://paddlepaddledeps.cdn.bcebos.com/${BOOST_TAR}.tar.gz" CACHE STRING "" FORCE)
diff --git a/cmake/external/brpc.cmake b/cmake/external/brpc.cmake
index 42eae8d4512c013e5457c2aceaa93e6308a87b8e..9fe5e89cbc89edd2238653b6cf5aeda41184a8a6 100644
--- a/cmake/external/brpc.cmake
+++ b/cmake/external/brpc.cmake
@@ -38,13 +38,21 @@ INCLUDE_DIRECTORIES(${BRPC_INCLUDE_DIR})
 # Reference https://stackoverflow.com/questions/45414507/pass-a-list-of-prefix-paths-to-externalproject-add-in-cmake-args
 set(prefix_path "${THIRD_PARTY_PATH}/install/gflags|${THIRD_PARTY_PATH}/install/leveldb|${THIRD_PARTY_PATH}/install/snappy|${THIRD_PARTY_PATH}/install/gtest|${THIRD_PARTY_PATH}/install/protobuf|${THIRD_PARTY_PATH}/install/zlib|${THIRD_PARTY_PATH}/install/glog")
 
+if(WITH_LITE)
+  set(BRPC_REPO "https://github.com/zhangjun/incubator-brpc.git")
+  set(BRPC_TAG "master")
+else()
+  set(BRPC_REPO "https://github.com/wangjiawei04/brpc")
+  set(BRPC_TAG "6d79e0b17f25107c35b705ea58d888083f59ff47")
+endif()
+
 # If minimal .a is need, you can set WITH_DEBUG_SYMBOLS=OFF
 ExternalProject_Add(
     extern_brpc
     ${EXTERNAL_PROJECT_LOG_ARGS}
     # TODO(gongwb): change to de newst repo when they changed.
-    GIT_REPOSITORY "https://github.com/wangjiawei04/brpc"
-    GIT_TAG "serving-0.4.1"
+    GIT_REPOSITORY ${BRPC_REPO}
+    GIT_TAG ${BRPC_TAG}
     PREFIX ${BRPC_SOURCES_DIR}
     UPDATE_COMMAND ""
    CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
diff --git a/cmake/generic.cmake b/cmake/generic.cmake
index dd2fe4dc94e7213d6ad15d37f74ab1c6d41d660a..375a1f7d219ca7de34b6362f11c9ab30e75e5304 100644
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -93,7 +93,11 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR})
 if(NOT APPLE)
   find_package(Threads REQUIRED)
   link_libraries(${CMAKE_THREAD_LIBS_INIT})
-  set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt")
+  if(WITH_LITE OR WITH_XPU)
+    set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -fopenmp -pthread -ldl -lrt")
+  else()
+    set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt")
+  endif()
 endif(NOT APPLE)
 
 set_property(GLOBAL PROPERTY FLUID_MODULES "")
diff --git a/cmake/paddlepaddle.cmake b/cmake/paddlepaddle.cmake
index ad95b3ef6db215fddf165d0718d46037749af31f..0e202d3b06537646e489510c781cf125e87e3e07 100644
--- a/cmake/paddlepaddle.cmake
+++ b/cmake/paddlepaddle.cmake
@@ -39,6 +39,12 @@ if (WITH_GPU)
   else()
     SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda10-cudnn7-avx-mkl")
   endif()
+elseif (WITH_LITE)
+  if (WITH_XPU)
+    SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-arm-xpu")
+  else()
+    SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-arm")
+  endif()
 else()
   if (WITH_AVX)
     if (WITH_MKLML)
@@ -51,7 +57,12 @@ else()
   endif()
 endif()
 
-SET(PADDLE_LIB_PATH "http://paddle-inference-lib.bj.bcebos.com/${PADDLE_LIB_VERSION}/paddle_inference.tgz")
+if(WITH_LITE)
+  SET(PADDLE_LIB_PATH "http://paddle-serving.bj.bcebos.com/inferlib/${PADDLE_LIB_VERSION}/paddle_inference.tgz")
+else()
+  SET(PADDLE_LIB_PATH "http://paddle-inference-lib.bj.bcebos.com/${PADDLE_LIB_VERSION}/paddle_inference.tgz")
+endif()
+
 MESSAGE(STATUS "PADDLE_LIB_PATH=${PADDLE_LIB_PATH}")
 if (WITH_GPU OR WITH_MKLML)
     if (WITH_TRT)
@@ -117,11 +128,24 @@ ADD_LIBRARY(paddle_fluid SHARED IMPORTED GLOBAL)
 SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.so)
 
 if (WITH_TRT)
-ADD_LIBRARY(nvinfer SHARED IMPORTED GLOBAL)
-SET_PROPERTY(TARGET nvinfer PROPERTY
-             IMPORTED_LOCATION ${TENSORRT_ROOT}/lib/libnvinfer.so)
+  ADD_LIBRARY(nvinfer SHARED IMPORTED GLOBAL)
+  SET_PROPERTY(TARGET nvinfer PROPERTY IMPORTED_LOCATION ${TENSORRT_ROOT}/lib/libnvinfer.so)
+
+  ADD_LIBRARY(nvinfer_plugin SHARED IMPORTED GLOBAL)
+  SET_PROPERTY(TARGET nvinfer_plugin PROPERTY IMPORTED_LOCATION ${TENSORRT_ROOT}/lib/libnvinfer_plugin.so)
+endif()
 
-ADD_LIBRARY(nvinfer_plugin SHARED IMPORTED GLOBAL)
-SET_PROPERTY(TARGET nvinfer_plugin PROPERTY IMPORTED_LOCATION ${TENSORRT_ROOT}/lib/libnvinfer_plugin.so)
+if (WITH_LITE)
+  ADD_LIBRARY(paddle_api_full_bundled STATIC IMPORTED GLOBAL)
+  SET_PROPERTY(TARGET paddle_api_full_bundled PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/lite/cxx/lib/libpaddle_api_full_bundled.a)
+
+  if (WITH_XPU)
+    ADD_LIBRARY(xpuapi SHARED IMPORTED GLOBAL)
+    SET_PROPERTY(TARGET xpuapi PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/xpu/lib/libxpuapi.so)
+
+    ADD_LIBRARY(xpurt SHARED IMPORTED GLOBAL)
+    SET_PROPERTY(TARGET xpurt PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/xpu/lib/libxpurt.so)
+  endif()
 endif()
 
 ADD_LIBRARY(xxhash STATIC IMPORTED GLOBAL)
@@ -132,7 +156,14 @@ LIST(APPEND external_project_dependencies paddle)
 
 LIST(APPEND paddle_depend_libs xxhash)
 
+if(WITH_LITE)
+  LIST(APPEND paddle_depend_libs paddle_api_full_bundled)
+  if(WITH_XPU)
+    LIST(APPEND paddle_depend_libs xpuapi xpurt)
+  endif()
+endif()
+
 if(WITH_TRT)
-LIST(APPEND paddle_depend_libs
-    nvinfer nvinfer_plugin)
+  LIST(APPEND paddle_depend_libs
+      nvinfer nvinfer_plugin)
 endif()
diff --git a/core/configure/CMakeLists.txt b/core/configure/CMakeLists.txt
index 8476192dd33c8fdf2583c3c5fc48b8d3e0ba0b9e..8e2b62eb64549bbd2b60f6e744eca3245f884bac 100644
--- a/core/configure/CMakeLists.txt
+++ b/core/configure/CMakeLists.txt
@@ -27,6 +27,8 @@ install(FILES ${inc}
         DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/include/configure)
 endif()
 
+if (WITH_PYTHON)
+
 py_proto_compile(general_model_config_py_proto SRCS proto/general_model_config.proto)
 add_custom_target(general_model_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
 add_dependencies(general_model_config_py_proto general_model_config_py_proto_init)
@@ -70,7 +72,7 @@ if (SERVER)
 py_proto_compile(server_config_py_proto SRCS proto/server_configure.proto)
 add_custom_target(server_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
 add_dependencies(server_config_py_proto server_config_py_proto_init)
-if (NOT WITH_GPU)
+if (NOT WITH_GPU AND NOT WITH_LITE)
 add_custom_command(TARGET server_config_py_proto POST_BUILD
         COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
         COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
@@ -114,3 +116,5 @@ add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD
         WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
 endif()
 endif()
+
+endif()
diff --git a/core/configure/proto/server_configure.proto b/core/configure/proto/server_configure.proto
index c008ee857bb7c69672e399ce44b2420d5db7fb3c..ea03d44f2cf3ff42b3b603ff9ddca7127fe8c15a 100644
--- a/core/configure/proto/server_configure.proto
+++ b/core/configure/proto/server_configure.proto
@@ -45,6 +45,8 @@ message EngineDesc {
   optional bool force_update_static_cache = 15;
   optional bool enable_ir_optimization = 16;
   optional bool use_trt = 17;
+  optional bool use_lite = 18;
+  optional bool use_xpu = 19;
 };
 
 // model_toolkit conf
diff --git a/core/general-server/CMakeLists.txt b/core/general-server/CMakeLists.txt
index aa1b7badc9140301d84bdbd94b3324b52176e837..be6c3477551cb71c3499f6a6c713dd44600b7d58 100644
--- a/core/general-server/CMakeLists.txt
+++ b/core/general-server/CMakeLists.txt
@@ -6,6 +6,11 @@ add_dependencies(serving pdcodegen fluid_cpu_engine pdserving paddle_fluid cube-
 if (WITH_GPU)
   add_dependencies(serving fluid_gpu_engine)
 endif()
+
+if (WITH_LITE)
+  add_dependencies(serving fluid_arm_engine)
+endif()
+
 target_include_directories(serving PUBLIC
         ${CMAKE_CURRENT_BINARY_DIR}/../../core/predictor
 )
@@ -15,6 +20,11 @@ if(WITH_GPU)
                           -Wl,--no-whole-archive)
 endif()
 
+if(WITH_LITE)
+  target_link_libraries(serving -Wl,--whole-archive fluid_arm_engine
+                        -Wl,--no-whole-archive)
+endif()
+
 target_link_libraries(serving -Wl,--whole-archive fluid_cpu_engine
                       -Wl,--no-whole-archive)
diff --git a/core/predictor/framework/infer.h b/core/predictor/framework/infer.h
index 8a6b6707b26474200b57769908e91055b5479a41..ba0c18e06c298553af10836fd488c6cffcd92226 100644
--- a/core/predictor/framework/infer.h
+++ b/core/predictor/framework/infer.h
@@ -38,6 +38,8 @@ class InferEngineCreationParams {
     _static_optimization = false;
     _force_update_static_cache = false;
     _use_trt = false;
+    _use_lite = false;
+    _use_xpu = false;
   }
 
   void set_path(const std::string& path) { _path = path; }
@@ -52,6 +54,10 @@ class InferEngineCreationParams {
   void set_use_trt(bool use_trt) { _use_trt = use_trt; }
 
+  void set_use_lite(bool use_lite) { _use_lite = use_lite; }
+
+  void set_use_xpu(bool use_xpu) { _use_xpu = use_xpu; }
+
   bool enable_memory_optimization() const {
     return _enable_memory_optimization;
   }
@@ -60,6 +66,10 @@ class InferEngineCreationParams {
   bool use_trt() const { return _use_trt; }
 
+  bool use_lite() const { return _use_lite; }
+
+  bool use_xpu() const { return _use_xpu; }
+
   void set_static_optimization(bool static_optimization = false) {
     _static_optimization = static_optimization;
   }
@@ -79,6 +89,9 @@ class InferEngineCreationParams {
         << "model_path = " << _path << ", "
         << "enable_memory_optimization = " << _enable_memory_optimization
         << ", "
+        << "enable_tensorrt = " << _use_trt << ", "
+        << "enable_lite = " << _use_lite << ", "
+        << "enable_xpu = " << _use_xpu << ", "
         << "enable_ir_optimization = " << _enable_ir_optimization << ", "
         << "static_optimization = " << _static_optimization << ", "
         << "force_update_static_cache = " << _force_update_static_cache;
@@ -91,6 +104,8 @@ class InferEngineCreationParams {
   bool _static_optimization;
   bool _force_update_static_cache;
   bool _use_trt;
+  bool _use_lite;
+  bool _use_xpu;
 };
 
 class InferEngine {
@@ -179,6 +194,14 @@ class ReloadableInferEngine : public InferEngine {
       _infer_engine_params.set_use_trt(conf.use_trt());
     }
 
+    if (conf.has_use_lite()) {
+      _infer_engine_params.set_use_lite(conf.use_lite());
+    }
+
+    if (conf.has_use_xpu()) {
+      _infer_engine_params.set_use_xpu(conf.use_xpu());
+    }
+
     if (!check_need_reload() || load(_infer_engine_params) != 0) {
       LOG(ERROR) << "Failed load model_data_path" << _model_data_path;
       return -1;
diff --git a/paddle_inference/CMakeLists.txt b/paddle_inference/CMakeLists.txt
index dcc49b0c21ce97411a17f645f1de5bcad5f5dc73..4d41f87fbeffb26cf9fc0135f92499c080325e2f 100644
--- a/paddle_inference/CMakeLists.txt
+++ b/paddle_inference/CMakeLists.txt
@@ -13,8 +13,13 @@
 # limitations under the License
 
 if (NOT CLIENT_ONLY)
-add_subdirectory(inferencer-fluid-cpu)
-if (WITH_GPU)
-add_subdirectory(inferencer-fluid-gpu)
-endif()
+  add_subdirectory(inferencer-fluid-cpu)
+
+  if (WITH_GPU)
+    add_subdirectory(inferencer-fluid-gpu)
+  endif()
+
+  if (WITH_LITE)
+    add_subdirectory(inferencer-fluid-arm)
+  endif()
 endif()
diff --git a/paddle_inference/inferencer-fluid-arm/CMakeLists.txt b/paddle_inference/inferencer-fluid-arm/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..cf415d9e039e84ddef964c5a84fc79b5970ed41f
--- /dev/null
+++ b/paddle_inference/inferencer-fluid-arm/CMakeLists.txt
@@ -0,0 +1,10 @@
+FILE(GLOB fluid_arm_engine_srcs ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp)
+add_library(fluid_arm_engine ${fluid_arm_engine_srcs})
+target_include_directories(fluid_arm_engine PUBLIC
+    ${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir/)
+add_dependencies(fluid_arm_engine pdserving extern_paddle configure)
+target_link_libraries(fluid_arm_engine pdserving paddle_fluid -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
+
+install(TARGETS fluid_arm_engine
+        ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib
+        )
diff --git a/paddle_inference/inferencer-fluid-arm/include/fluid_arm_engine.h b/paddle_inference/inferencer-fluid-arm/include/fluid_arm_engine.h
new file mode 100644
index 0000000000000000000000000000000000000000..92408cdacc581f7f9323840b87518df8ab8136ed
--- /dev/null
+++ b/paddle_inference/inferencer-fluid-arm/include/fluid_arm_engine.h
@@ -0,0 +1,289 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <pthread.h>
+#include <fstream>
+#include <map>
+#include <string>
+#include <vector>
+#include "core/configure/include/configure_parser.h"
+#include "core/configure/inferencer_configure.pb.h"
+#include "core/predictor/framework/infer.h"
+#include "paddle_inference_api.h"  // NOLINT
+
+namespace baidu {
+namespace paddle_serving {
+namespace fluid_arm {
+
+class AutoLock {
+ public:
+  explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) {
+    pthread_mutex_lock(&mutex);
+  }
+
+  ~AutoLock() { pthread_mutex_unlock(&_mut); }
+
+ private:
+  pthread_mutex_t& _mut;
+};
+
+class GlobalPaddleCreateMutex {
+ public:
+  pthread_mutex_t& mutex() { return _mut; }
+
+  static pthread_mutex_t& instance() {
+    static GlobalPaddleCreateMutex gmutex;
+    return gmutex.mutex();
+  }
+
+ private:
+  GlobalPaddleCreateMutex() { pthread_mutex_init(&_mut, NULL); }
+
+  pthread_mutex_t _mut;
+};
+
+using paddle_infer::Config;
+using paddle_infer::Predictor;
+using paddle_infer::Tensor;
+using paddle_infer::PrecisionType;
+using paddle_infer::CreatePredictor;
+
+// data interface
+class FluidFamilyCore {
+ public:
+  virtual ~FluidFamilyCore() {}
+  virtual std::vector<std::string> GetInputNames() {
+    return _core->GetInputNames();
+  }
+
+  virtual std::unique_ptr<Tensor> GetInputHandle(const std::string& name) {
+    return _core->GetInputHandle(name);
+  }
+
+  virtual std::vector<std::string> GetOutputNames() {
+    return _core->GetOutputNames();
+  }
+
+  virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string& name) {
+    return _core->GetOutputHandle(name);
+  }
+
+  virtual bool Run() {
+    if (!_core->Run()) {
+      LOG(ERROR) << "Failed call Run with paddle predictor";
+      return false;
+    }
+    return true;
+  }
+
+  virtual int create(const predictor::InferEngineCreationParams& params) = 0;
+
+  virtual int clone(void* origin_core) {
+    if (origin_core == NULL) {
+      LOG(ERROR) << "origin paddle Predictor is null.";
+      return -1;
+    }
+    Predictor* p_predictor = (Predictor*)origin_core;
+    _core = p_predictor->Clone();
+    if (_core.get() == NULL) {
+      LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
+      return -1;
+    }
+    return 0;
+  }
+
+  virtual void* get() { return _core.get(); }
+
+ protected:
+  std::shared_ptr<Predictor> _core;
+};
+
+// infer interface
+class FluidArmAnalysisCore : public FluidFamilyCore {
+ public:
+  int create(const predictor::InferEngineCreationParams& params) {
+    std::string data_path = params.get_path();
+    if (access(data_path.c_str(), F_OK) == -1) {
+      LOG(ERROR) << "create paddle predictor failed, path does not exist: "
+                 << data_path;
+      return -1;
+    }
+
+    Config config;
+    config.SetParamsFile(data_path + "/__params__");
+    config.SetProgFile(data_path + "/__model__");
+    config.DisableGpu();
+    config.SetCpuMathLibraryNumThreads(1);
+
+    if (params.enable_memory_optimization()) {
+      config.EnableMemoryOptim();
+    }
+
+    if (params.use_lite()) {
+      config.EnableLiteEngine(PrecisionType::kFloat32, true);
+    }
+
+    if (params.use_xpu()) {
+      config.EnableXpu(100);
+    }
+
+    config.SwitchSpecifyInputNames(true);
+    AutoLock lock(GlobalPaddleCreateMutex::instance());
+    _core = CreatePredictor(config);
+    if (NULL == _core.get()) {
+      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
+      return -1;
+    }
+
+    VLOG(2) << "create paddle predictor success, path: " << data_path;
+    return 0;
+  }
+};
+
+class FluidArmAnalysisDirCore : public FluidFamilyCore {
+ public:
+  int create(const predictor::InferEngineCreationParams& params) {
+    std::string data_path = params.get_path();
+    if (access(data_path.c_str(), F_OK) == -1) {
+      LOG(ERROR) << "create paddle predictor failed, path does not exist: "
+                 << data_path;
+      return -1;
+    }
+
+    Config config;
+    config.SetModel(data_path);
+    config.DisableGpu();
+    config.SwitchSpecifyInputNames(true);
+    config.SetCpuMathLibraryNumThreads(1);
+
+    if (params.enable_memory_optimization()) {
+      config.EnableMemoryOptim();
+    }
+
+    if (params.enable_ir_optimization()) {
+      config.SwitchIrOptim(true);
+    } else {
+      config.SwitchIrOptim(false);
+    }
+
+    if (params.use_lite()) {
+      config.EnableLiteEngine(PrecisionType::kFloat32, true);
+    }
+
+    if (params.use_xpu()) {
+      config.EnableXpu(100);
+    }
+
+    AutoLock lock(GlobalPaddleCreateMutex::instance());
+    _core = CreatePredictor(config);
+    if (NULL == _core.get()) {
+      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
+      return -1;
+    }
+
+    VLOG(2) << "create paddle predictor success, path: " << data_path;
+    return 0;
+  }
+};
+
+class Parameter {
+ public:
+  Parameter() : _row(0), _col(0), _params(NULL) {}
+  ~Parameter() {
+    VLOG(2) << "before destroy Parameter, file_name[" << _file_name << "]";
+    destroy();
+  }
+
+  int init(int row, int col, const char* file_name) {
+    destroy();
+    _file_name = file_name;
+    _row = row;
+    _col = col;
+    _params = reinterpret_cast<float*>(malloc(_row * _col * sizeof(float)));
+    if (_params == NULL) {
+      LOG(ERROR) << "Load " << _file_name << " malloc error.";
+      return -1;
+    }
+    VLOG(2) << "Load parameter file[" << _file_name << "] success.";
+    return 0;
+  }
+
+  void destroy() {
+    _row = 0;
+    _col = 0;
+    if (_params != NULL) {
+      free(_params);
+      _params = NULL;
+    }
+  }
+
+  int load() {
+    if (_params == NULL || _row <= 0 || _col <= 0) {
+      LOG(ERROR) << "load parameter error [not inited].";
+      return -1;
+    }
+
+    FILE* fs = fopen(_file_name.c_str(), "rb");
+    if (fs == NULL) {
+      LOG(ERROR) << "load " << _file_name << " fopen error.";
+      return -1;
+    }
+    static const uint32_t MODEL_FILE_HEAD_LEN = 16;
+    char head[MODEL_FILE_HEAD_LEN] = {0};
+    if (fread(head, 1, MODEL_FILE_HEAD_LEN, fs) != MODEL_FILE_HEAD_LEN) {
+      destroy();
+      LOG(ERROR) << "Load " << _file_name << " read head error.";
+      if (fs != NULL) {
+        fclose(fs);
+        fs = NULL;
+      }
+      return -1;
+    }
+
+    uint32_t matrix_size = _row * _col;
+    if (matrix_size == fread(_params, sizeof(float), matrix_size, fs)) {
+      if (fs != NULL) {
+        fclose(fs);
+        fs = NULL;
+      }
+      VLOG(2) << "load " << _file_name << " read ok.";
+      return 0;
+    } else {
+      LOG(ERROR) << "load " << _file_name << " read error.";
+      destroy();
+      if (fs != NULL) {
+        fclose(fs);
+        fs = NULL;
+      }
+      return -1;
+    }
+    return 0;
+  }
+
+ public:
+  std::string _file_name;
+  int _row;
+  int _col;
+  float* _params;
+};
+
+}  // namespace fluid_arm
+}  // namespace paddle_serving
+}  // namespace baidu
diff --git a/paddle_inference/inferencer-fluid-arm/src/fluid_arm_engine.cpp b/paddle_inference/inferencer-fluid-arm/src/fluid_arm_engine.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2c853c63b135b14939a9938ddeec779d54484393
--- /dev/null
+++ b/paddle_inference/inferencer-fluid-arm/src/fluid_arm_engine.cpp
@@ -0,0 +1,35 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle_inference/inferencer-fluid-arm/include/fluid_arm_engine.h"
+#include "core/predictor/framework/factory.h"
+
+namespace baidu {
+namespace paddle_serving {
+namespace fluid_arm {
+
+REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
+    ::baidu::paddle_serving::predictor::FluidInferEngine<FluidArmAnalysisCore>,
+    ::baidu::paddle_serving::predictor::InferEngine,
+    "FLUID_ARM_ANALYSIS");
+
+REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
+    ::baidu::paddle_serving::predictor::FluidInferEngine<
+        FluidArmAnalysisDirCore>,
+    ::baidu::paddle_serving::predictor::InferEngine,
+    "FLUID_ARM_ANALYSIS_DIR");
+
+}  // namespace fluid_arm
+}  // namespace paddle_serving
+}  // namespace baidu
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 23e0b6b507f53f1ab60a32854891b79b377638ce..f3762df4616e7e971772b2955954af946132329f 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -7,7 +7,7 @@ if (CLIENT)
 endif()
 
 if (SERVER)
-  if (NOT WITH_GPU)
+  if (NOT WITH_GPU AND NOT WITH_LITE)
     file(INSTALL pipeline DESTINATION paddle_serving_server)
     file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server/*.py)
   else()
@@ -34,7 +34,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.app.in
 endif()
 
 if (SERVER)
-  if (NOT WITH_GPU)
+  if (NOT WITH_GPU AND NOT WITH_LITE)
     configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.server.in
         ${CMAKE_CURRENT_BINARY_DIR}/setup.py)
   else()
@@ -72,7 +72,7 @@ add_custom_target(paddle_python ALL DEPENDS serving_client ${PADDLE_SERVING_BINA
 endif()
 
 if (SERVER)
-  if(NOT WITH_GPU)
+  if(NOT WITH_GPU AND NOT WITH_LITE)
     add_custom_command(
         OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
         COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server/ ${PADDLE_SERVING_BINARY_DIR}/python/
@@ -90,6 +90,16 @@ if (SERVER)
         COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
         DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
     add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
+  elseif(WITH_LITE)
+    add_custom_command(
+        OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
+        COMMAND cp -r
+        ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/
+        COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py
+        "server_gpu" arm
+        COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
+        DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
+    add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
   else()
     add_custom_command(
         OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
diff --git a/python/paddle_serving_app/local_predict.py b/python/paddle_serving_app/local_predict.py
index c734e308f07a5e1d1ea74f430aa2ffb2e2a4244b..2a2fcabea89f2e44fad963faace696d7d0af5c93 100644
--- a/python/paddle_serving_app/local_predict.py
+++ b/python/paddle_serving_app/local_predict.py
@@ -57,6 +57,8 @@ class LocalPredictor(object):
                           mem_optim=True,
                           ir_optim=False,
                           use_trt=False,
+                          use_lite=False,
+                          use_xpu=False,
                           use_feed_fetch_ops=False):
         """
         Load model config and set the engine config for the paddle predictor
@@ -70,6 +72,8 @@ class LocalPredictor(object):
             mem_optim: memory optimization, True default.
             ir_optim: open calculation chart optimization, False default.
             use_trt: use nvidia TensorRT optimization, False default
+            use_lite: use Paddle-Lite Engine, False default
+            use_xpu: run predict on Baidu Kunlun, False default
             use_feed_fetch_ops: use feed/fetch ops, False default.
         """
         client_config = "{}/serving_server_conf.prototxt".format(model_path)
@@ -80,9 +84,9 @@ class LocalPredictor(object):
         config = AnalysisConfig(model_path)
         logger.info("load_model_config params: model_path:{}, use_gpu:{},\
             gpu_id:{}, use_profile:{}, thread_num:{}, mem_optim:{}, ir_optim:{},\
-            use_trt:{}, use_feed_fetch_ops:{}".format(
+            use_trt:{}, use_lite:{}, use_xpu: {}, use_feed_fetch_ops:{}".format(
             model_path, use_gpu, gpu_id, use_profile, thread_num, mem_optim,
-            ir_optim, use_trt, use_feed_fetch_ops))
+            ir_optim, use_trt, use_lite, use_xpu, use_feed_fetch_ops))
 
         self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
         self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
@@ -119,6 +123,17 @@ class LocalPredictor(object):
                 use_static=False,
                 use_calib_mode=False)
 
+        if use_lite:
+            config.enable_lite_engine(
+                precision_mode = PrecisionType.Float32,
+                zero_copy = True,
+                passes_filter = [],
+                ops_filter = []
+            )
+
+        if use_xpu:
+            config.enable_xpu(100 * 1024 * 1024)
+
         self.predictor = create_paddle_predictor(config)
 
     def predict(self, feed=None, fetch=None, batch=False, log_id=0):
diff --git a/python/paddle_serving_server_gpu/__init__.py b/python/paddle_serving_server_gpu/__init__.py
index eec5d0a4a7e35bb735a776bb244a00c3a0c39d9f..13f6a61c600995be95b051d3b2691ae68e5e788e 100644
--- a/python/paddle_serving_server_gpu/__init__.py
+++ b/python/paddle_serving_server_gpu/__init__.py
@@ -77,6 +77,10 @@ def serve_args():
         help="Use Multi-language-service")
     parser.add_argument(
         "--use_trt", default=False, action="store_true", help="Use TensorRT")
+    parser.add_argument(
+        "--use_lite", default=False, action="store_true", help="Use PaddleLite")
+    parser.add_argument(
+        "--use_xpu", default=False, action="store_true", help="Use XPU")
     parser.add_argument(
         "--product_name",
         type=str,
@@ -210,6 +214,8 @@ class Server(object):
         self.use_local_bin = False
         self.gpuid = 0
         self.use_trt = False
+        self.use_lite = False
+        self.use_xpu = False
         self.model_config_paths = None  # for multi-model in a workflow
         self.product_name = None
         self.container_id = None
@@ -279,6 +285,12 @@ class Server(object):
     def set_trt(self):
         self.use_trt = True
 
+    def set_lite(self):
+        self.use_lite = True
+
+    def set_xpu(self):
+        self.use_xpu = True
+
     def _prepare_engine(self, model_config_paths, device):
         if self.model_toolkit_conf == None:
             self.model_toolkit_conf = server_sdk.ModelToolkitConf()
@@ -299,11 +311,17 @@ class Server(object):
             engine.static_optimization = False
             engine.force_update_static_cache = False
             engine.use_trt = self.use_trt
+            engine.use_lite = self.use_lite
+            engine.use_xpu = self.use_xpu
+
+
             if device == "cpu":
                 engine.type = "FLUID_CPU_ANALYSIS_DIR"
             elif device == "gpu":
                 engine.type = "FLUID_GPU_ANALYSIS_DIR"
+            elif device == "arm":
+                engine.type = "FLUID_ARM_ANALYSIS_DIR"
 
             self.model_toolkit_conf.engines.extend([engine])
 
@@ -405,10 +423,12 @@ class Server(object):
             for line in version_file.readlines():
                 if re.match("cuda_version", line):
                     cuda_version = line.split("\"")[1]
-                    if cuda_version != "trt":
-                        device_version = "serving-gpu-cuda" + cuda_version + "-"
-                    else:
+                    if cuda_version == "trt":
                         device_version = "serving-gpu-" + cuda_version + "-"
+                    elif cuda_version == "arm":
"serving-" + cuda_version + "-" + else: + device_version = "serving-gpu-cuda" + cuda_version + "-" folder_name = device_version + serving_server_version tar_name = folder_name + ".tar.gz" @@ -507,36 +527,65 @@ class Server(object): time.sleep(1) else: print("Use local bin : {}".format(self.bin_path)) - self.check_cuda() - command = "{} " \ - "-enable_model_toolkit " \ - "-inferservice_path {} " \ - "-inferservice_file {} " \ - "-max_concurrency {} " \ - "-num_threads {} " \ - "-port {} " \ - "-reload_interval_s {} " \ - "-resource_path {} " \ - "-resource_file {} " \ - "-workflow_path {} " \ - "-workflow_file {} " \ - "-bthread_concurrency {} " \ - "-gpuid {} " \ - "-max_body_size {} ".format( - self.bin_path, - self.workdir, - self.infer_service_fn, - self.max_concurrency, - self.num_threads, - self.port, - self.reload_interval_s, - self.workdir, - self.resource_fn, - self.workdir, - self.workflow_fn, - self.num_threads, - self.gpuid, - self.max_body_size) + #self.check_cuda() + if self.use_lite: + command = "{} " \ + "-enable_model_toolkit " \ + "-inferservice_path {} " \ + "-inferservice_file {} " \ + "-max_concurrency {} " \ + "-num_threads {} " \ + "-port {} " \ + "-reload_interval_s {} " \ + "-resource_path {} " \ + "-resource_file {} " \ + "-workflow_path {} " \ + "-workflow_file {} " \ + "-bthread_concurrency {} " \ + "-max_body_size {} ".format( + self.bin_path, + self.workdir, + self.infer_service_fn, + self.max_concurrency, + self.num_threads, + self.port, + self.reload_interval_s, + self.workdir, + self.resource_fn, + self.workdir, + self.workflow_fn, + self.num_threads, + self.max_body_size) + else: + command = "{} " \ + "-enable_model_toolkit " \ + "-inferservice_path {} " \ + "-inferservice_file {} " \ + "-max_concurrency {} " \ + "-num_threads {} " \ + "-port {} " \ + "-reload_interval_s {} " \ + "-resource_path {} " \ + "-resource_file {} " \ + "-workflow_path {} " \ + "-workflow_file {} " \ + "-bthread_concurrency {} " \ + "-gpuid {} " \ + "-max_body_size {} ".format( + self.bin_path, + self.workdir, + self.infer_service_fn, + self.max_concurrency, + self.num_threads, + self.port, + self.reload_interval_s, + self.workdir, + self.resource_fn, + self.workdir, + self.workflow_fn, + self.num_threads, + self.gpuid, + self.max_body_size) print("Going to Run Comand") print(command) diff --git a/python/paddle_serving_server_gpu/serve.py b/python/paddle_serving_server_gpu/serve.py index c2b170fbeb3f9ee772e86c216fe3776f34187743..ffa4c2336fd4307f67fd2f3578a1aa3102850ce9 100644 --- a/python/paddle_serving_server_gpu/serve.py +++ b/python/paddle_serving_server_gpu/serve.py @@ -38,7 +38,9 @@ def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-miss ir_optim = args.ir_optim max_body_size = args.max_body_size use_multilang = args.use_multilang - workdir = "{}_{}".format(args.workdir, gpuid) + workdir = args.workdir + if gpuid >= 0: + workdir = "{}_{}".format(args.workdir, gpuid) if model == "": print("You must specify your serving model") @@ -67,6 +69,13 @@ def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-miss if args.use_trt: server.set_trt() + if args.use_lite: + server.set_lite() + device = "arm" + + if args.use_xpu: + server.set_xpu() + if args.product_name != None: server.set_product_name(args.product_name) if args.container_id != None: @@ -95,7 +104,10 @@ def start_multi_card(args): # pylint: disable=doc-string-missing exit(-1) else: env_gpus = [] - if len(gpus) <= 0: + if args.use_lite: + print("run arm server.") + 
+        start_gpu_card_model(-1, -1, args)
+    elif len(gpus) <= 0:
         print("gpu_ids not set, going to run cpu service.")
         start_gpu_card_model(-1, -1, args)
     else:
@@ -128,7 +140,8 @@ if __name__ == "__main__":
         if len(gpu_ids) > 0:
             web_service.set_gpus(gpu_ids)
         web_service.prepare_server(
-            workdir=args.workdir, port=args.port, device=args.device)
+            workdir=args.workdir, port=args.port, device=args.device,
+            use_lite=args.use_lite, use_xpu=args.use_xpu, ir_optim=args.ir_optim)
         web_service.run_rpc_service()
 
         app_instance = Flask(__name__)
diff --git a/python/paddle_serving_server_gpu/web_service.py b/python/paddle_serving_server_gpu/web_service.py
index 8389f92cbfda7a209ff0fe4a77497ba2db1dbe1f..4b89d90ee6893c3fafd596dc8f6c5cabc3a248bf 100644
--- a/python/paddle_serving_server_gpu/web_service.py
+++ b/python/paddle_serving_server_gpu/web_service.py
@@ -83,10 +83,15 @@ class WebService(object):
                       gpuid=0,
                       thread_num=2,
                       mem_optim=True,
+                      use_lite=False,
+                      use_xpu=False,
                       ir_optim=False):
         device = "gpu"
         if gpuid == -1:
-            device = "cpu"
+            if use_lite:
+                device = "arm"
+            else:
+                device = "cpu"
         op_maker = serving.OpMaker()
         read_op = op_maker.create('general_reader')
         general_infer_op = op_maker.create('general_infer')
@@ -103,6 +108,11 @@ class WebService(object):
         server.set_memory_optimize(mem_optim)
         server.set_ir_optimize(ir_optim)
 
+        if use_lite:
+            server.set_lite()
+        if use_xpu:
+            server.set_xpu()
+
         server.load_model_config(self.model_config)
         if gpuid >= 0:
             server.set_gpuid(gpuid)
@@ -125,9 +135,11 @@ class WebService(object):
                        workdir="",
                        port=9393,
                        device="gpu",
+                       use_lite=False,
+                       use_xpu=False,
+                       ir_optim=False,
                        gpuid=0,
-                       mem_optim=True,
-                       ir_optim=False):
+                       mem_optim=True):
         print("This API will be deprecated later. Please do not use it")
         self.workdir = workdir
         self.port = port
@@ -150,6 +162,8 @@ class WebService(object):
                     -1,
                     thread_num=2,
                     mem_optim=mem_optim,
+                    use_lite=use_lite,
+                    use_xpu=use_xpu,
                     ir_optim=ir_optim))
         else:
             for i, gpuid in enumerate(self.gpus):
@@ -160,6 +174,8 @@ class WebService(object):
                         gpuid,
                         thread_num=2,
                         mem_optim=mem_optim,
+                        use_lite=use_lite,
+                        use_xpu=use_xpu,
                         ir_optim=ir_optim))
 
     def _launch_web_service(self):
diff --git a/python/pipeline/local_service_handler.py b/python/pipeline/local_service_handler.py
index a73627b69a37325b9895fa8a3217314d0371f539..f519ca2d115128bc6a6e5778dba992bc82bda5c1 100644
--- a/python/pipeline/local_service_handler.py
+++ b/python/pipeline/local_service_handler.py
@@ -44,6 +44,8 @@ class LocalServiceHandler(object):
                  ir_optim=False,
                  available_port_generator=None,
                  use_trt=False,
+                 use_lite=False,
+                 use_xpu=False,
                  use_profile=False):
         """
         Initialization of localservicehandler
@@ -60,6 +62,8 @@ class LocalServiceHandler(object):
            ir_optim: use calculation chart optimization, False default.
           available_port_generator: generate available ports
           use_trt: use nvidia tensorRt engine, False default.
+           use_lite: use Paddle-Lite engine, False default.
+           use_xpu: run predict on Baidu Kunlun, False default.
           use_profile: use profiling, False default.
 
         Returns:
@@ -74,10 +78,16 @@ class LocalServiceHandler(object):
         if devices == "":
             # cpu
             devices = [-1]
-            self._device_type = "cpu"
-            self._port_list.append(available_port_generator.next())
-            _LOGGER.info("Model({}) will be launch in cpu device. Port({})"
-                         .format(model_config, self._port_list))
+            if use_lite:
+                self._device_type = "arm"
+                self._port_list.append(available_port_generator.next())
+                _LOGGER.info("Model({}) will be launched in arm device. Port({})"
Port({})" + .format(model_config, self._port_list)) + else: + self._device_type = "cpu" + self._port_list.append(available_port_generator.next()) + _LOGGER.info("Model({}) will be launch in cpu device. Port({})" + .format(model_config, self._port_list)) else: # gpu self._device_type = "gpu" @@ -96,6 +106,8 @@ class LocalServiceHandler(object): self._rpc_service_list = [] self._server_pros = [] self._use_trt = use_trt + self._use_lite = use_lite + self._use_xpu = use_xpu self._use_profile = use_profile self.fetch_names_ = fetch_names @@ -138,8 +150,11 @@ class LocalServiceHandler(object): if self._local_predictor_client is None: self._local_predictor_client = LocalPredictor() use_gpu = False + use_lite = False if self._device_type == "gpu": use_gpu = True + elif self._device_type == "arm": + use_lite = True self._local_predictor_client.load_model_config( model_path=self._model_config, use_gpu=use_gpu, @@ -148,7 +163,9 @@ class LocalServiceHandler(object): thread_num=self._thread_num, mem_optim=self._mem_optim, ir_optim=self._ir_optim, - use_trt=self._use_trt) + use_trt=self._use_trt, + use_lite=use_lite, + use_xpu=self._use_xpu) return self._local_predictor_client def get_client_config(self): @@ -185,7 +202,7 @@ class LocalServiceHandler(object): server = Server() else: - #gpu + #gpu or arm from paddle_serving_server_gpu import OpMaker, OpSeqMaker, Server op_maker = OpMaker() read_op = op_maker.create('general_reader') diff --git a/python/setup.py.app.in b/python/setup.py.app.in index 8480ed8471e60c7e7eb8f14bf11a1cc2d23204cf..5cb2d137c8491e461e8b3149e8faf7c82512020a 100644 --- a/python/setup.py.app.in +++ b/python/setup.py.app.in @@ -32,7 +32,7 @@ if '${PACK}' == 'ON': REQUIRED_PACKAGES = [ - 'six >= 1.10.0', 'sentencepiece<=0.1.92', 'opencv-python<=4.2.0.32', 'pillow', + 'six >= 1.10.0', 'sentencepiece', 'opencv-python', 'pillow', 'pyclipper' ]