Unverified commit 75333c57, authored by TeslaZhao, committed by GitHub

Merge pull request #946 from zhangjun/arm-devel

[WIP] Support ARM and XPU
@@ -49,6 +49,9 @@ set(THIRD_PARTY_BUILD_TYPE Release)
option(WITH_AVX "Compile Paddle Serving with AVX intrinsics" OFF)
option(WITH_MKL "Compile Paddle Serving with MKL support." OFF)
option(WITH_GPU "Compile Paddle Serving with NVIDIA GPU" OFF)
option(WITH_LITE "Compile Paddle Serving with Paddle Lite Engine" OFF)
option(WITH_XPU "Compile Paddle Serving with Baidu Kunlun" OFF)
option(WITH_PYTHON "Compile Paddle Serving with Python" ON)
option(CLIENT "Compile Paddle Serving Client" OFF)
option(SERVER "Compile Paddle Serving Server" OFF)
option(APP "Compile Paddle Serving App package" OFF)
@@ -66,40 +69,40 @@ if (NOT DEFINED WITH_MKLDNN)
endif()
endif()
if (SERVER)
include(external/jsoncpp)
#include(external/rocksdb)
endif()
if (SERVER OR CLIENT)
include(external/snappy)
include(external/leveldb)
include(external/zlib)
include(external/boost)
include(external/protobuf)
include(external/brpc)
include(external/gflags)
include(external/glog)
if (WITH_PYTHON)
include(external/pybind11)
include(external/python)
endif()
include(generic)
include(flags)
endif()
if (APP)
include(external/zlib)
include(external/boost)
include(external/protobuf)
include(external/gflags)
include(external/glog)
include(external/pybind11)
include(external/python)
include(generic)
endif()
if (SERVER)
include(external/jsoncpp)
#include(external/rocksdb)
include(external/cudnn)
include(paddlepaddle)
endif()
message("paddle serving source dir: " ${PADDLE_SERVING_SOURCE_DIR})
@@ -125,26 +128,24 @@ set(EXTERNAL_LIBS
)
if(SERVER)
if(WITH_MKLML)
list(APPEND EXTERNAL_LIBS ${MKLML_IOMP_LIB})
endif()
if(WITH_MKLDNN)
list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB})
endif()
list(APPEND EXTERNAL_LIBS paddlepaddle)
endif()
add_subdirectory(core)
if(SERVER)
add_subdirectory(paddle_inference)
endif()
if (WITH_PYTHON)
add_subdirectory(python)
endif()
@@ -22,6 +22,7 @@ set(BOOST_PROJECT "extern_boost")
# version of boost, say, 1.66.0, doesn't build on CentOS 6. We
# checked that the devtools package of CentOS 6 installs boost 1.41.0.
# So we use 1.41.0 here.
set(BOOST_VER "1.74.0")
set(BOOST_TAR "boost_1_74_0" CACHE STRING "" FORCE)
set(BOOST_URL "http://paddlepaddledeps.cdn.bcebos.com/${BOOST_TAR}.tar.gz" CACHE STRING "" FORCE)
......
@@ -38,13 +38,21 @@ INCLUDE_DIRECTORIES(${BRPC_INCLUDE_DIR})
# Reference https://stackoverflow.com/questions/45414507/pass-a-list-of-prefix-paths-to-externalproject-add-in-cmake-args
set(prefix_path "${THIRD_PARTY_PATH}/install/gflags|${THIRD_PARTY_PATH}/install/leveldb|${THIRD_PARTY_PATH}/install/snappy|${THIRD_PARTY_PATH}/install/gtest|${THIRD_PARTY_PATH}/install/protobuf|${THIRD_PARTY_PATH}/install/zlib|${THIRD_PARTY_PATH}/install/glog")
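# Select the brpc source for this build: when WITH_LITE is on, brpc comes from a
# different fork/branch; otherwise the pinned commit used by regular builds is kept.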
if(WITH_LITE)
set(BRPC_REPO "https://github.com/zhangjun/incubator-brpc.git")
set(BRPC_TAG "master")
else()
set(BRPC_REPO "https://github.com/wangjiawei04/brpc")
set(BRPC_TAG "6d79e0b17f25107c35b705ea58d888083f59ff47")
endif()
# If a minimal .a is needed, you can set WITH_DEBUG_SYMBOLS=OFF
ExternalProject_Add(
extern_brpc
${EXTERNAL_PROJECT_LOG_ARGS}
# TODO(gongwb): change to the newest repo when it is updated.
GIT_REPOSITORY ${BRPC_REPO}
GIT_TAG ${BRPC_TAG}
PREFIX ${BRPC_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
......
@@ -93,7 +93,11 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR})
if(NOT APPLE)
find_package(Threads REQUIRED)
link_libraries(${CMAKE_THREAD_LIBS_INIT})
if(WITH_LITE OR WITH_XPU)
set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -fopenmp -pthread -ldl -lrt")
else()
set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt")
endif()
endif(NOT APPLE)
set_property(GLOBAL PROPERTY FLUID_MODULES "")
......
@@ -39,6 +39,12 @@ if (WITH_GPU)
else()
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda10-cudnn7-avx-mkl")
endif()
elseif (WITH_LITE)
if (WITH_XPU)
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-arm-xpu")
else()
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-arm")
endif()
else()
if (WITH_AVX)
if (WITH_MKLML)
@@ -51,7 +57,12 @@ else()
endif()
endif()
if(WITH_LITE)
SET(PADDLE_LIB_PATH "http://paddle-serving.bj.bcebos.com/inferlib/${PADDLE_LIB_VERSION}/paddle_inference.tgz")
else()
SET(PADDLE_LIB_PATH "http://paddle-inference-lib.bj.bcebos.com/${PADDLE_LIB_VERSION}/paddle_inference.tgz")
endif()
MESSAGE(STATUS "PADDLE_LIB_PATH=${PADDLE_LIB_PATH}")
if (WITH_GPU OR WITH_MKLML)
if (WITH_TRT)
@@ -117,11 +128,24 @@ ADD_LIBRARY(paddle_fluid SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.so)
if (WITH_TRT)
ADD_LIBRARY(nvinfer SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET nvinfer PROPERTY IMPORTED_LOCATION ${TENSORRT_ROOT}/lib/libnvinfer.so)
ADD_LIBRARY(nvinfer_plugin SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET nvinfer_plugin PROPERTY IMPORTED_LOCATION ${TENSORRT_ROOT}/lib/libnvinfer_plugin.so)
endif()
if (WITH_LITE)
ADD_LIBRARY(paddle_api_full_bundled STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET paddle_api_full_bundled PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/lite/cxx/lib/libpaddle_api_full_bundled.a)
if (WITH_XPU)
ADD_LIBRARY(xpuapi SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET xpuapi PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/xpu/lib/libxpuapi.so)
ADD_LIBRARY(xpurt SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET xpurt PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/xpu/lib/libxpurt.so)
endif()
endif()
ADD_LIBRARY(xxhash STATIC IMPORTED GLOBAL)
@@ -132,7 +156,14 @@ LIST(APPEND external_project_dependencies paddle)
LIST(APPEND paddle_depend_libs
xxhash)
if(WITH_LITE)
LIST(APPEND paddle_depend_libs paddle_api_full_bundled)
if(WITH_XPU)
LIST(APPEND paddle_depend_libs xpuapi xpurt)
endif()
endif()
if(WITH_TRT)
LIST(APPEND paddle_depend_libs
nvinfer nvinfer_plugin)
endif()
@@ -27,6 +27,8 @@ install(FILES ${inc}
DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/include/configure)
endif()
if (WITH_PYTHON)
py_proto_compile(general_model_config_py_proto SRCS proto/general_model_config.proto)
add_custom_target(general_model_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(general_model_config_py_proto general_model_config_py_proto_init)
@@ -70,7 +72,7 @@ if (SERVER)
py_proto_compile(server_config_py_proto SRCS proto/server_configure.proto)
add_custom_target(server_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(server_config_py_proto server_config_py_proto_init)
if (NOT WITH_GPU AND NOT WITH_LITE)
add_custom_command(TARGET server_config_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
@@ -114,3 +116,5 @@ add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif()
endif()
endif()
@@ -45,6 +45,8 @@ message EngineDesc {
optional bool force_update_static_cache = 15;
optional bool enable_ir_optimization = 16;
optional bool use_trt = 17;
optional bool use_lite = 18;
optional bool use_xpu = 19;
};
// model_toolkit conf
......
@@ -6,6 +6,11 @@ add_dependencies(serving pdcodegen fluid_cpu_engine pdserving paddle_fluid cube-
if (WITH_GPU)
add_dependencies(serving fluid_gpu_engine)
endif()
if (WITH_LITE)
add_dependencies(serving fluid_arm_engine)
endif()
target_include_directories(serving PUBLIC
${CMAKE_CURRENT_BINARY_DIR}/../../core/predictor
)
@@ -15,6 +20,11 @@ if(WITH_GPU)
-Wl,--no-whole-archive)
endif()
if(WITH_LITE)
target_link_libraries(serving -Wl,--whole-archive fluid_arm_engine
-Wl,--no-whole-archive)
endif()
target_link_libraries(serving -Wl,--whole-archive fluid_cpu_engine
-Wl,--no-whole-archive)
......
@@ -38,6 +38,8 @@ class InferEngineCreationParams {
_static_optimization = false;
_force_update_static_cache = false;
_use_trt = false;
_use_lite = false;
_use_xpu = false;
}
void set_path(const std::string& path) { _path = path; }
@@ -52,6 +54,10 @@ class InferEngineCreationParams {
void set_use_trt(bool use_trt) { _use_trt = use_trt; }
void set_use_lite(bool use_lite) { _use_lite = use_lite; }
void set_use_xpu(bool use_xpu) { _use_xpu = use_xpu; }
bool enable_memory_optimization() const {
return _enable_memory_optimization;
}
@@ -60,6 +66,10 @@ class InferEngineCreationParams {
bool use_trt() const { return _use_trt; }
bool use_lite() const { return _use_lite; }
bool use_xpu() const { return _use_xpu; }
void set_static_optimization(bool static_optimization = false) {
_static_optimization = static_optimization;
}
@@ -79,6 +89,9 @@ class InferEngineCreationParams {
<< "model_path = " << _path << ", "
<< "enable_memory_optimization = " << _enable_memory_optimization
<< ", "
<< "enable_tensorrt = " << _use_trt << ", "
<< "enable_lite = " << _use_lite << ", "
<< "enable_xpu = " << _use_xpu << ", "
<< "enable_ir_optimization = " << _enable_ir_optimization << ", " << "enable_ir_optimization = " << _enable_ir_optimization << ", "
<< "static_optimization = " << _static_optimization << ", " << "static_optimization = " << _static_optimization << ", "
<< "force_update_static_cache = " << _force_update_static_cache; << "force_update_static_cache = " << _force_update_static_cache;
...@@ -91,6 +104,8 @@ class InferEngineCreationParams { ...@@ -91,6 +104,8 @@ class InferEngineCreationParams {
bool _static_optimization; bool _static_optimization;
bool _force_update_static_cache; bool _force_update_static_cache;
bool _use_trt; bool _use_trt;
bool _use_lite;
bool _use_xpu;
};
class InferEngine {
@@ -179,6 +194,14 @@ class ReloadableInferEngine : public InferEngine {
_infer_engine_params.set_use_trt(conf.use_trt());
}
if (conf.has_use_lite()) {
_infer_engine_params.set_use_lite(conf.use_lite());
}
if (conf.has_use_xpu()) {
_infer_engine_params.set_use_xpu(conf.use_xpu());
}
if (!check_need_reload() || load(_infer_engine_params) != 0) {
LOG(ERROR) << "Failed load model_data_path" << _model_data_path;
return -1;
......
@@ -13,8 +13,13 @@
# limitations under the License
if (NOT CLIENT_ONLY)
add_subdirectory(inferencer-fluid-cpu)
if (WITH_GPU)
add_subdirectory(inferencer-fluid-gpu)
endif()
if (WITH_LITE)
add_subdirectory(inferencer-fluid-arm)
endif()
endif()
FILE(GLOB fluid_arm_engine_srcs ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp)
add_library(fluid_arm_engine ${fluid_arm_engine_srcs})
target_include_directories(fluid_arm_engine PUBLIC
${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir/)
add_dependencies(fluid_arm_engine pdserving extern_paddle configure)
target_link_libraries(fluid_arm_engine pdserving paddle_fluid -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
install(TARGETS fluid_arm_engine
ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib
)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <pthread.h>
#include <fstream>
#include <map>
#include <string>
#include <vector>
#include "core/configure/include/configure_parser.h"
#include "core/configure/inferencer_configure.pb.h"
#include "core/predictor/framework/infer.h"
#include "paddle_inference_api.h" // NOLINT
namespace baidu {
namespace paddle_serving {
namespace fluid_arm {
class AutoLock {
public:
explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) {
pthread_mutex_lock(&mutex);
}
~AutoLock() { pthread_mutex_unlock(&_mut); }
private:
pthread_mutex_t& _mut;
};
class GlobalPaddleCreateMutex {
public:
pthread_mutex_t& mutex() { return _mut; }
static pthread_mutex_t& instance() {
static GlobalPaddleCreateMutex gmutex;
return gmutex.mutex();
}
private:
GlobalPaddleCreateMutex() { pthread_mutex_init(&_mut, NULL); }
pthread_mutex_t _mut;
};
using paddle_infer::Config;
using paddle_infer::Predictor;
using paddle_infer::Tensor;
using paddle_infer::PrecisionType;
using paddle_infer::CreatePredictor;
// data interface
class FluidFamilyCore {
public:
virtual ~FluidFamilyCore() {}
virtual std::vector<std::string> GetInputNames() {
return _core->GetInputNames();
}
virtual std::unique_ptr<Tensor> GetInputHandle(const std::string& name) {
return _core->GetInputHandle(name);
}
virtual std::vector<std::string> GetOutputNames() {
return _core->GetOutputNames();
}
virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string& name) {
return _core->GetOutputHandle(name);
}
virtual bool Run() {
if (!_core->Run()) {
LOG(ERROR) << "Failed call Run with paddle predictor";
return false;
}
return true;
}
virtual int create(const predictor::InferEngineCreationParams& params) = 0;
virtual int clone(void* origin_core) {
if (origin_core == NULL) {
LOG(ERROR) << "origin paddle Predictor is null.";
return -1;
}
Predictor* p_predictor = (Predictor*)origin_core;
_core = p_predictor->Clone();
if (_core.get() == NULL) {
LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
return -1;
}
return 0;
}
virtual void* get() { return _core.get(); }
protected:
std::shared_ptr<Predictor> _core;
};
// infer interface
class FluidArmAnalysisCore : public FluidFamilyCore {
public:
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
return -1;
}
Config config;
config.SetParamsFile(data_path + "/__params__");
config.SetProgFile(data_path + "/__model__");
config.DisableGpu();
config.SetCpuMathLibraryNumThreads(1);
if (params.enable_memory_optimization()) {
config.EnableMemoryOptim();
}
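// Route execution through the Paddle-Lite engine (FP32 precision, zero-copy tensors enabled).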
if (params.use_lite()) {
config.EnableLiteEngine(PrecisionType::kFloat32, true);
}
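// The argument to EnableXpu is the L3 cache workspace size reserved on the Kunlun card.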
if (params.use_xpu()) {
config.EnableXpu(100);
}
config.SwitchSpecifyInputNames(true);
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core = CreatePredictor(config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
class FluidArmAnalysisDirCore : public FluidFamilyCore {
public:
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
return -1;
}
Config config;
config.SetModel(data_path);
config.DisableGpu();
config.SwitchSpecifyInputNames(true);
config.SetCpuMathLibraryNumThreads(1);
if (params.enable_memory_optimization()) {
config.EnableMemoryOptim();
}
if (params.enable_ir_optimization()) {
config.SwitchIrOptim(true);
} else {
config.SwitchIrOptim(false);
}
if (params.use_lite()) {
config.EnableLiteEngine(PrecisionType::kFloat32, true);
}
if (params.use_xpu()) {
config.EnableXpu(100);
}
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core = CreatePredictor(config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
class Parameter {
public:
Parameter() : _row(0), _col(0), _params(NULL) {}
~Parameter() {
VLOG(2) << "before destroy Parameter, file_name[" << _file_name << "]";
destroy();
}
int init(int row, int col, const char* file_name) {
destroy();
_file_name = file_name;
_row = row;
_col = col;
_params = reinterpret_cast<float*>(malloc(_row * _col * sizeof(float)));
if (_params == NULL) {
LOG(ERROR) << "Load " << _file_name << " malloc error.";
return -1;
}
VLOG(2) << "Load parameter file[" << _file_name << "] success.";
return 0;
}
void destroy() {
_row = 0;
_col = 0;
if (_params != NULL) {
free(_params);
_params = NULL;
}
}
int load() {
if (_params == NULL || _row <= 0 || _col <= 0) {
LOG(ERROR) << "load parameter error [not inited].";
return -1;
}
FILE* fs = fopen(_file_name.c_str(), "rb");
if (fs == NULL) {
LOG(ERROR) << "load " << _file_name << " fopen error.";
return -1;
}
static const uint32_t MODEL_FILE_HEAD_LEN = 16;
char head[MODEL_FILE_HEAD_LEN] = {0};
if (fread(head, 1, MODEL_FILE_HEAD_LEN, fs) != MODEL_FILE_HEAD_LEN) {
destroy();
LOG(ERROR) << "Load " << _file_name << " read head error.";
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
return -1;
}
uint32_t matrix_size = _row * _col;
if (matrix_size == fread(_params, sizeof(float), matrix_size, fs)) {
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
VLOG(2) << "load " << _file_name << " read ok.";
return 0;
} else {
LOG(ERROR) << "load " << _file_name << " read error.";
destroy();
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
return -1;
}
return 0;
}
public:
std::string _file_name;
int _row;
int _col;
float* _params;
};
} // namespace fluid_arm
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle_inference/inferencer-fluid-arm/include/fluid_arm_engine.h"
#include "core/predictor/framework/factory.h"
namespace baidu {
namespace paddle_serving {
namespace fluid_arm {
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<FluidArmAnalysisCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_ARM_ANALYSIS");
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<
FluidArmAnalysisDirCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_ARM_ANALYSIS_DIR");
} // namespace fluid_arm
} // namespace paddle_serving
} // namespace baidu
@@ -7,7 +7,7 @@ if (CLIENT)
endif()
if (SERVER)
if (NOT WITH_GPU AND NOT WITH_LITE)
file(INSTALL pipeline DESTINATION paddle_serving_server)
file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server/*.py)
else()
@@ -34,7 +34,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.app.in
endif()
if (SERVER)
if (NOT WITH_GPU AND NOT WITH_LITE)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.server.in
${CMAKE_CURRENT_BINARY_DIR}/setup.py)
else()
@@ -72,7 +72,7 @@ add_custom_target(paddle_python ALL DEPENDS serving_client ${PADDLE_SERVING_BINA
endif()
if (SERVER)
if(NOT WITH_GPU AND NOT WITH_LITE)
add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server/ ${PADDLE_SERVING_BINARY_DIR}/python/
@@ -90,6 +90,16 @@ if (SERVER)
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
elseif(WITH_LITE)
add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r
${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py
"server_gpu" arm
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
else()
add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
......
@@ -57,6 +57,8 @@ class LocalPredictor(object):
mem_optim=True,
ir_optim=False,
use_trt=False,
use_lite=False,
use_xpu=False,
use_feed_fetch_ops=False):
"""
Load model config and set the engine config for the paddle predictor
@@ -70,6 +72,8 @@ class LocalPredictor(object):
mem_optim: memory optimization, True default.
ir_optim: open calculation chart optimization, False default.
use_trt: use nvidia TensorRT optimization, False default
use_lite: use Paddle-Lite engine, False default
use_xpu: run predict on Baidu Kunlun, False default
use_feed_fetch_ops: use feed/fetch ops, False default.
"""
client_config = "{}/serving_server_conf.prototxt".format(model_path)
@@ -80,9 +84,9 @@ class LocalPredictor(object):
config = AnalysisConfig(model_path)
logger.info("load_model_config params: model_path:{}, use_gpu:{},\
gpu_id:{}, use_profile:{}, thread_num:{}, mem_optim:{}, ir_optim:{},\
use_trt:{}, use_lite:{}, use_xpu: {}, use_feed_fetch_ops:{}".format(
model_path, use_gpu, gpu_id, use_profile, thread_num, mem_optim,
ir_optim, use_trt, use_lite, use_xpu, use_feed_fetch_ops))
self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
@@ -119,6 +123,17 @@ class LocalPredictor(object):
use_static=False,
use_calib_mode=False)
if use_lite:
config.enable_lite_engine(
precision_mode = PrecisionType.Float32,
zero_copy = True,
passes_filter = [],
ops_filter = []
)
if use_xpu:
config.enable_xpu(100 * 1024 * 1024)
self.predictor = create_paddle_predictor(config)
def predict(self, feed=None, fetch=None, batch=False, log_id=0):
......
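For reference, a minimal sketch of how the new switches might be used from the local predictor API; the import path, model directory, feed name "x" and fetch name "price" below are illustrative assumptions, not part of this change.

# Hypothetical usage of LocalPredictor with the new use_lite / use_xpu flags.
from paddle_serving_app.local_predict import LocalPredictor
import numpy as np

predictor = LocalPredictor()
predictor.load_model_config(
    "./uci_housing_model",  # directory holding serving_server_conf.prototxt and the model
    use_gpu=False,
    use_lite=True,          # run inference through the Paddle-Lite engine
    use_xpu=True)           # and place it on a Baidu Kunlun (XPU) card
fetch_map = predictor.predict(
    feed={"x": np.random.rand(1, 13).astype("float32")}, fetch=["price"])
print(fetch_map)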
@@ -77,6 +77,10 @@ def serve_args():
help="Use Multi-language-service")
parser.add_argument(
"--use_trt", default=False, action="store_true", help="Use TensorRT")
parser.add_argument(
"--use_lite", default=False, action="store_true", help="Use PaddleLite")
parser.add_argument(
"--use_xpu", default=False, action="store_true", help="Use XPU")
parser.add_argument(
"--product_name",
type=str,
@@ -210,6 +214,8 @@ class Server(object):
self.use_local_bin = False
self.gpuid = 0
self.use_trt = False
self.use_lite = False
self.use_xpu = False
self.model_config_paths = None  # for multi-model in a workflow
self.product_name = None
self.container_id = None
@@ -279,6 +285,12 @@ class Server(object):
def set_trt(self):
self.use_trt = True
def set_lite(self):
self.use_lite = True
def set_xpu(self):
self.use_xpu = True
def _prepare_engine(self, model_config_paths, device):
if self.model_toolkit_conf == None:
self.model_toolkit_conf = server_sdk.ModelToolkitConf()
@@ -299,11 +311,17 @@ class Server(object):
engine.static_optimization = False
engine.force_update_static_cache = False
engine.use_trt = self.use_trt
engine.use_lite = self.use_lite
engine.use_xpu = self.use_xpu
if device == "cpu": if device == "cpu":
engine.type = "FLUID_CPU_ANALYSIS_DIR" engine.type = "FLUID_CPU_ANALYSIS_DIR"
elif device == "gpu": elif device == "gpu":
engine.type = "FLUID_GPU_ANALYSIS_DIR" engine.type = "FLUID_GPU_ANALYSIS_DIR"
elif device == "arm":
engine.type = "FLUID_ARM_ANALYSIS_DIR"
self.model_toolkit_conf.engines.extend([engine])
@@ -405,10 +423,12 @@ class Server(object):
for line in version_file.readlines():
if re.match("cuda_version", line):
cuda_version = line.split("\"")[1]
if cuda_version == "trt":
    device_version = "serving-gpu-" + cuda_version + "-"
elif cuda_version == "arm":
    device_version = "serving-" + cuda_version + "-"
else:
    device_version = "serving-gpu-cuda" + cuda_version + "-"
folder_name = device_version + serving_server_version
tar_name = folder_name + ".tar.gz"
@@ -507,36 +527,65 @@ class Server(object):
time.sleep(1)
else:
print("Use local bin : {}".format(self.bin_path))
#self.check_cuda()
if self.use_lite:
    command = "{} " \
              "-enable_model_toolkit " \
              "-inferservice_path {} " \
              "-inferservice_file {} " \
              "-max_concurrency {} " \
              "-num_threads {} " \
              "-port {} " \
              "-reload_interval_s {} " \
              "-resource_path {} " \
              "-resource_file {} " \
              "-workflow_path {} " \
              "-workflow_file {} " \
              "-bthread_concurrency {} " \
              "-max_body_size {} ".format(
                  self.bin_path,
                  self.workdir,
                  self.infer_service_fn,
                  self.max_concurrency,
                  self.num_threads,
                  self.port,
                  self.reload_interval_s,
                  self.workdir,
                  self.resource_fn,
                  self.workdir,
                  self.workflow_fn,
                  self.num_threads,
                  self.max_body_size)
else:
    command = "{} " \
              "-enable_model_toolkit " \
              "-inferservice_path {} " \
              "-inferservice_file {} " \
              "-max_concurrency {} " \
              "-num_threads {} " \
              "-port {} " \
              "-reload_interval_s {} " \
              "-resource_path {} " \
              "-resource_file {} " \
              "-workflow_path {} " \
              "-workflow_file {} " \
              "-bthread_concurrency {} " \
              "-gpuid {} " \
              "-max_body_size {} ".format(
                  self.bin_path,
                  self.workdir,
                  self.infer_service_fn,
                  self.max_concurrency,
                  self.num_threads,
                  self.port,
                  self.reload_interval_s,
                  self.workdir,
                  self.resource_fn,
                  self.workdir,
                  self.workflow_fn,
                  self.num_threads,
                  self.gpuid,
                  self.max_body_size)
print("Going to Run Comand") print("Going to Run Comand")
print(command) print(command)
......
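A rough sketch of driving the new Server switches programmatically; the model directory is an illustrative assumption, and this simply mirrors what serve.py does when --use_lite/--use_xpu are passed.

# Hypothetical sketch: build an ARM/XPU server by hand with the new setters.
from paddle_serving_server_gpu import OpMaker, OpSeqMaker, Server

op_maker = OpMaker()
op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(op_maker.create('general_reader'))
op_seq_maker.add_op(op_maker.create('general_infer'))
op_seq_maker.add_op(op_maker.create('general_response'))

server = Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_lite()   # engine.use_lite = True; with device "arm" the FLUID_ARM_ANALYSIS_DIR engine is chosen
server.set_xpu()    # engine.use_xpu = True, so the ARM engine calls EnableXpu on its config
server.load_model_config("./uci_housing_model")
server.prepare_server(workdir="workdir", port=9292, device="arm")
server.run_server()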
@@ -38,7 +38,9 @@ def start_gpu_card_model(index, gpuid, args):  # pylint: disable=doc-string-miss
ir_optim = args.ir_optim
max_body_size = args.max_body_size
use_multilang = args.use_multilang
workdir = args.workdir
if gpuid >= 0:
workdir = "{}_{}".format(args.workdir, gpuid)
if model == "": if model == "":
print("You must specify your serving model") print("You must specify your serving model")
...@@ -67,6 +69,13 @@ def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-miss ...@@ -67,6 +69,13 @@ def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-miss
if args.use_trt: if args.use_trt:
server.set_trt() server.set_trt()
if args.use_lite:
server.set_lite()
device = "arm"
if args.use_xpu:
server.set_xpu()
if args.product_name != None:
server.set_product_name(args.product_name)
if args.container_id != None:
@@ -95,7 +104,10 @@ def start_multi_card(args):  # pylint: disable=doc-string-missing
exit(-1)
else:
env_gpus = []
if args.use_lite:
print("run arm server.")
start_gpu_card_model(-1, -1, args)
elif len(gpus) <= 0:
print("gpu_ids not set, going to run cpu service.") print("gpu_ids not set, going to run cpu service.")
start_gpu_card_model(-1, -1, args) start_gpu_card_model(-1, -1, args)
else: else:
@@ -128,7 +140,8 @@ if __name__ == "__main__":
if len(gpu_ids) > 0:
web_service.set_gpus(gpu_ids)
web_service.prepare_server(
workdir=args.workdir, port=args.port, device=args.device,
use_lite=args.use_lite, use_xpu=args.use_xpu, ir_optim=args.ir_optim)
web_service.run_rpc_service()
app_instance = Flask(__name__)
......
@@ -83,10 +83,15 @@ class WebService(object):
gpuid=0,
thread_num=2,
mem_optim=True,
use_lite=False,
use_xpu=False,
ir_optim=False):
device = "gpu"
if gpuid == -1:
if use_lite:
device = "arm"
else:
device = "cpu"
op_maker = serving.OpMaker()
read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer')
@@ -103,6 +108,11 @@ class WebService(object):
server.set_memory_optimize(mem_optim)
server.set_ir_optimize(ir_optim)
if use_lite:
server.set_lite()
if use_xpu:
server.set_xpu()
server.load_model_config(self.model_config)
if gpuid >= 0:
server.set_gpuid(gpuid)
@@ -125,9 +135,11 @@ class WebService(object):
workdir="",
port=9393,
device="gpu",
use_lite=False,
use_xpu=False,
ir_optim=False,
gpuid=0,
mem_optim=True):
print("This API will be deprecated later. Please do not use it")
self.workdir = workdir
self.port = port
@@ -150,6 +162,8 @@ class WebService(object):
-1,
thread_num=2,
mem_optim=mem_optim,
use_lite=use_lite,
use_xpu=use_xpu,
ir_optim=ir_optim))
else:
for i, gpuid in enumerate(self.gpus):
@@ -160,6 +174,8 @@ class WebService(object):
gpuid,
thread_num=2,
mem_optim=mem_optim,
use_lite=use_lite,
use_xpu=use_xpu,
ir_optim=ir_optim))
def _launch_web_service(self):
......
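A hedged sketch of the WebService path with the new keyword arguments; the service name and model directory are illustrative. With no GPU ids configured, gpuid stays -1 and use_lite switches the internal device to "arm".

# Hypothetical sketch: WebService serving through Paddle-Lite on a Kunlun card.
from paddle_serving_server_gpu.web_service import WebService

service = WebService(name="uci")
service.load_model_config("./uci_housing_model")
service.prepare_server(
    workdir="workdir",
    port=9393,
    device="gpu",       # default_rpc_service falls back to "arm" when gpuid == -1 and use_lite is set
    use_lite=True,
    use_xpu=True,
    ir_optim=False)
service.run_rpc_service()
service.run_web_service()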
@@ -44,6 +44,8 @@ class LocalServiceHandler(object):
ir_optim=False,
available_port_generator=None,
use_trt=False,
use_lite=False,
use_xpu=False,
use_profile=False):
"""
Initialization of localservicehandler
@@ -60,6 +62,8 @@ class LocalServiceHandler(object):
ir_optim: use calculation chart optimization, False default.
available_port_generator: generate available ports
use_trt: use nvidia tensorRt engine, False default.
use_lite: use Paddle-Lite engine, False default.
use_xpu: run predict on Baidu Kunlun, False default.
use_profile: use profiling, False default.
Returns:
@@ -74,10 +78,16 @@ class LocalServiceHandler(object):
if devices == "":
# cpu
devices = [-1]
if use_lite:
self._device_type = "arm"
self._port_list.append(available_port_generator.next())
_LOGGER.info("Model({}) will be launch in arm device. Port({})"
.format(model_config, self._port_list))
else:
self._device_type = "cpu"
self._port_list.append(available_port_generator.next())
_LOGGER.info("Model({}) will be launch in cpu device. Port({})"
.format(model_config, self._port_list))
else:
# gpu
self._device_type = "gpu"
@@ -96,6 +106,8 @@ class LocalServiceHandler(object):
self._rpc_service_list = []
self._server_pros = []
self._use_trt = use_trt
self._use_lite = use_lite
self._use_xpu = use_xpu
self._use_profile = use_profile
self.fetch_names_ = fetch_names
@@ -138,8 +150,11 @@ class LocalServiceHandler(object):
if self._local_predictor_client is None:
self._local_predictor_client = LocalPredictor()
use_gpu = False
use_lite = False
if self._device_type == "gpu": if self._device_type == "gpu":
use_gpu = True use_gpu = True
elif self._device_type == "arm":
use_lite = True
self._local_predictor_client.load_model_config(
model_path=self._model_config,
use_gpu=use_gpu,
@@ -148,7 +163,9 @@ class LocalServiceHandler(object):
thread_num=self._thread_num,
mem_optim=self._mem_optim,
ir_optim=self._ir_optim,
use_trt=self._use_trt,
use_lite=use_lite,
use_xpu=self._use_xpu)
return self._local_predictor_client
def get_client_config(self):
@@ -185,7 +202,7 @@ class LocalServiceHandler(object):
server = Server()
else:
#gpu or arm
from paddle_serving_server_gpu import OpMaker, OpSeqMaker, Server
op_maker = OpMaker()
read_op = op_maker.create('general_reader')
......
@@ -32,7 +32,7 @@ if '${PACK}' == 'ON':
REQUIRED_PACKAGES = [
'six >= 1.10.0', 'sentencepiece', 'opencv-python', 'pillow',
'pyclipper'
]
......