未验证 提交 adccfda1 编写于 作者: T TeslaZhao 提交者: GitHub

Merge pull request #1080 from zhangjun/infer

Optimize C++ inference code and remove redundant code
......@@ -18,7 +18,7 @@ SET(PADDLE_SOURCES_DIR ${THIRD_PARTY_PATH}/Paddle)
SET(PADDLE_DOWNLOAD_DIR ${PADDLE_SOURCES_DIR}/src/extern_paddle)
SET(PADDLE_INSTALL_DIR ${THIRD_PARTY_PATH}/install/Paddle/)
SET(PADDLE_INCLUDE_DIR "${PADDLE_INSTALL_DIR}/include" CACHE PATH "PaddlePaddle include directory." FORCE)
SET(PADDLE_LIBRARIES "${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.a" CACHE FILEPATH "Paddle library." FORCE)
SET(PADDLE_LIBRARIES "${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.a" CACHE FILEPATH "Paddle library." FORCE)
message("paddle install dir: " ${PADDLE_INSTALL_DIR})
......@@ -31,7 +31,7 @@ message( "WITH_GPU = ${WITH_GPU}")
# Paddle Version should be one of:
# latest: latest develop build
# version number like 1.5.2
SET(PADDLE_VERSION "2.0.0")
SET(PADDLE_VERSION "2.0.1")
if (WITH_GPU)
if(CUDA_VERSION EQUAL 11.0)
set(CUDA_SUFFIX "cuda11-cudnn8-avx-mkl")
......@@ -55,9 +55,9 @@ if (WITH_GPU)
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-${CUDA_SUFFIX}")
elseif (WITH_LITE)
if (WITH_XPU)
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-arm-xpu")
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-${CMAKE_SYSTEM_PROCESSOR}-xpu")
else()
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-arm")
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-${CMAKE_SYSTEM_PROCESSOR}")
endif()
else()
if (WITH_AVX)
......@@ -139,8 +139,8 @@ LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib)
ADD_LIBRARY(openblas STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET openblas PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/openblas/lib/libopenblas.a)
ADD_LIBRARY(paddle_fluid STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.a)
ADD_LIBRARY(paddle_inference STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET paddle_inference PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.a)
if (WITH_TRT)
ADD_LIBRARY(nvinfer SHARED IMPORTED GLOBAL)
......
......@@ -20,7 +20,7 @@ message EngineDesc {
required string type = 2;
required string reloadable_meta = 3;
required string reloadable_type = 4;
required string model_data_path = 5;
required string model_dir = 5;
required int32 runtime_thread_num = 6;
required int32 batch_infer_size = 7;
required int32 enable_batch_align = 8;
......@@ -41,12 +41,13 @@ message EngineDesc {
optional SparseParamServiceType sparse_param_service_type = 11;
optional string sparse_param_service_table_name = 12;
optional bool enable_memory_optimization = 13;
optional bool static_optimization = 14;
optional bool force_update_static_cache = 15;
optional bool enable_ir_optimization = 16;
optional bool use_trt = 17;
optional bool use_lite = 18;
optional bool use_xpu = 19;
optional bool enable_ir_optimization = 14;
optional bool use_trt = 15;
optional bool use_lite = 16;
optional bool use_xpu = 17;
optional bool use_gpu = 18;
optional bool combined_model = 19;
optional bool encrypted_model = 20;
};
// model_toolkit conf
......
......@@ -69,8 +69,6 @@ int test_write_conf() {
engine->set_sparse_param_service_type(EngineDesc::LOCAL);
engine->set_sparse_param_service_table_name("local_kv");
engine->set_enable_memory_optimization(true);
engine->set_static_optimization(false);
engine->set_force_update_static_cache(false);
int ret = baidu::paddle_serving::configure::write_proto_conf(
&model_toolkit_conf, output_dir, model_toolkit_conf_file);
......
......@@ -2,33 +2,25 @@ include_directories(SYSTEM ${CMAKE_CURRENT_LIST_DIR}/../../)
include(op/CMakeLists.txt)
include(proto/CMakeLists.txt)
add_executable(serving ${serving_srcs})
add_dependencies(serving pdcodegen fluid_cpu_engine pdserving paddle_fluid cube-api utils)
add_dependencies(serving pdcodegen paddle_inference_engine pdserving paddle_inference cube-api utils)
if (WITH_GPU)
add_dependencies(serving fluid_gpu_engine)
add_dependencies(serving paddle_inference_engine)
endif()
if (WITH_LITE)
add_dependencies(serving fluid_arm_engine)
add_dependencies(serving paddle_inference_engine)
endif()
target_include_directories(serving PUBLIC
${CMAKE_CURRENT_BINARY_DIR}/../../core/predictor
)
include_directories(${CUDNN_ROOT}/include/)
if(WITH_GPU)
target_link_libraries(serving -Wl,--whole-archive fluid_gpu_engine
-Wl,--no-whole-archive)
endif()
if(WITH_LITE)
target_link_libraries(serving -Wl,--whole-archive fluid_arm_engine
-Wl,--no-whole-archive)
endif()
)
include_directories(${CUDNN_ROOT}/include/)
target_link_libraries(serving -Wl,--whole-archive fluid_cpu_engine
target_link_libraries(serving -Wl,--whole-archive paddle_inference_engine
-Wl,--no-whole-archive)
target_link_libraries(serving paddle_fluid ${paddle_depend_libs})
target_link_libraries(serving paddle_inference ${paddle_depend_libs})
target_link_libraries(serving brpc)
target_link_libraries(serving protobuf)
target_link_libraries(serving pdserving)
......
......@@ -12,12 +12,12 @@ set_source_files_properties(
${pdserving_srcs}
PROPERTIES
COMPILE_FLAGS "-Wno-strict-aliasing -Wno-unused-variable -Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
add_dependencies(pdserving protobuf boost brpc leveldb pdcodegen configure extern_paddle paddle_fluid)
add_dependencies(pdserving protobuf boost brpc leveldb pdcodegen configure extern_paddle paddle_inference)
if (WITH_TRT)
add_definitions(-DWITH_TRT)
endif()
target_link_libraries(pdserving
brpc protobuf boost leveldb configure -lpthread -lcrypto -lm -lrt -lssl -ldl -lz paddle_fluid ${paddle_depend_libs})
brpc protobuf boost leveldb configure -lpthread -lcrypto -lm -lrt -lssl -ldl -lz paddle_inference ${paddle_depend_libs})
# install
install(TARGETS pdserving
RUNTIME DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/bin
......
......@@ -14,6 +14,7 @@
#pragma once
#include <string>
#include <fstream>
#include "core/predictor/common/inner_common.h"
#include "core/predictor/common/macros.h"
......@@ -148,6 +149,16 @@ class IsDerivedFrom {
}
};
// Reads the entire file `filename` into `*contents` as raw bytes.
//
// On any failure (the file cannot be opened, its size cannot be determined,
// or fewer bytes than expected are read) `*contents` is left empty rather
// than throwing, so callers can detect the problem with `contents->empty()`.
// An empty file also yields an empty string.
static void ReadBinaryFile(const std::string& filename, std::string* contents) {
  contents->clear();

  std::ifstream fin(filename, std::ios::in | std::ios::binary);
  if (!fin.is_open()) {
    return;
  }

  fin.seekg(0, std::ios::end);
  const std::streamoff length = fin.tellg();
  // tellg() reports -1 on failure, and a zero-length file has nothing to
  // read; indexing element 0 of an empty string would be out of range.
  if (length <= 0) {
    return;
  }

  contents->resize(static_cast<std::string::size_type>(length));
  fin.seekg(0, std::ios::beg);
  fin.read(&(*contents)[0], static_cast<std::streamsize>(contents->size()));
  if (fin.gcount() != static_cast<std::streamsize>(contents->size())) {
    // Short read: do not hand back a partially filled buffer.
    contents->clear();
  }
  // The stream is closed by its destructor.
}
} // namespace predictor
} // namespace paddle_serving
} // namespace baidu
......@@ -16,6 +16,7 @@
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <pthread.h>
#include <string>
#include <utility>
#include <vector>
......@@ -29,83 +30,29 @@ namespace predictor {
using configure::ModelToolkitConf;
class InferEngineCreationParams {
class AutoLock {
public:
InferEngineCreationParams() {
_path = "";
_enable_memory_optimization = false;
_enable_ir_optimization = false;
_static_optimization = false;
_force_update_static_cache = false;
_use_trt = false;
_use_lite = false;
_use_xpu = false;
explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) {
pthread_mutex_lock(&mutex);
}
~AutoLock() { pthread_mutex_unlock(&_mut); }
void set_path(const std::string& path) { _path = path; }
void set_enable_memory_optimization(bool enable_memory_optimization) {
_enable_memory_optimization = enable_memory_optimization;
}
void set_enable_ir_optimization(bool enable_ir_optimization) {
_enable_ir_optimization = enable_ir_optimization;
}
void set_use_trt(bool use_trt) { _use_trt = use_trt; }
void set_use_lite(bool use_lite) { _use_lite = use_lite; }
void set_use_xpu(bool use_xpu) { _use_xpu = use_xpu; }
bool enable_memory_optimization() const {
return _enable_memory_optimization;
}
bool enable_ir_optimization() const { return _enable_ir_optimization; }
bool use_trt() const { return _use_trt; }
bool use_lite() const { return _use_lite; }
bool use_xpu() const { return _use_xpu; }
void set_static_optimization(bool static_optimization = false) {
_static_optimization = static_optimization;
}
void set_force_update_static_cache(bool force_update_static_cache = false) {
_force_update_static_cache = force_update_static_cache;
}
bool static_optimization() const { return _static_optimization; }
bool force_update_static_cache() const { return _force_update_static_cache; }
private:
pthread_mutex_t& _mut;
};
std::string get_path() const { return _path; }
class GlobalCreateMutex {
public:
pthread_mutex_t& mutex() { return _mut; }
void dump() const {
LOG(INFO) << "InferEngineCreationParams: "
<< "model_path = " << _path << ", "
<< "enable_memory_optimization = " << _enable_memory_optimization
<< ", "
<< "enable_tensorrt = " << _use_trt << ", "
<< "enable_lite = " << _use_lite << ", "
<< "enable_xpu = " << _use_xpu << ", "
<< "enable_ir_optimization = " << _enable_ir_optimization << ", "
<< "static_optimization = " << _static_optimization << ", "
<< "force_update_static_cache = " << _force_update_static_cache;
static pthread_mutex_t& instance() {
static GlobalCreateMutex gmutex;
return gmutex.mutex();
}
private:
std::string _path;
bool _enable_memory_optimization;
bool _enable_ir_optimization;
bool _static_optimization;
bool _force_update_static_cache;
bool _use_trt;
bool _use_lite;
bool _use_xpu;
GlobalCreateMutex() { pthread_mutex_init(&_mut, NULL); }
pthread_mutex_t _mut;
};
class InferEngine {
......@@ -152,57 +99,19 @@ class ReloadableInferEngine : public InferEngine {
uint64_t last_revision;
};
virtual int load(const InferEngineCreationParams& params) = 0;
virtual int load(const configure::EngineDesc& conf) = 0;
int proc_initialize_impl(const configure::EngineDesc& conf, bool version) {
_reload_tag_file = conf.reloadable_meta();
_reload_mode_tag = conf.reloadable_type();
_model_data_path = conf.model_data_path();
_model_data_path = conf.model_dir();
_infer_thread_num = conf.runtime_thread_num();
_infer_batch_size = conf.batch_infer_size();
_infer_batch_align = conf.enable_batch_align();
bool enable_memory_optimization = false;
if (conf.has_enable_memory_optimization()) {
enable_memory_optimization = conf.enable_memory_optimization();
}
bool static_optimization = false;
if (conf.has_static_optimization()) {
static_optimization = conf.static_optimization();
}
bool force_update_static_cache = false;
if (conf.has_force_update_static_cache()) {
force_update_static_cache = conf.force_update_static_cache();
}
_conf = conf;
if (conf.has_enable_ir_optimization()) {
_infer_engine_params.set_enable_ir_optimization(
conf.enable_ir_optimization());
}
_infer_engine_params.set_path(_model_data_path);
if (enable_memory_optimization) {
_infer_engine_params.set_enable_memory_optimization(true);
_infer_engine_params.set_static_optimization(static_optimization);
_infer_engine_params.set_force_update_static_cache(
force_update_static_cache);
}
if (conf.has_use_trt()) {
_infer_engine_params.set_use_trt(conf.use_trt());
}
if (conf.has_use_lite()) {
_infer_engine_params.set_use_lite(conf.use_lite());
}
if (conf.has_use_xpu()) {
_infer_engine_params.set_use_xpu(conf.use_xpu());
}
if (!check_need_reload() || load(_infer_engine_params) != 0) {
if (!check_need_reload() || load(conf) != 0) {
LOG(ERROR) << "Failed load model_data_path" << _model_data_path;
return -1;
}
......@@ -230,7 +139,6 @@ class ReloadableInferEngine : public InferEngine {
if (_infer_thread_num > 0) {
return 0;
}
return thrd_initialize_impl();
}
......@@ -254,13 +162,13 @@ class ReloadableInferEngine : public InferEngine {
int reload() {
if (check_need_reload()) {
LOG(WARNING) << "begin reload model[" << _model_data_path << "].";
return load(_infer_engine_params);
return load(_conf);
}
return 0;
}
uint64_t version() const { return _version; }
uint32_t thread_num() const { return _infer_thread_num; }
private:
......@@ -322,7 +230,7 @@ class ReloadableInferEngine : public InferEngine {
protected:
std::string _model_data_path;
InferEngineCreationParams _infer_engine_params;
configure::EngineDesc _conf;
private:
std::string _reload_tag_file;
......@@ -361,25 +269,25 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
return ReloadableInferEngine::proc_initialize(conf, version);
}
virtual int load(const InferEngineCreationParams& params) {
virtual int load(const configure::EngineDesc& conf) {
if (_reload_vec.empty()) {
return 0;
}
for (uint32_t ti = 0; ti < _reload_vec.size(); ++ti) {
if (load_data(_reload_vec[ti], params) != 0) {
if (load_data(_reload_vec[ti], conf) != 0) {
LOG(ERROR) << "Failed reload engine model: " << ti;
return -1;
}
}
LOG(WARNING) << "Succ load engine, path: " << params.get_path();
LOG(WARNING) << "Succ load engine, path: " << conf.model_dir();
return 0;
}
int load_data(ModelData<EngineCore>* md,
const InferEngineCreationParams& params) {
const configure::EngineDesc& conf) {
uint32_t next_idx = (md->current_idx + 1) % 2;
if (md->cores[next_idx]) {
delete md->cores[next_idx];
......@@ -387,9 +295,9 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
md->cores[next_idx] = new (std::nothrow) EngineCore;
params.dump();
if (!md->cores[next_idx] || md->cores[next_idx]->create(params) != 0) {
LOG(ERROR) << "Failed create model, path: " << params.get_path();
//params.dump();
if (!md->cores[next_idx] || md->cores[next_idx]->create(conf) != 0) {
LOG(ERROR) << "Failed create model, path: " << conf.model_dir();
return -1;
}
md->current_idx = next_idx;
......@@ -400,9 +308,9 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
// memory pool to be inited in non-serving-threads
ModelData<EngineCore>* md = new (std::nothrow) ModelData<EngineCore>;
if (!md || load_data(md, _infer_engine_params) != 0) {
if (!md || load_data(md, _conf) != 0) {
LOG(ERROR) << "Failed create thread data from "
<< _infer_engine_params.get_path();
<< _conf.model_dir();
return -1;
}
......@@ -458,16 +366,16 @@ class CloneDBReloadableInferEngine
return DBReloadableInferEngine<EngineCore>::proc_initialize(conf, version);
}
virtual int load(const InferEngineCreationParams& params) {
virtual int load(const configure::EngineDesc& conf) {
// 加载进程级模型数据
if (!_pd ||
DBReloadableInferEngine<EngineCore>::load_data(_pd, params) != 0) {
LOG(ERROR) << "Failed to create common model from [" << params.get_path()
DBReloadableInferEngine<EngineCore>::load_data(_pd, conf) != 0) {
LOG(ERROR) << "Failed to create common model from [" << conf.model_dir()
<< "].";
return -1;
}
LOG(WARNING) << "Succ load common model[" << _pd->cores[_pd->current_idx]
<< "], path[" << params.get_path() << "].";
<< "], path[" << conf.model_dir() << "].";
if (DBReloadableInferEngine<EngineCore>::_reload_vec.empty()) {
return 0;
......@@ -483,7 +391,7 @@ class CloneDBReloadableInferEngine
}
}
LOG(WARNING) << "Succ load clone model, path[" << params.get_path() << "]";
LOG(WARNING) << "Succ load clone model, path[" << conf.model_dir() << "]";
return 0;
}
......@@ -527,18 +435,18 @@ class CloneDBReloadableInferEngine
_pd; // 进程级EngineCore,多个线程级EngineCore共用该对象的模型数据
};
template <typename FluidFamilyCore>
template <typename PaddleInferenceCore>
#ifdef WITH_TRT
class FluidInferEngine : public DBReloadableInferEngine<FluidFamilyCore> {
class FluidInferEngine : public DBReloadableInferEngine<PaddleInferenceCore> {
#else
class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore> {
#endif
public: // NOLINT
FluidInferEngine() {}
~FluidInferEngine() {}
std::vector<std::string> GetInputNames() {
FluidFamilyCore* core =
DBReloadableInferEngine<FluidFamilyCore>::get_core();
PaddleInferenceCore* core =
DBReloadableInferEngine<PaddleInferenceCore>::get_core();
if (!core || !core->get()) {
LOG(ERROR) << "Failed get fluid core in GetInputHandle()";
}
......@@ -546,8 +454,8 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
}
std::vector<std::string> GetOutputNames() {
FluidFamilyCore* core =
DBReloadableInferEngine<FluidFamilyCore>::get_core();
PaddleInferenceCore* core =
DBReloadableInferEngine<PaddleInferenceCore>::get_core();
if (!core || !core->get()) {
LOG(ERROR) << "Failed get fluid core in GetInputHandle()";
}
......@@ -556,8 +464,8 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
std::unique_ptr<paddle_infer::Tensor> GetInputHandle(
const std::string& name) {
FluidFamilyCore* core =
DBReloadableInferEngine<FluidFamilyCore>::get_core();
PaddleInferenceCore* core =
DBReloadableInferEngine<PaddleInferenceCore>::get_core();
if (!core || !core->get()) {
LOG(ERROR) << "Failed get fluid core in GetInputHandle()";
}
......@@ -566,8 +474,8 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
std::unique_ptr<paddle_infer::Tensor> GetOutputHandle(
const std::string& name) {
FluidFamilyCore* core =
DBReloadableInferEngine<FluidFamilyCore>::get_core();
PaddleInferenceCore* core =
DBReloadableInferEngine<PaddleInferenceCore>::get_core();
if (!core || !core->get()) {
LOG(ERROR) << "Failed get fluid core in GetOutputHandle()";
}
......@@ -575,8 +483,8 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
}
int infer_impl() {
FluidFamilyCore* core =
DBReloadableInferEngine<FluidFamilyCore>::get_core();
PaddleInferenceCore* core =
DBReloadableInferEngine<PaddleInferenceCore>::get_core();
if (!core || !core->get()) {
LOG(ERROR) << "Failed get fluid core in infer_impl()";
return -1;
......
......@@ -13,13 +13,5 @@
# limitations under the License
# The inference engine subdirectories are only added when CLIENT_ONLY is off.
if (NOT CLIENT_ONLY)
# Added unconditionally (inside the CLIENT_ONLY guard).
add_subdirectory(inferencer-fluid-cpu)
# Added only when WITH_GPU is set.
if (WITH_GPU)
add_subdirectory(inferencer-fluid-gpu)
endif()
# Added only when WITH_LITE is set.
if (WITH_LITE)
add_subdirectory(inferencer-fluid-arm)
endif()
add_subdirectory(paddle)
endif()
# fluid_arm_engine: library built from every .cpp file under src/ next to this
# list file. Only its archive is installed, under
# ${PADDLE_SERVING_INSTALL_DIR}/lib.
file(GLOB fluid_arm_engine_srcs
  ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp
)
add_library(fluid_arm_engine ${fluid_arm_engine_srcs})

target_include_directories(fluid_arm_engine
  PUBLIC
    ${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir/
)

# Build-order dependencies only; linking is declared separately below.
add_dependencies(fluid_arm_engine
  pdserving
  extern_paddle
  configure
)

target_link_libraries(fluid_arm_engine
  pdserving
  paddle_fluid
  -lpthread
  -lcrypto
  -lm
  -lrt
  -lssl
  -ldl
  -lz
)

install(
  TARGETS fluid_arm_engine
  ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib
)
# fluid_cpu_engine: library built from every .cpp file under src/ next to this
# list file. Only its archive is installed, under
# ${PADDLE_SERVING_INSTALL_DIR}/lib.
file(GLOB fluid_cpu_engine_srcs
  ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp
)
add_library(fluid_cpu_engine ${fluid_cpu_engine_srcs})

target_include_directories(fluid_cpu_engine
  PUBLIC
    ${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir/
)

# Build-order dependencies only; linking is declared separately below.
add_dependencies(fluid_cpu_engine
  pdserving
  extern_paddle
  configure
)

target_link_libraries(fluid_cpu_engine
  pdserving
  paddle_fluid
  -lpthread
  -lcrypto
  -lm
  -lrt
  -lssl
  -ldl
  -lz
)

install(
  TARGETS fluid_cpu_engine
  ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib
)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <pthread.h>
#include <fstream>
#include <map>
#include <string>
#include <vector>
#include "core/configure/include/configure_parser.h"
#include "core/configure/inferencer_configure.pb.h"
#include "core/predictor/framework/infer.h"
#include "paddle_inference_api.h" // NOLINT
namespace baidu {
namespace paddle_serving {
namespace fluid_cpu {
// Scoped guard for a pthread mutex: locks the mutex on construction and
// unlocks it when the guard goes out of scope.
class AutoLock {
 public:
  // Blocks until `mutex` is acquired. `mutex` must outlive the guard because
  // only a reference to it is stored.
  explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) {
    pthread_mutex_lock(&_mut);
  }

  ~AutoLock() { pthread_mutex_unlock(&_mut); }

  // A copied guard would unlock the same mutex a second time from its own
  // destructor, so copying is disallowed.
  AutoLock(const AutoLock&) = delete;
  AutoLock& operator=(const AutoLock&) = delete;

 private:
  pthread_mutex_t& _mut;
};
// Holder for a single pthread mutex shared through instance(). The create()
// methods in this header lock it around CreatePredictor().
class GlobalPaddleCreateMutex {
 private:
  pthread_mutex_t _mut;

  // Private so the only instance is the function-local static in instance().
  GlobalPaddleCreateMutex() { pthread_mutex_init(&_mut, NULL); }

 public:
  // Returns the mutex of the lazily constructed shared holder.
  static pthread_mutex_t& instance() {
    static GlobalPaddleCreateMutex holder;
    return holder.mutex();
  }

  // Accessor for the wrapped mutex.
  pthread_mutex_t& mutex() { return _mut; }
};
using paddle_infer::Config;
using paddle_infer::Predictor;
using paddle_infer::Tensor;
using paddle_infer::CreatePredictor;
// data interface
class FluidFamilyCore {
public:
virtual ~FluidFamilyCore() {}
virtual std::vector<std::string> GetInputNames() {
return _core->GetInputNames();
}
virtual std::unique_ptr<Tensor> GetInputHandle(const std::string& name) {
return _core->GetInputHandle(name);
}
virtual std::vector<std::string> GetOutputNames() {
return _core->GetOutputNames();
}
virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string& name) {
return _core->GetOutputHandle(name);
}
virtual bool Run() {
if (!_core->Run()) {
LOG(ERROR) << "Failed call Run with paddle predictor";
return false;
}
return true;
}
virtual int create(const predictor::InferEngineCreationParams& params) = 0;
virtual int clone(void* origin_core) {
if (origin_core == NULL) {
LOG(ERROR) << "origin paddle Predictor is null.";
return -1;
}
Predictor* p_predictor = (Predictor*)origin_core;
_core = p_predictor->Clone();
if (_core.get() == NULL) {
LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
return -1;
}
return 0;
}
virtual void* get() { return _core.get(); }
protected:
std::shared_ptr<Predictor> _core;
};
// infer interface
// CPU core that loads the program from <path>/__model__ and the parameters
// from <path>/__params__.
class FluidCpuAnalysisCore : public FluidFamilyCore {
 public:
  // Creates the CPU predictor for the model directory in `params`.
  // Returns 0 on success, -1 when the path does not exist or the predictor
  // could not be created.
  int create(const predictor::InferEngineCreationParams& params) {
    const std::string model_dir = params.get_path();
    const bool path_missing = (access(model_dir.c_str(), F_OK) == -1);
    if (path_missing) {
      LOG(ERROR) << "create paddle predictor failed, path not exits: "
                 << model_dir;
      return -1;
    }

    Config config;
    config.SetParamsFile(model_dir + "/__params__");
    config.SetProgFile(model_dir + "/__model__");
    config.DisableGpu();
    config.SetCpuMathLibraryNumThreads(1);
    if (params.enable_memory_optimization()) {
      config.EnableMemoryOptim();
    }
    config.SwitchSpecifyInputNames(true);

    // Predictor creation is serialized through the shared creation mutex.
    AutoLock lock(GlobalPaddleCreateMutex::instance());
    _core = CreatePredictor(config);
    if (!_core) {
      LOG(ERROR) << "create paddle predictor failed, path: " << model_dir;
      return -1;
    }

    VLOG(2) << "create paddle predictor sucess, path: " << model_dir;
    return 0;
  }
};
// CPU core that hands the whole model directory to Config::SetModel().
class FluidCpuAnalysisDirCore : public FluidFamilyCore {
 public:
  // Creates the CPU predictor for the model directory in `params`.
  // Returns 0 on success, -1 when the path does not exist or the predictor
  // could not be created.
  int create(const predictor::InferEngineCreationParams& params) {
    const std::string model_dir = params.get_path();
    const bool path_missing = (access(model_dir.c_str(), F_OK) == -1);
    if (path_missing) {
      LOG(ERROR) << "create paddle predictor failed, path not exits: "
                 << model_dir;
      return -1;
    }

    Config config;
    config.SetModel(model_dir);
    config.DisableGpu();
    config.SwitchSpecifyInputNames(true);
    config.SetCpuMathLibraryNumThreads(1);
    if (params.enable_memory_optimization()) {
      config.EnableMemoryOptim();
    }
    // IR optimization is always set explicitly, on or off, from the params.
    config.SwitchIrOptim(params.enable_ir_optimization());

    // Predictor creation is serialized through the shared creation mutex.
    AutoLock lock(GlobalPaddleCreateMutex::instance());
    _core = CreatePredictor(config);
    if (!_core) {
      LOG(ERROR) << "create paddle predictor failed, path: " << model_dir;
      return -1;
    }

    VLOG(2) << "create paddle predictor sucess, path: " << model_dir;
    return 0;
  }
};
class Parameter {
public:
Parameter() : _row(0), _col(0), _params(NULL) {}
~Parameter() {
VLOG(2) << "before destroy Parameter, file_name[" << _file_name << "]";
destroy();
}
int init(int row, int col, const char* file_name) {
destroy();
_file_name = file_name;
_row = row;
_col = col;
_params = reinterpret_cast<float*>(malloc(_row * _col * sizeof(float)));
if (_params == NULL) {
LOG(ERROR) << "Load " << _file_name << " malloc error.";
return -1;
}
VLOG(2) << "Load parameter file[" << _file_name << "] success.";
return 0;
}
void destroy() {
_row = 0;
_col = 0;
if (_params != NULL) {
free(_params);
_params = NULL;
}
}
int load() {
if (_params == NULL || _row <= 0 || _col <= 0) {
LOG(ERROR) << "load parameter error [not inited].";
return -1;
}
FILE* fs = fopen(_file_name.c_str(), "rb");
if (fs == NULL) {
LOG(ERROR) << "load " << _file_name << " fopen error.";
return -1;
}
static const uint32_t MODEL_FILE_HEAD_LEN = 16;
char head[MODEL_FILE_HEAD_LEN] = {0};
if (fread(head, 1, MODEL_FILE_HEAD_LEN, fs) != MODEL_FILE_HEAD_LEN) {
destroy();
LOG(ERROR) << "Load " << _file_name << " read head error.";
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
return -1;
}
uint32_t matrix_size = _row * _col;
if (matrix_size == fread(_params, sizeof(float), matrix_size, fs)) {
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
VLOG(2) << "load " << _file_name << " read ok.";
return 0;
} else {
LOG(ERROR) << "load " << _file_name << " read error.";
destroy();
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
return -1;
}
return 0;
}
public:
std::string _file_name;
int _row;
int _col;
float* _params;
};
// CPU core for encrypted models: reads the encrypted program, encrypted
// parameters and key from the model path, decrypts both buffers, and builds
// the predictor from the in-memory result.
class FluidCpuAnalysisEncryptCore : public FluidFamilyCore {
 public:
  // Reads the entire file `filename` into `*contents` as raw bytes.
  // On any failure (cannot open, size unknown, short read) or for an empty
  // file, `*contents` is left empty instead of throwing.
  void ReadBinaryFile(const std::string& filename, std::string* contents) {
    contents->clear();

    std::ifstream fin(filename, std::ios::in | std::ios::binary);
    if (!fin.is_open()) {
      return;
    }

    fin.seekg(0, std::ios::end);
    const std::streamoff length = fin.tellg();
    // tellg() reports -1 on failure; a zero length leaves nothing to read.
    if (length <= 0) {
      return;
    }

    contents->resize(static_cast<std::string::size_type>(length));
    fin.seekg(0, std::ios::beg);
    fin.read(&(*contents)[0], static_cast<std::streamsize>(contents->size()));
    if (fin.gcount() != static_cast<std::streamsize>(contents->size())) {
      contents->clear();  // short read: do not return a partial buffer
    }
  }

  // Creates the CPU predictor from the encrypted model under the path in
  // `params`. Returns 0 on success, -1 when the path does not exist, any of
  // the three input files cannot be read, no cipher is available, or the
  // predictor could not be created.
  int create(const predictor::InferEngineCreationParams& params) {
    std::string data_path = params.get_path();
    if (access(data_path.c_str(), F_OK) == -1) {
      LOG(ERROR) << "create paddle predictor failed, path note exits: "
                 << data_path;
      return -1;
    }

    // NOTE(review): unlike the other cores, no "/" is inserted between the
    // path and the file name, so this presumably expects a path ending in
    // "/" — confirm against how the encrypted model path is configured.
    std::string model_buffer, params_buffer, key_buffer;
    ReadBinaryFile(data_path + "encrypt_model", &model_buffer);
    ReadBinaryFile(data_path + "encrypt_params", &params_buffer);
    ReadBinaryFile(data_path + "key", &key_buffer);
    if (model_buffer.empty() || params_buffer.empty() || key_buffer.empty()) {
      LOG(ERROR) << "create paddle predictor failed, cannot read "
                 << "encrypt_model, encrypt_params or key under: "
                 << data_path;
      return -1;
    }

    VLOG(2) << "prepare for encryption model";

    auto cipher = paddle::MakeCipher("");
    if (!cipher) {
      LOG(ERROR) << "create paddle predictor failed, cannot make cipher, "
                 << "path: " << data_path;
      return -1;
    }
    std::string real_model_buffer = cipher->Decrypt(model_buffer, key_buffer);
    std::string real_params_buffer = cipher->Decrypt(params_buffer, key_buffer);

    Config analysis_config;
    analysis_config.SetModelBuffer(&real_model_buffer[0],
                                   real_model_buffer.size(),
                                   &real_params_buffer[0],
                                   real_params_buffer.size());
    analysis_config.DisableGpu();
    analysis_config.SetCpuMathLibraryNumThreads(1);
    if (params.enable_memory_optimization()) {
      analysis_config.EnableMemoryOptim();
    }
    analysis_config.SwitchSpecifyInputNames(true);

    // Predictor creation is serialized through the shared creation mutex.
    AutoLock lock(GlobalPaddleCreateMutex::instance());
    VLOG(2) << "decrypt model file sucess";
    _core = CreatePredictor(analysis_config);
    if (NULL == _core.get()) {
      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
      return -1;
    }
    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
    return 0;
  }
};
} // namespace fluid_cpu
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h"
#include "core/predictor/framework/factory.h"
namespace baidu {
namespace paddle_serving {
namespace fluid_cpu {
// Register the FluidInferEngine instantiation for each CPU core with the
// InferEngine factory under the string name given as the last argument
// (presumably the name an engine configuration refers to — confirm against
// REGIST_FACTORY_OBJECT_IMPL_WITH_NAME in core/predictor/framework/factory.h).
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<FluidCpuAnalysisCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_CPU_ANALYSIS");
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<
FluidCpuAnalysisDirCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_CPU_ANALYSIS_DIR");
// NOTE(review): `#if 1` is always true, so the encrypt engine is always
// registered; the guard looks like a leftover toggle.
#if 1
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<
FluidCpuAnalysisEncryptCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_CPU_ANALYSIS_ENCRYPT");
#endif
} // namespace fluid_cpu
} // namespace paddle_serving
} // namespace baidu
# fluid_gpu_engine: library built from every .cpp file under src/ next to this
# list file. Only its archive is installed, under
# ${PADDLE_SERVING_INSTALL_DIR}/lib.
file(GLOB fluid_gpu_engine_srcs
  ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp
)
add_library(fluid_gpu_engine ${fluid_gpu_engine_srcs})

target_include_directories(fluid_gpu_engine
  PUBLIC
    ${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir/
)

# Build-order dependencies only; linking is declared separately below.
add_dependencies(fluid_gpu_engine
  pdserving
  extern_paddle
  configure
)

target_link_libraries(fluid_gpu_engine
  pdserving
  paddle_fluid
  iomp5
  mklml_intel
  -lpthread
  -lcrypto
  -lm
  -lrt
  -lssl
  -ldl
  -lz
)

install(
  TARGETS fluid_gpu_engine
  ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib
)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <pthread.h>
#include <fstream>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "core/configure/include/configure_parser.h"
#include "core/configure/inferencer_configure.pb.h"
#include "core/predictor/framework/infer.h"
#include "paddle_inference_api.h" // NOLINT
DECLARE_int32(gpuid);
namespace baidu {
namespace paddle_serving {
namespace fluid_gpu {
using configure::SigmoidConf;
// Scoped guard for a pthread mutex: locks the mutex on construction and
// unlocks it when the guard goes out of scope.
class AutoLock {
 public:
  // Blocks until `mutex` is acquired. `mutex` must outlive the guard because
  // only a reference to it is stored.
  explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) {
    pthread_mutex_lock(&_mut);
  }

  ~AutoLock() { pthread_mutex_unlock(&_mut); }

  // A copied guard would unlock the same mutex a second time from its own
  // destructor, so copying is disallowed.
  AutoLock(const AutoLock&) = delete;
  AutoLock& operator=(const AutoLock&) = delete;

 private:
  pthread_mutex_t& _mut;
};
// Holder for a single pthread mutex shared through instance(). The create()
// methods in this header lock it around CreatePredictor().
class GlobalPaddleCreateMutex {
 private:
  pthread_mutex_t _mut;

  // Private so the only instance is the function-local static in instance().
  GlobalPaddleCreateMutex() { pthread_mutex_init(&_mut, NULL); }

 public:
  // Returns the mutex of the lazily constructed shared holder.
  static pthread_mutex_t& instance() {
    static GlobalPaddleCreateMutex holder;
    return holder.mutex();
  }

  // Accessor for the wrapped mutex.
  pthread_mutex_t& mutex() { return _mut; }
};
using paddle_infer::Config;
using paddle_infer::Predictor;
using paddle_infer::Tensor;
using paddle_infer::CreatePredictor;
// data interface
class FluidFamilyCore {
public:
virtual ~FluidFamilyCore() {}
virtual std::vector<std::string> GetInputNames() {
return _core->GetInputNames();
}
virtual std::unique_ptr<Tensor> GetInputHandle(const std::string& name) {
return _core->GetInputHandle(name);
}
virtual std::vector<std::string> GetOutputNames() {
return _core->GetOutputNames();
}
virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string& name) {
return _core->GetOutputHandle(name);
}
virtual bool Run() {
if (!_core->Run()) {
LOG(ERROR) << "Failed call Run with paddle predictor";
return false;
}
return true;
}
virtual int create(const predictor::InferEngineCreationParams& params) = 0;
virtual int clone(void* origin_core) {
if (origin_core == NULL) {
LOG(ERROR) << "origin paddle Predictor is null.";
return -1;
}
Predictor* p_predictor = (Predictor*)origin_core;
_core = p_predictor->Clone();
if (_core.get() == NULL) {
LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
return -1;
}
return 0;
}
virtual void* get() { return _core.get(); }
protected:
std::shared_ptr<Predictor> _core;
};
// infer interface
// GPU core that loads the program from <path>/__model__ and the parameters
// from <path>/__params__, running on the device selected by FLAGS_gpuid.
class FluidGpuAnalysisCore : public FluidFamilyCore {
 public:
  // Creates the GPU predictor for the model directory in `params`.
  // Returns 0 on success, -1 when the path does not exist or the predictor
  // could not be created.
  int create(const predictor::InferEngineCreationParams& params) {
    const std::string model_dir = params.get_path();
    const bool path_missing = (access(model_dir.c_str(), F_OK) == -1);
    if (path_missing) {
      LOG(ERROR) << "create paddle predictor failed, path not exits: "
                 << model_dir;
      return -1;
    }

    Config config;
    config.SetParamsFile(model_dir + "/__params__");
    config.SetProgFile(model_dir + "/__model__");
    // First argument is presumably the initial GPU memory pool size in MB —
    // confirm against the Paddle Inference Config documentation.
    config.EnableUseGpu(100, FLAGS_gpuid);
    config.SetCpuMathLibraryNumThreads(1);
    if (params.enable_memory_optimization()) {
      config.EnableMemoryOptim();
    }
    config.SwitchSpecifyInputNames(true);

    // Predictor creation is serialized through the shared creation mutex.
    AutoLock lock(GlobalPaddleCreateMutex::instance());
    _core = CreatePredictor(config);
    if (!_core) {
      LOG(ERROR) << "create paddle predictor failed, path: " << model_dir;
      return -1;
    }

    VLOG(2) << "create paddle predictor sucess, path: " << model_dir;
    return 0;
  }
};
// GPU core for a model stored as a directory of separate files; the whole
// directory is handed to Config::SetModel(). Optionally enables TensorRT.
class FluidGpuAnalysisDirCore : public FluidFamilyCore {
 public:
  // Creates the GPU predictor for the model directory in `params`.
  //
  // Memory optimization is enabled when params.enable_memory_optimization()
  // is set, regardless of whether TensorRT is used. IR optimization is only
  // switched explicitly on the non-TensorRT path, following
  // params.enable_ir_optimization().
  //
  // Returns 0 on success; -1 if the path is not accessible or predictor
  // creation yields a null pointer.
  int create(const predictor::InferEngineCreationParams& params) {
    std::string data_path = params.get_path();
    if (access(data_path.c_str(), F_OK) == -1) {
      LOG(ERROR) << "create paddle predictor failed, path not exits: "
                 << data_path;
      return -1;
    }

    Config config;
    config.SetModel(data_path);
    // First argument is the initial GPU memory pool size (MB, per the Paddle
    // Inference API); the device comes from the --gpuid flag.
    config.EnableUseGpu(1500, FLAGS_gpuid);
    config.SwitchSpecifyInputNames(true);
    config.SetCpuMathLibraryNumThreads(1);

    // Applies to both the TensorRT and the plain GPU path. The original code
    // repeated this call inside the non-TensorRT branch under the very same
    // condition; one call is sufficient.
    if (params.enable_memory_optimization()) {
      config.EnableMemoryOptim();
    }

    const int max_batch = 32;
    const int min_subgraph_size = 3;
    if (params.use_trt()) {
      // Arguments (per the Paddle Inference API): workspace size in bytes,
      // max batch size, min subgraph size, precision, use_static,
      // use_calib_mode.
      config.EnableTensorRtEngine(1 << 20,
                                  max_batch,
                                  min_subgraph_size,
                                  Config::Precision::kFloat32,
                                  false,
                                  false);
      LOG(INFO) << "create TensorRT predictor";
    } else {
      // Same truth test as the original if/else around SwitchIrOptim.
      config.SwitchIrOptim(params.enable_ir_optimization() ? true : false);
    }

    // Predictor creation is serialized through the process-wide mutex; the
    // guard is held until this function returns.
    AutoLock lock(GlobalPaddleCreateMutex::instance());
    _core = CreatePredictor(config);
    if (NULL == _core.get()) {
      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
      return -1;
    }

    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
    return 0;
  }
};
class Parameter {
public:
Parameter() : _row(0), _col(0), _params(NULL) {}
~Parameter() {
LOG(INFO) << "before destroy Parameter, file_name[" << _file_name << "]";
destroy();
}
int init(int row, int col, const char* file_name) {
destroy();
_file_name = file_name;
_row = row;
_col = col;
_params = reinterpret_cast<float*>(malloc(_row * _col * sizeof(float)));
if (_params == NULL) {
LOG(ERROR) << "Load " << _file_name << " malloc error.";
return -1;
}
VLOG(2) << "Load parameter file[" << _file_name << "] success.";
return 0;
}
void destroy() {
_row = 0;
_col = 0;
if (_params != NULL) {
free(_params);
_params = NULL;
}
}
int load() {
if (_params == NULL || _row <= 0 || _col <= 0) {
LOG(ERROR) << "load parameter error [not inited].";
return -1;
}
FILE* fs = fopen(_file_name.c_str(), "rb");
if (fs == NULL) {
LOG(ERROR) << "load " << _file_name << " fopen error.";
return -1;
}
static const uint32_t MODEL_FILE_HEAD_LEN = 16;
char head[MODEL_FILE_HEAD_LEN] = {0};
if (fread(head, 1, MODEL_FILE_HEAD_LEN, fs) != MODEL_FILE_HEAD_LEN) {
destroy();
LOG(ERROR) << "Load " << _file_name << " read head error.";
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
return -1;
}
uint32_t matrix_size = _row * _col;
if (matrix_size == fread(_params, sizeof(float), matrix_size, fs)) {
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
LOG(INFO) << "load " << _file_name << " read ok.";
return 0;
} else {
LOG(ERROR) << "load " << _file_name << " read error.";
destroy();
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
return -1;
}
return 0;
}
public:
std::string _file_name;
int _row;
int _col;
float* _params;
};
class FluidGpuAnalysisEncryptCore : public FluidFamilyCore {
public:
void ReadBinaryFile(const std::string& filename, std::string* contents) {
std::ifstream fin(filename, std::ios::in | std::ios::binary);
fin.seekg(0, std::ios::end);
contents->clear();
contents->resize(fin.tellg());
fin.seekg(0, std::ios::beg);
fin.read(&(contents->at(0)), contents->size());
fin.close();
}
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path note exits: "
<< data_path;
return -1;
}
std::string model_buffer, params_buffer, key_buffer;
ReadBinaryFile(data_path + "encrypt_model", &model_buffer);
ReadBinaryFile(data_path + "encrypt_params", &params_buffer);
ReadBinaryFile(data_path + "key", &key_buffer);
VLOG(2) << "prepare for encryption model";
auto cipher = paddle::MakeCipher("");
std::string real_model_buffer = cipher->Decrypt(model_buffer, key_buffer);
std::string real_params_buffer = cipher->Decrypt(params_buffer, key_buffer);
Config analysis_config;
analysis_config.SetModelBuffer(&real_model_buffer[0],
real_model_buffer.size(),
&real_params_buffer[0],
real_params_buffer.size());
analysis_config.EnableUseGpu(100, FLAGS_gpuid);
analysis_config.SetCpuMathLibraryNumThreads(1);
if (params.enable_memory_optimization()) {
analysis_config.EnableMemoryOptim();
}
analysis_config.SwitchSpecifyInputNames(true);
AutoLock lock(GlobalPaddleCreateMutex::instance());
VLOG(2) << "decrypt model file sucess";
_core = CreatePredictor(analysis_config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
} // namespace fluid_gpu
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h"
#include "core/predictor/framework/factory.h"
// Defines the int32 flag `gpuid` with default value 0. The GPU engine cores
// read it as FLAGS_gpuid when enabling GPU execution.
DEFINE_int32(gpuid, 0, "GPU device id to use");
namespace baidu {
namespace paddle_serving {
namespace fluid_gpu {
// Each invocation below names a FluidInferEngine<Core> specialization, the
// InferEngine base type, and a string identifier.
// NOTE(review): presumably the macro (from core/predictor/framework/factory.h)
// registers the engine type with a factory under that identifier - confirm
// against the macro definition.

// Combined-file model (__model__ / __params__) on GPU.
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<FluidGpuAnalysisCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_GPU_ANALYSIS");
// Model directory passed to SetModel(), with optional TensorRT, on GPU.
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<
FluidGpuAnalysisDirCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_GPU_ANALYSIS_DIR");
// Encrypted model on GPU. The identifier is spelled "ENCRPT" (no 'Y'); it is
// a runtime string, so anything that looks it up must use the same spelling.
// NOTE(review): unlike the two invocations above, this one has no trailing
// ';' - presumably the macro expansion is self-terminating; confirm.
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<
FluidGpuAnalysisEncryptCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_GPU_ANALYSIS_ENCRPT")
} // namespace fluid_gpu
} // namespace paddle_serving
} // namespace baidu
# Collect every C++ source in the src/ directory next to this list file.
file(GLOB paddle_inference_engine_srcs
  ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp
)

# Library built from the collected inference-engine sources.
add_library(paddle_inference_engine
  ${paddle_inference_engine_srcs}
)

# Headers from the Paddle install tree inside the build directory; PUBLIC so
# that targets linking this library see them as well.
target_include_directories(paddle_inference_engine
  PUBLIC
    ${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir/
)

# Build-order only: these targets must be built before this library.
add_dependencies(paddle_inference_engine
  pdserving
  extern_paddle
  configure
)

# Link against the serving core, the imported Paddle inference library and
# the system libraries they need.
target_link_libraries(paddle_inference_engine
  pdserving
  paddle_inference
  -lpthread
  -lcrypto
  -lm
  -lrt
  -lssl
  -ldl
  -lz
)

# Install the static archive into the serving install tree.
install(
  TARGETS paddle_inference_engine
  ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib
)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -17,275 +17,174 @@
#include <pthread.h>
#include <fstream>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "core/configure/include/configure_parser.h"
#include "core/configure/inferencer_configure.pb.h"
#include "core/predictor/common/utils.h"
#include "core/predictor/framework/infer.h"
#include "paddle_inference_api.h" // NOLINT
namespace baidu {
namespace paddle_serving {
namespace fluid_arm {
// Scoped guard for a pthread mutex: locks the mutex on construction and
// unlocks it on destruction. The mutex is held by reference and must outlive
// the guard.
//
// The guard is non-copyable. A copy would refer to the same mutex and unlock
// it a second time when destroyed.
class AutoLock {
 public:
  // Blocks until `mutex` is acquired.
  explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) {
    pthread_mutex_lock(&_mut);
  }

  // Releases the mutex acquired by the constructor.
  ~AutoLock() { pthread_mutex_unlock(&_mut); }

 private:
  AutoLock(const AutoLock&) = delete;
  AutoLock& operator=(const AutoLock&) = delete;

  pthread_mutex_t& _mut;  // the guarded mutex (not owned)
};
// Holder of a single pthread mutex shared by the whole process.
// instance() hands out that mutex; the holder itself is a function-local
// static that is constructed on the first call.
class GlobalPaddleCreateMutex {
 public:
  // Returns the one shared mutex, creating the holder on first use.
  static pthread_mutex_t& instance() {
    static GlobalPaddleCreateMutex holder;
    return holder.mutex();
  }

  // Returns the mutex owned by this holder.
  pthread_mutex_t& mutex() { return _mut; }

 private:
  // Private so that the only instance is the static one in instance().
  // Initializes the mutex with default attributes.
  GlobalPaddleCreateMutex() { pthread_mutex_init(&_mut, NULL); }

  pthread_mutex_t _mut;
};
namespace inference {
using paddle_infer::Config;
using paddle_infer::PrecisionType;
using paddle_infer::Predictor;
using paddle_infer::Tensor;
using paddle_infer::PrecisionType;
using paddle_infer::CreatePredictor;
// data interface
class FluidFamilyCore {
DECLARE_int32(gpuid);
static const int max_batch = 32;
static const int min_subgraph_size = 3;
// Engine Base
class PaddleEngineBase {
public:
virtual ~FluidFamilyCore() {}
virtual ~PaddleEngineBase() {}
virtual std::vector<std::string> GetInputNames() {
return _core->GetInputNames();
return _predictor->GetInputNames();
}
virtual std::unique_ptr<Tensor> GetInputHandle(const std::string& name) {
return _core->GetInputHandle(name);
return _predictor->GetInputHandle(name);
}
virtual std::vector<std::string> GetOutputNames() {
return _core->GetOutputNames();
return _predictor->GetOutputNames();
}
virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string& name) {
return _core->GetOutputHandle(name);
return _predictor->GetOutputHandle(name);
}
virtual bool Run() {
if (!_core->Run()) {
if (!_predictor->Run()) {
LOG(ERROR) << "Failed call Run with paddle predictor";
return false;
}
return true;
}
virtual int create(const predictor::InferEngineCreationParams& params) = 0;
virtual int create(const configure::EngineDesc& conf) = 0;
virtual int clone(void* origin_core) {
if (origin_core == NULL) {
virtual int clone(void* predictor) {
if (predictor == NULL) {
LOG(ERROR) << "origin paddle Predictor is null.";
return -1;
}
Predictor* p_predictor = (Predictor*)origin_core;
_core = p_predictor->Clone();
if (_core.get() == NULL) {
LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
Predictor* prep = static_cast<Predictor*>(predictor);
_predictor = prep->Clone();
if (_predictor.get() == NULL) {
LOG(ERROR) << "fail to clone paddle predictor: " << predictor;
return -1;
}
return 0;
}
virtual void* get() { return _core.get(); }
virtual void* get() { return _predictor.get(); }
protected:
std::shared_ptr<Predictor> _core;
std::shared_ptr<Predictor> _predictor;
};
// infer interface
class FluidArmAnalysisCore : public FluidFamilyCore {
// Paddle Inference Engine
class PaddleInferenceEngine : public PaddleEngineBase {
public:
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
int create(const configure::EngineDesc& engine_conf) {
std::string model_path = engine_conf.model_dir();
if (access(model_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
<< model_path;
return -1;
}
Config config;
config.SetParamsFile(data_path + "/__params__");
config.SetProgFile(data_path + "/__model__");
config.DisableGpu();
config.SetCpuMathLibraryNumThreads(1);
if (params.use_lite()) {
config.EnableLiteEngine(PrecisionType::kFloat32, true);
}
if (params.use_xpu()) {
config.EnableXpu(2 * 1024 * 1024);
}
if (params.enable_memory_optimization()) {
config.EnableMemoryOptim();
}
if (params.enable_ir_optimization()) {
config.SwitchIrOptim(true);
// todo, auto config(zhangjun)
if (engine_conf.has_combined_model()) {
if (!engine_conf.combined_model()) {
config.SetModel(model_path);
} else {
config.SetParamsFile(model_path + "/__params__");
config.SetProgFile(model_path + "/__model__");
}
} else {
config.SwitchIrOptim(false);
config.SetParamsFile(model_path + "/__params__");
config.SetProgFile(model_path + "/__model__");
}
config.SwitchSpecifyInputNames(true);
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core = CreatePredictor(config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
config.SetCpuMathLibraryNumThreads(1);
if (engine_conf.has_use_gpu() && engine_conf.use_gpu()) {
// 2000MB GPU memory
config.EnableUseGpu(2000, FLAGS_gpuid);
}
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
class FluidArmAnalysisDirCore : public FluidFamilyCore {
public:
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
return -1;
if (engine_conf.has_use_trt() && engine_conf.use_trt()) {
if (!engine_conf.has_use_gpu() || !engine_conf.use_gpu()) {
config.EnableUseGpu(2000, FLAGS_gpuid);
}
config.EnableTensorRtEngine(1 << 20,
max_batch,
min_subgraph_size,
Config::Precision::kFloat32,
false,
false);
LOG(INFO) << "create TensorRT predictor";
}
Config config;
config.SetModel(data_path);
config.DisableGpu();
config.SwitchSpecifyInputNames(true);
config.SetCpuMathLibraryNumThreads(1);
if (params.use_lite()) {
if (engine_conf.has_use_lite() && engine_conf.use_lite()) {
config.EnableLiteEngine(PrecisionType::kFloat32, true);
}
if (params.use_xpu()) {
if (engine_conf.has_use_xpu() && engine_conf.use_xpu()) {
// 2 MB l3 cache
config.EnableXpu(2 * 1024 * 1024);
}
if (params.enable_memory_optimization()) {
config.EnableMemoryOptim();
}
if (params.enable_ir_optimization()) {
config.SwitchIrOptim(true);
} else {
if (engine_conf.has_enable_ir_optimization() &&
!engine_conf.enable_ir_optimization()) {
config.SwitchIrOptim(false);
} else {
config.SwitchIrOptim(true);
}
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core = CreatePredictor(config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
class Parameter {
public:
Parameter() : _row(0), _col(0), _params(NULL) {}
~Parameter() {
VLOG(2) << "before destroy Parameter, file_name[" << _file_name << "]";
destroy();
}
int init(int row, int col, const char* file_name) {
destroy();
_file_name = file_name;
_row = row;
_col = col;
_params = reinterpret_cast<float*>(malloc(_row * _col * sizeof(float)));
if (_params == NULL) {
LOG(ERROR) << "Load " << _file_name << " malloc error.";
return -1;
if (engine_conf.has_enable_memory_optimization() &&
engine_conf.enable_memory_optimization()) {
config.EnableMemoryOptim();
}
VLOG(2) << "Load parameter file[" << _file_name << "] success.";
return 0;
}
void destroy() {
_row = 0;
_col = 0;
if (_params != NULL) {
free(_params);
_params = NULL;
}
}
if (engine_conf.has_encrypted_model() && engine_conf.encrypted_model()) {
// decrypt model
std::string model_buffer, params_buffer, key_buffer;
predictor::ReadBinaryFile(model_path + "encrypt_model", &model_buffer);
predictor::ReadBinaryFile(model_path + "encrypt_params", &params_buffer);
predictor::ReadBinaryFile(model_path + "key", &key_buffer);
int load() {
if (_params == NULL || _row <= 0 || _col <= 0) {
LOG(ERROR) << "load parameter error [not inited].";
return -1;
auto cipher = paddle::MakeCipher("");
std::string real_model_buffer = cipher->Decrypt(model_buffer, key_buffer);
std::string real_params_buffer =
cipher->Decrypt(params_buffer, key_buffer);
config.SetModelBuffer(&real_model_buffer[0],
real_model_buffer.size(),
&real_params_buffer[0],
real_params_buffer.size());
}
FILE* fs = fopen(_file_name.c_str(), "rb");
if (fs == NULL) {
LOG(ERROR) << "load " << _file_name << " fopen error.";
return -1;
}
static const uint32_t MODEL_FILE_HEAD_LEN = 16;
char head[MODEL_FILE_HEAD_LEN] = {0};
if (fread(head, 1, MODEL_FILE_HEAD_LEN, fs) != MODEL_FILE_HEAD_LEN) {
destroy();
LOG(ERROR) << "Load " << _file_name << " read head error.";
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
predictor::AutoLock lock(predictor::GlobalCreateMutex::instance());
_predictor = CreatePredictor(config);
if (NULL == _predictor.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << model_path;
return -1;
}
uint32_t matrix_size = _row * _col;
if (matrix_size == fread(_params, sizeof(float), matrix_size, fs)) {
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
VLOG(2) << "load " << _file_name << " read ok.";
return 0;
} else {
LOG(ERROR) << "load " << _file_name << " read error.";
destroy();
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
return -1;
}
VLOG(2) << "create paddle predictor sucess, path: " << model_path;
return 0;
}
public:
std::string _file_name;
int _row;
int _col;
float* _params;
};
} // namespace fluid_arm
} // namespace inference
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,24 +12,20 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle_inference/inferencer-fluid-arm/include/fluid_arm_engine.h"
#include "paddle_inference/paddle/include/paddle_engine.h"
#include "core/predictor/framework/factory.h"
namespace baidu {
namespace paddle_serving {
namespace fluid_arm {
namespace inference {
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<FluidArmAnalysisCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_ARM_ANALYSIS");
DEFINE_int32(gpuid, 0, "GPU device id to use");
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<
FluidArmAnalysisDirCore>,
::baidu::paddle_serving::predictor::FluidInferEngine<PaddleInferenceEngine>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_ARM_ANALYSIS_DIR");
"PADDLE_INFER");
} // namespace fluid_arm
} // namespace inference
} // namespace paddle_serving
} // namespace baidu
......@@ -41,24 +41,24 @@ include_directories(SYSTEM ${CMAKE_CURRENT_LIST_DIR}/../kvdb/include)
include(op/CMakeLists.txt)
include(proto/CMakeLists.txt)
add_executable(serving ${serving_srcs})
add_dependencies(serving pdcodegen fluid_cpu_engine pdserving paddle_fluid
add_dependencies(serving pdcodegen paddle_inference_engine pdserving paddle_inference
opencv_imgcodecs cube-api)
if (WITH_GPU)
add_dependencies(serving fluid_gpu_engine)
add_dependencies(serving paddle_inference_engine)
endif()
target_include_directories(serving PUBLIC
${CMAKE_CURRENT_BINARY_DIR}/../../core/predictor
)
if(WITH_GPU)
target_link_libraries(serving -Wl,--whole-archive fluid_gpu_engine
target_link_libraries(serving -Wl,--whole-archive paddle_inference_engine
-Wl,--no-whole-archive)
endif()
target_link_libraries(serving -Wl,--whole-archive fluid_cpu_engine
target_link_libraries(serving -Wl,--whole-archive paddle_inference_engine
-Wl,--no-whole-archive)
target_link_libraries(serving paddle_fluid ${paddle_depend_libs})
target_link_libraries(serving paddle_inference ${paddle_depend_libs})
target_link_libraries(serving opencv_imgcodecs
${opencv_depend_libs})
......
......@@ -18,16 +18,16 @@ include_directories(SYSTEM ${CMAKE_CURRENT_LIST_DIR}/../kvdb/include)
include(op/CMakeLists.txt)
include(proto/CMakeLists.txt)
add_executable(elastic_serving ${serving_srcs})
add_dependencies(elastic_serving pdcodegen fluid_cpu_engine pdserving paddle_fluid cube-api)
add_dependencies(elastic_serving pdcodegen paddle_inference_engine pdserving paddle_inference cube-api)
target_include_directories(elastic_serving PUBLIC
${CMAKE_CURRENT_BINARY_DIR}/../../predictor
)
target_link_libraries(elastic_serving -Wl,--whole-archive fluid_cpu_engine
target_link_libraries(elastic_serving -Wl,--whole-archive paddle_inference_engine
-Wl,--no-whole-archive)
target_link_libraries(elastic_serving paddle_fluid ${paddle_depend_libs})
target_link_libraries(elastic_serving paddle_inference ${paddle_depend_libs})
target_link_libraries(elastic_serving pdserving)
target_link_libraries(elastic_serving cube-api)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册