Commit a3195cfc authored by Z zhangjun

update

Parent 62251420
@@ -20,7 +20,7 @@ message EngineDesc {
   required string type = 2;
   required string reloadable_meta = 3;
   required string reloadable_type = 4;
-  required string model_data_path = 5;
+  required string model_dir = 5;
   required int32 runtime_thread_num = 6;
   required int32 batch_infer_size = 7;
   required int32 enable_batch_align = 8;
@@ -41,12 +41,13 @@ message EngineDesc {
   optional SparseParamServiceType sparse_param_service_type = 11;
   optional string sparse_param_service_table_name = 12;
   optional bool enable_memory_optimization = 13;
-  optional bool static_optimization = 14;
-  optional bool force_update_static_cache = 15;
-  optional bool enable_ir_optimization = 16;
-  optional bool use_trt = 17;
-  optional bool use_lite = 18;
-  optional bool use_xpu = 19;
+  optional bool enable_ir_optimization = 14;
+  optional bool use_trt = 15;
+  optional bool use_lite = 16;
+  optional bool use_xpu = 17;
+  optional bool use_gpu = 18;
+  optional bool combined_model = 19;
+  optional bool encrypted_model = 20;
 };
 // model_toolkit conf
...
@@ -69,8 +69,6 @@ int test_write_conf() {
   engine->set_sparse_param_service_type(EngineDesc::LOCAL);
   engine->set_sparse_param_service_table_name("local_kv");
   engine->set_enable_memory_optimization(true);
-  engine->set_static_optimization(false);
-  engine->set_force_update_static_cache(false);
 
   int ret = baidu::paddle_serving::configure::write_proto_conf(
       &model_toolkit_conf, output_dir, model_toolkit_conf_file);
...
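For orientation, a small sketch of how an engine entry could be populated against the updated schema. The setter names are the ones protobuf generates for the fields declared above; the header path, the example values, and the "PADDLE_INFER" type string (registered at the end of this commit) are illustrative, not part of the change itself.

// Sketch: fill an EngineDesc under the new schema.
// Setters follow protobuf codegen for the fields shown above; the include
// path and example values are assumptions, not code from this commit.
#include "core/configure/server_configure.pb.h"  // assumed generated header

using baidu::paddle_serving::configure::EngineDesc;

void FillEngineDesc(EngineDesc* engine) {
  engine->set_type("PADDLE_INFER");              // unified engine type registered below
  engine->set_model_dir("./uci_housing_model");  // renamed from model_data_path
  engine->set_enable_memory_optimization(true);
  engine->set_enable_ir_optimization(true);
  engine->set_use_gpu(false);
  engine->set_combined_model(false);   // new flag added in this commit
  engine->set_encrypted_model(false);  // new flag added in this commit
}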
@@ -148,6 +148,16 @@ class IsDerivedFrom {
   }
 };
 
+void ReadBinaryFile(const std::string& filename, std::string* contents) {
+  std::ifstream fin(filename, std::ios::in | std::ios::binary);
+  fin.seekg(0, std::ios::end);
+  contents->clear();
+  contents->resize(fin.tellg());
+  fin.seekg(0, std::ios::beg);
+  fin.read(&(contents->at(0)), contents->size());
+  fin.close();
+}
+
 }  // namespace predictor
 }  // namespace paddle_serving
 }  // namespace baidu
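The helper added above slurps an entire file into a string and assumes the file opens successfully; the encrypted-model code removed later in this commit used the same pattern. A minimal usage sketch (the file names and the calling function are illustrative, not from the commit):

// Sketch: read a model file and its key into memory with the new helper.
// File names are examples; real names depend on how the model is packaged.
#include <string>

int LoadEncryptedBuffers(const std::string& model_dir,
                         std::string* model_buffer,
                         std::string* key_buffer) {
  using baidu::paddle_serving::predictor::ReadBinaryFile;
  ReadBinaryFile(model_dir + "/encrypt_model", model_buffer);
  ReadBinaryFile(model_dir + "/key", key_buffer);
  if (model_buffer->empty() || key_buffer->empty()) {
    return -1;  // nothing was read; callers should treat this as a failure
  }
  return 0;
}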
@@ -30,109 +30,28 @@ namespace predictor {
 
 using configure::ModelToolkitConf;
 
-class InferEngineCreationParams {
- public:
-  InferEngineCreationParams() {
-    _path = "";
-    _enable_memory_optimization = false;
-    _enable_ir_optimization = false;
-    _static_optimization = false;
-    _force_update_static_cache = false;
-    _use_trt = false;
-    _use_lite = false;
-    _use_xpu = false;
-  }
-
-  void set_path(const std::string& path) { _path = path; }
-
-  void set_enable_memory_optimization(bool enable_memory_optimization) {
-    _enable_memory_optimization = enable_memory_optimization;
-  }
-
-  void set_enable_ir_optimization(bool enable_ir_optimization) {
-    _enable_ir_optimization = enable_ir_optimization;
-  }
-
-  void set_use_trt(bool use_trt) { _use_trt = use_trt; }
-  void set_use_lite(bool use_lite) { _use_lite = use_lite; }
-  void set_use_xpu(bool use_xpu) { _use_xpu = use_xpu; }
-
-  bool enable_memory_optimization() const {
-    return _enable_memory_optimization;
-  }
-
-  bool enable_ir_optimization() const { return _enable_ir_optimization; }
-  bool use_trt() const { return _use_trt; }
-  bool use_lite() const { return _use_lite; }
-  bool use_xpu() const { return _use_xpu; }
-
-  void set_static_optimization(bool static_optimization = false) {
-    _static_optimization = static_optimization;
-  }
-
-  void set_force_update_static_cache(bool force_update_static_cache = false) {
-    _force_update_static_cache = force_update_static_cache;
-  }
-
-  bool static_optimization() const { return _static_optimization; }
-  bool force_update_static_cache() const { return _force_update_static_cache; }
-
-  std::string get_path() const { return _path; }
-
-  void dump() const {
-    LOG(INFO) << "InferEngineCreationParams: "
-              << "model_path = " << _path << ", "
-              << "enable_memory_optimization = " << _enable_memory_optimization
-              << ", "
-              << "enable_tensorrt = " << _use_trt << ", "
-              << "enable_lite = " << _use_lite << ", "
-              << "enable_xpu = " << _use_xpu << ", "
-              << "enable_ir_optimization = " << _enable_ir_optimization << ", "
-              << "static_optimization = " << _static_optimization << ", "
-              << "force_update_static_cache = " << _force_update_static_cache;
-  }
-
- private:
-  std::string _path;
-  bool _enable_memory_optimization;
-  bool _enable_ir_optimization;
-  bool _static_optimization;
-  bool _force_update_static_cache;
-  bool _use_trt;
-  bool _use_lite;
-  bool _use_xpu;
-};
-
 class AutoLock {
  public:
   explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) {
     pthread_mutex_lock(&mutex);
   }
   ~AutoLock() { pthread_mutex_unlock(&_mut); }
 
  private:
   pthread_mutex_t& _mut;
 };
 
-class GlobalPaddleCreateMutex {
+class GlobalCreateMutex {
  public:
   pthread_mutex_t& mutex() { return _mut; }
 
   static pthread_mutex_t& instance() {
-    static GlobalPaddleCreateMutex gmutex;
+    static GlobalCreateMutex gmutex;
     return gmutex.mutex();
   }
 
  private:
-  GlobalPaddleCreateMutex() { pthread_mutex_init(&_mut, NULL); }
+  GlobalCreateMutex() { pthread_mutex_init(&_mut, NULL); }
   pthread_mutex_t _mut;
 };
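The mutex rename is mechanical; the predictor-creation code later in the commit still wraps creation in an AutoLock in exactly the same way. A minimal sketch under the new name (the wrapped creation call is a stand-in, not code from this commit):

// Sketch: serialize a creation step that is not thread-safe.
// EngineCoreT and the allocation are placeholders; only AutoLock and
// GlobalCreateMutex come from the code above.
#include <new>

template <typename EngineCoreT>
EngineCoreT* MakeCoreSerialized() {
  AutoLock lock(GlobalCreateMutex::instance());  // pthread mutex, released by ~AutoLock
  return new (std::nothrow) EngineCoreT();       // creation happens under the lock
}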
@@ -180,7 +99,7 @@ class ReloadableInferEngine : public InferEngine {
     uint64_t last_revision;
   };
 
-  virtual int load(const InferEngineCreationParams& params) = 0;
+  virtual int load(const configure::EngineDesc& conf) = 0;
 
   int proc_initialize_impl(const configure::EngineDesc& conf, bool version) {
     _reload_tag_file = conf.reloadable_meta();
@@ -190,47 +109,9 @@ class ReloadableInferEngine : public InferEngine {
     _infer_batch_size = conf.batch_infer_size();
     _infer_batch_align = conf.enable_batch_align();
 
-    bool enable_memory_optimization = false;
-    if (conf.has_enable_memory_optimization()) {
-      enable_memory_optimization = conf.enable_memory_optimization();
-    }
-
-    bool static_optimization = false;
-    if (conf.has_static_optimization()) {
-      static_optimization = conf.static_optimization();
-    }
-
-    bool force_update_static_cache = false;
-    if (conf.has_force_update_static_cache()) {
-      force_update_static_cache = conf.force_update_static_cache();
-    }
-
-    if (conf.has_enable_ir_optimization()) {
-      _infer_engine_params.set_enable_ir_optimization(
-          conf.enable_ir_optimization());
-    }
-
-    _infer_engine_params.set_path(_model_data_path);
-    if (enable_memory_optimization) {
-      _infer_engine_params.set_enable_memory_optimization(true);
-      _infer_engine_params.set_static_optimization(static_optimization);
-      _infer_engine_params.set_force_update_static_cache(
-          force_update_static_cache);
-    }
-
-    if (conf.has_use_trt()) {
-      _infer_engine_params.set_use_trt(conf.use_trt());
-    }
-
-    if (conf.has_use_lite()) {
-      _infer_engine_params.set_use_lite(conf.use_lite());
-    }
-
-    if (conf.has_use_xpu()) {
-      _infer_engine_params.set_use_xpu(conf.use_xpu());
-    }
-
-    if (!check_need_reload() || load(_infer_engine_params) != 0) {
+    _conf = conf;
+    if (!check_need_reload() || load(conf) != 0) {
       LOG(ERROR) << "Failed load model_data_path" << _model_data_path;
       return -1;
     }
@@ -258,7 +139,6 @@ class ReloadableInferEngine : public InferEngine {
     if (_infer_thread_num > 0) {
       return 0;
     }
-
     return thrd_initialize_impl();
   }
@@ -288,7 +168,7 @@ class ReloadableInferEngine : public InferEngine {
   }
 
   uint64_t version() const { return _version; }
   uint32_t thread_num() const { return _infer_thread_num; }
 
  private:
@@ -350,7 +230,7 @@ class ReloadableInferEngine : public InferEngine {
  protected:
   std::string _model_data_path;
-  InferEngineCreationParams _infer_engine_params;
+  configure::EngineDesc _conf;
 
  private:
   std::string _reload_tag_file;
@@ -389,25 +269,25 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
     return ReloadableInferEngine::proc_initialize(conf, version);
   }
 
-  virtual int load(const InferEngineCreationParams& params) {
+  virtual int load(const configure::EngineDesc& conf) {
     if (_reload_vec.empty()) {
       return 0;
     }
 
     for (uint32_t ti = 0; ti < _reload_vec.size(); ++ti) {
-      if (load_data(_reload_vec[ti], params) != 0) {
+      if (load_data(_reload_vec[ti], conf) != 0) {
         LOG(ERROR) << "Failed reload engine model: " << ti;
         return -1;
       }
     }
 
-    LOG(WARNING) << "Succ load engine, path: " << params.get_path();
+    LOG(WARNING) << "Succ load engine, path: " << conf.model_dir();
     return 0;
   }
 
   int load_data(ModelData<EngineCore>* md,
-                const InferEngineCreationParams& params) {
+                const configure::EngineDesc& conf) {
     uint32_t next_idx = (md->current_idx + 1) % 2;
     if (md->cores[next_idx]) {
       delete md->cores[next_idx];
@@ -415,9 +295,9 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
     md->cores[next_idx] = new (std::nothrow) EngineCore;
 
-    params.dump();
-    if (!md->cores[next_idx] || md->cores[next_idx]->create(params) != 0) {
-      LOG(ERROR) << "Failed create model, path: " << params.get_path();
+    // params.dump();
+    if (!md->cores[next_idx] || md->cores[next_idx]->create(conf) != 0) {
+      LOG(ERROR) << "Failed create model, path: " << conf.model_dir();
       return -1;
     }
     md->current_idx = next_idx;
@@ -428,9 +308,9 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
     // memory pool to be inited in non-serving-threads
     ModelData<EngineCore>* md = new (std::nothrow) ModelData<EngineCore>;
-    if (!md || load_data(md, _infer_engine_params) != 0) {
+    if (!md || load_data(md, _conf) != 0) {
       LOG(ERROR) << "Failed create thread data from "
-                 << _infer_engine_params.get_path();
+                 << _conf.model_dir();
       return -1;
     }
@@ -486,16 +366,16 @@ class CloneDBReloadableInferEngine
     return DBReloadableInferEngine<EngineCore>::proc_initialize(conf, version);
   }
 
-  virtual int load(const InferEngineCreationParams& params) {
+  virtual int load(const configure::EngineDesc& conf) {
     // load the process-level model data
     if (!_pd ||
-        DBReloadableInferEngine<EngineCore>::load_data(_pd, params) != 0) {
-      LOG(ERROR) << "Failed to create common model from [" << params.get_path()
+        DBReloadableInferEngine<EngineCore>::load_data(_pd, conf) != 0) {
+      LOG(ERROR) << "Failed to create common model from [" << conf.model_dir()
                  << "].";
       return -1;
     }
     LOG(WARNING) << "Succ load common model[" << _pd->cores[_pd->current_idx]
-                 << "], path[" << params.get_path() << "].";
+                 << "], path[" << conf.model_dir() << "].";
 
     if (DBReloadableInferEngine<EngineCore>::_reload_vec.empty()) {
       return 0;
@@ -511,7 +391,7 @@ class CloneDBReloadableInferEngine
       }
     }
 
-    LOG(WARNING) << "Succ load clone model, path[" << params.get_path() << "]";
+    LOG(WARNING) << "Succ load clone model, path[" << conf.model_dir() << "]";
     return 0;
   }
@@ -555,18 +435,18 @@ class CloneDBReloadableInferEngine
       _pd;  // process-level EngineCore; thread-level EngineCores share this object's model data
 };
 
-template <typename FluidFamilyCore>
+template <typename PaddleInferenceCore>
 #ifdef WITH_TRT
-class FluidInferEngine : public DBReloadableInferEngine<FluidFamilyCore> {
+class FluidInferEngine : public DBReloadableInferEngine<PaddleInferenceCore> {
 #else
-class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
+class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore> {
 #endif
  public:  // NOLINT
   FluidInferEngine() {}
   ~FluidInferEngine() {}
 
   std::vector<std::string> GetInputNames() {
-    FluidFamilyCore* core =
-        DBReloadableInferEngine<FluidFamilyCore>::get_core();
+    PaddleInferenceCore* core =
+        DBReloadableInferEngine<PaddleInferenceCore>::get_core();
     if (!core || !core->get()) {
       LOG(ERROR) << "Failed get fluid core in GetInputNames()";
     }
@@ -574,8 +454,8 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
   }
 
   std::vector<std::string> GetOutputNames() {
-    FluidFamilyCore* core =
-        DBReloadableInferEngine<FluidFamilyCore>::get_core();
+    PaddleInferenceCore* core =
+        DBReloadableInferEngine<PaddleInferenceCore>::get_core();
     if (!core || !core->get()) {
       LOG(ERROR) << "Failed get fluid core in GetOutputNames()";
     }
@@ -584,8 +464,8 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
   std::unique_ptr<paddle_infer::Tensor> GetInputHandle(
       const std::string& name) {
-    FluidFamilyCore* core =
-        DBReloadableInferEngine<FluidFamilyCore>::get_core();
+    PaddleInferenceCore* core =
+        DBReloadableInferEngine<PaddleInferenceCore>::get_core();
     if (!core || !core->get()) {
       LOG(ERROR) << "Failed get fluid core in GetInputHandle()";
     }
@@ -594,8 +474,8 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
   std::unique_ptr<paddle_infer::Tensor> GetOutputHandle(
       const std::string& name) {
-    FluidFamilyCore* core =
-        DBReloadableInferEngine<FluidFamilyCore>::get_core();
+    PaddleInferenceCore* core =
+        DBReloadableInferEngine<PaddleInferenceCore>::get_core();
     if (!core || !core->get()) {
       LOG(ERROR) << "Failed get fluid core in GetOutputHandle()";
     }
@@ -603,8 +483,8 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
   }
 
   int infer_impl() {
-    FluidFamilyCore* core =
-        DBReloadableInferEngine<FluidFamilyCore>::get_core();
+    PaddleInferenceCore* core =
+        DBReloadableInferEngine<PaddleInferenceCore>::get_core();
     if (!core || !core->get()) {
       LOG(ERROR) << "Failed get fluid core in infer_impl()";
       return -1;
...
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -33,183 +33,135 @@ using paddle_infer::Predictor;
 using paddle_infer::Tensor;
 using paddle_infer::CreatePredictor;
 
-// data interface
-class PaddleInfencceEngine {
+const static int max_batch = 32;
+const static int min_subgraph_size = 3;
+
+// Engine Base
+class PaddleEngineBase {
  public:
-  virtual ~FluidFamilyCore() {}
+  virtual ~PaddleEngineBase() {}
   virtual std::vector<std::string> GetInputNames() {
-    return _core->GetInputNames();
+    return _predictor->GetInputNames();
   }
 
   virtual std::unique_ptr<Tensor> GetInputHandle(const std::string& name) {
-    return _core->GetInputHandle(name);
+    return _predictor->GetInputHandle(name);
   }
 
   virtual std::vector<std::string> GetOutputNames() {
-    return _core->GetOutputNames();
+    return _predictor->GetOutputNames();
   }
 
   virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string& name) {
-    return _core->GetOutputHandle(name);
+    return _predictor->GetOutputHandle(name);
   }
 
   virtual bool Run() {
-    if (!_core->Run()) {
+    if (!_predictor->Run()) {
       LOG(ERROR) << "Failed call Run with paddle predictor";
       return false;
     }
     return true;
   }
 
-  virtual int create(const predictor::InferEngineCreationParams& params) = 0;
+  virtual int create(const configure::EngineDesc& conf) = 0;
 
-  virtual int clone(void* origin_core) {
-    if (origin_core == NULL) {
+  virtual int clone(void* predictor) {
+    if (predictor == NULL) {
       LOG(ERROR) << "origin paddle Predictor is null.";
       return -1;
     }
-    Predictor* p_predictor = (Predictor*)origin_core;
-    _core = p_predictor->Clone();
-    if (_core.get() == NULL) {
-      LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
+    Predictor* prep = static_cast<Predictor*>(predictor);
+    _predictor = prep->Clone();
+    if (_predictor.get() == NULL) {
+      LOG(ERROR) << "fail to clone paddle predictor: " << predictor;
       return -1;
     }
     return 0;
   }
 
-  virtual void* get() { return _core.get(); }
+  virtual void* get() { return _predictor.get(); }
 
  protected:
-  std::shared_ptr<Predictor> _core;
+  std::shared_ptr<Predictor> _predictor;
 };
 
-// infer interface
-class FluidCpuAnalysisCore : public FluidFamilyCore {
+// Paddle Inference Engine
+class PaddleInferenceEngine : public PaddleEngineBase {
  public:
-  int create(const predictor::InferEngineCreationParams& params) {
-    std::string data_path = params.get_path();
-    if (access(data_path.c_str(), F_OK) == -1) {
+  int create(const configure::EngineDesc& engine_conf) {
+    std::string model_path = engine_conf.model_dir();
+    if (access(model_path.c_str(), F_OK) == -1) {
       LOG(ERROR) << "create paddle predictor failed, path not exits: "
-                 << data_path;
+                 << model_path;
       return -1;
     }
 
     Config config;
-    config.SetParamsFile(data_path + "/__params__");
-    config.SetProgFile(data_path + "/__model__");
-    config.DisableGpu();
-    config.SetCpuMathLibraryNumThreads(1);
-
-    if (params.enable_memory_optimization()) {
-      config.EnableMemoryOptim();
-    }
-
-    config.SwitchSpecifyInputNames(true);
-    AutoLock lock(GlobalPaddleCreateMutex::instance());
-    _core = CreatePredictor(config);
-    if (NULL == _core.get()) {
-      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
-      return -1;
-    }
-    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
-    return 0;
-  }
-};
-
-class FluidCpuAnalysisDirCore : public FluidFamilyCore {
- public:
-  int create(const predictor::InferEngineCreationParams& params) {
-    std::string data_path = params.get_path();
-    if (access(data_path.c_str(), F_OK) == -1) {
-      LOG(ERROR) << "create paddle predictor failed, path not exits: "
-                 << data_path;
-      return -1;
-    }
-
-    Config config;
-    config.SetModel(data_path);
-    config.DisableGpu();
-    config.SwitchSpecifyInputNames(true);
-    config.SetCpuMathLibraryNumThreads(1);
-
-    if (params.enable_memory_optimization()) {
-      config.EnableMemoryOptim();
-    }
-
-    if (params.enable_ir_optimization()) {
-      config.SwitchIrOptim(true);
-    } else {
-      config.SwitchIrOptim(false);
-    }
-
-    AutoLock lock(GlobalPaddleCreateMutex::instance());
-    _core = CreatePredictor(config);
-    if (NULL == _core.get()) {
-      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
-      return -1;
-    }
-    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
-    return 0;
-  }
-};
-
-class FluidCpuAnalysisEncryptCore : public FluidFamilyCore {
- public:
-  void ReadBinaryFile(const std::string& filename, std::string* contents) {
-    std::ifstream fin(filename, std::ios::in | std::ios::binary);
-    fin.seekg(0, std::ios::end);
-    contents->clear();
-    contents->resize(fin.tellg());
-    fin.seekg(0, std::ios::beg);
-    fin.read(&(contents->at(0)), contents->size());
-    fin.close();
-  }
-
-  int create(const predictor::InferEngineCreationParams& params) {
-    std::string data_path = params.get_path();
-    if (access(data_path.c_str(), F_OK) == -1) {
-      LOG(ERROR) << "create paddle predictor failed, path note exits: "
-                 << data_path;
-      return -1;
-    }
-
-    std::string model_buffer, params_buffer, key_buffer;
-    ReadBinaryFile(data_path + "encrypt_model", &model_buffer);
-    ReadBinaryFile(data_path + "encrypt_params", &params_buffer);
-    ReadBinaryFile(data_path + "key", &key_buffer);
-
-    VLOG(2) << "prepare for encryption model";
-    auto cipher = paddle::MakeCipher("");
-    std::string real_model_buffer = cipher->Decrypt(model_buffer, key_buffer);
-    std::string real_params_buffer = cipher->Decrypt(params_buffer, key_buffer);
-
-    Config analysis_config;
-    // paddle::AnalysisConfig analysis_config;
-    analysis_config.SetModelBuffer(&real_model_buffer[0],
-                                   real_model_buffer.size(),
-                                   &real_params_buffer[0],
-                                   real_params_buffer.size());
-    analysis_config.DisableGpu();
-    analysis_config.SetCpuMathLibraryNumThreads(1);
-    if (params.enable_memory_optimization()) {
-      analysis_config.EnableMemoryOptim();
-    }
-    analysis_config.SwitchSpecifyInputNames(true);
+    // todo, auto config(zhangjun)
+    if (engine_conf.has_combined_model()) {
+      if (!engine_conf.combined_model()) {
+        config.SetModel(model_path);
+      } else {
+        config.SetParamsFile(model_path + "/__params__");
+        config.SetProgFile(model_path + "/__model__");
+      }
+    } else {
+      config.SetParamsFile(model_path + "/__params__");
+      config.SetProgFile(model_path + "/__model__");
+    }
+    config.SwitchSpecifyInputNames(true);
+    config.SetCpuMathLibraryNumThreads(1);
+
+    if (engine_conf.has_use_gpu() && engine_conf.use_gpu()) {
+      // 2000MB GPU memory
+      config.EnableUseGpu(2000, FLAGS_gpuid);
+    }
+
+    if (engine_conf.has_use_trt() && engine_conf.use_trt()) {
+      config.EnableTensorRtEngine(1 << 20,
+                                  max_batch,
+                                  min_subgraph_size,
+                                  Config::Precision::kFloat32,
+                                  false,
+                                  false);
+      LOG(INFO) << "create TensorRT predictor";
+    }
+
+    if (engine_conf.has_use_lite() && engine_conf.use_lite()) {
+      config.EnableLiteEngine(PrecisionType::kFloat32, true);
+    }
+
+    if (engine_conf.has_use_xpu() && engine_conf.use_xpu()) {
+      // 2 MB l3 cache
+      config.EnableXpu(2 * 1024 * 1024);
+    }
+
+    if (engine_conf.has_enable_ir_optimization() &&
+        !engine_conf.enable_ir_optimization()) {
+      config.SwitchIrOptim(false);
+    } else {
+      config.SwitchIrOptim(true);
+    }
+
+    if (engine_conf.has_enable_memory_optimization() &&
+        engine_conf.enable_memory_optimization()) {
+      config.EnableMemoryOptim();
+    }
+
+    if (false) {
+      // todo, encrypt model
+      // analysis_config.SetModelBuffer();
+    }
 
     AutoLock lock(GlobalPaddleCreateMutex::instance());
-    VLOG(2) << "decrypt model file sucess";
-    _core = CreatePredictor(analysis_config);
-    if (NULL == _core.get()) {
-      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
+    _predictor = CreatePredictor(config);
+    if (NULL == _predictor.get()) {
+      LOG(ERROR) << "create paddle predictor failed, path: " << model_path;
       return -1;
     }
-    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
+    VLOG(2) << "create paddle predictor success, path: " << model_path;
     return 0;
   }
 };
 
-}  // namespace fluid_cpu
+}  // namespace inference
 }  // namespace paddle_serving
 }  // namespace baidu
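The `// todo, encrypt model` branch above is left unimplemented in this commit. The deleted FluidCpuAnalysisEncryptCore shows roughly what it would need; below is a minimal sketch pieced together from that removed code. The helper function name, the on-disk file names, the assumed include, and gating on the new `encrypted_model` flag are assumptions, not part of the change.

// Sketch only: fill a Config from decrypted in-memory buffers, reassembled
// from the deleted FluidCpuAnalysisEncryptCore. File names, the include path,
// and the visibility of ReadBinaryFile here are assumptions.
#include <string>
#include "paddle_inference_api.h"  // assumed header for paddle_infer::Config

static int ConfigFromEncryptedModel(const std::string& model_path,
                                    paddle_infer::Config* config) {
  std::string model_buffer, params_buffer, key_buffer;
  ReadBinaryFile(model_path + "/encrypt_model", &model_buffer);
  ReadBinaryFile(model_path + "/encrypt_params", &params_buffer);
  ReadBinaryFile(model_path + "/key", &key_buffer);

  // Decrypt with the same cipher the removed code used.
  auto cipher = paddle::MakeCipher("");
  std::string real_model = cipher->Decrypt(model_buffer, key_buffer);
  std::string real_params = cipher->Decrypt(params_buffer, key_buffer);

  // Hand the decrypted buffers to the predictor config instead of file paths.
  config->SetModelBuffer(&real_model[0], real_model.size(),
                         &real_params[0], real_params.size());
  return 0;
}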
@@ -17,26 +17,13 @@
 namespace baidu {
 namespace paddle_serving {
-namespace fluid_cpu {
+namespace inference {
 
 REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
-    ::baidu::paddle_serving::predictor::FluidInferEngine<FluidCpuAnalysisCore>,
+    ::baidu::paddle_serving::predictor::FluidInferEngine<PaddleInferenceEngine>,
     ::baidu::paddle_serving::predictor::InferEngine,
-    "FLUID_CPU_ANALYSIS");
-
-REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
-    ::baidu::paddle_serving::predictor::FluidInferEngine<
-        FluidCpuAnalysisDirCore>,
-    ::baidu::paddle_serving::predictor::InferEngine,
-    "FLUID_CPU_ANALYSIS_DIR");
-
-#if 1
-REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
-    ::baidu::paddle_serving::predictor::FluidInferEngine<
-        FluidCpuAnalysisEncryptCore>,
-    ::baidu::paddle_serving::predictor::InferEngine,
-    "FLUID_CPU_ANALYSIS_ENCRYPT");
-#endif
-
-}  // namespace fluid_cpu
+    "PADDLE_INFER");
+
+}  // namespace inference
 }  // namespace paddle_serving
 }  // namespace baidu