From a3195cfc455bdbcb64ea6b790e3398aaa1cd1dd9 Mon Sep 17 00:00:00 2001
From: zhangjun
Date: Wed, 10 Mar 2021 15:45:10 +0800
Subject: [PATCH] update

---
 core/configure/proto/server_configure.proto   |  15 +-
 core/configure/tests/test_configure.cpp       |   2 -
 core/predictor/common/utils.h                 |  10 +
 core/predictor/framework/infer.h              | 190 ++++--------------
 .../paddle/include/paddle_engine.h            | 188 +++++++----------
 paddle_inference/paddle/src/paddle_engine.cpp |  21 +-
 6 files changed, 127 insertions(+), 299 deletions(-)

diff --git a/core/configure/proto/server_configure.proto b/core/configure/proto/server_configure.proto
index ea03d44f..62537d01 100644
--- a/core/configure/proto/server_configure.proto
+++ b/core/configure/proto/server_configure.proto
@@ -20,7 +20,7 @@ message EngineDesc {
   required string type = 2;
   required string reloadable_meta = 3;
   required string reloadable_type = 4;
-  required string model_data_path = 5;
+  required string model_dir = 5;
   required int32 runtime_thread_num = 6;
   required int32 batch_infer_size = 7;
   required int32 enable_batch_align = 8;
@@ -41,12 +41,13 @@ message EngineDesc {
   optional SparseParamServiceType sparse_param_service_type = 11;
   optional string sparse_param_service_table_name = 12;
   optional bool enable_memory_optimization = 13;
-  optional bool static_optimization = 14;
-  optional bool force_update_static_cache = 15;
-  optional bool enable_ir_optimization = 16;
-  optional bool use_trt = 17;
-  optional bool use_lite = 18;
-  optional bool use_xpu = 19;
+  optional bool enable_ir_optimization = 14;
+  optional bool use_trt = 15;
+  optional bool use_lite = 16;
+  optional bool use_xpu = 17;
+  optional bool use_gpu = 18;
+  optional bool combined_model = 19;
+  optional bool encrypted_model = 20;
 };
 
 // model_toolkit conf
diff --git a/core/configure/tests/test_configure.cpp b/core/configure/tests/test_configure.cpp
index 9ef35acb..816085fa 100644
--- a/core/configure/tests/test_configure.cpp
+++ b/core/configure/tests/test_configure.cpp
@@ -69,8 +69,6 @@ int test_write_conf() {
   engine->set_sparse_param_service_type(EngineDesc::LOCAL);
   engine->set_sparse_param_service_table_name("local_kv");
   engine->set_enable_memory_optimization(true);
-  engine->set_static_optimization(false);
-  engine->set_force_update_static_cache(false);
 
   int ret = baidu::paddle_serving::configure::write_proto_conf(
       &model_toolkit_conf, output_dir, model_toolkit_conf_file);
diff --git a/core/predictor/common/utils.h b/core/predictor/common/utils.h
index a1cdb0db..989b0951 100644
--- a/core/predictor/common/utils.h
+++ b/core/predictor/common/utils.h
@@ -148,6 +148,16 @@ class IsDerivedFrom {
   }
 };
 
+void ReadBinaryFile(const std::string& filename, std::string* contents) {
+  std::ifstream fin(filename, std::ios::in | std::ios::binary);
+  fin.seekg(0, std::ios::end);
+  contents->clear();
+  contents->resize(fin.tellg());
+  fin.seekg(0, std::ios::beg);
+  fin.read(&(contents->at(0)), contents->size());
+  fin.close();
+}
+
 }  // namespace predictor
 }  // namespace paddle_serving
 }  // namespace baidu
diff --git a/core/predictor/framework/infer.h b/core/predictor/framework/infer.h
index 63003f0c..6c0e98e6 100644
--- a/core/predictor/framework/infer.h
+++ b/core/predictor/framework/infer.h
@@ -30,109 +30,28 @@ namespace predictor {
 
 using configure::ModelToolkitConf;
 
-class InferEngineCreationParams {
- public:
-  InferEngineCreationParams() {
-    _path = "";
-    _enable_memory_optimization = false;
-    _enable_ir_optimization = false;
-    _static_optimization = false;
-    _force_update_static_cache = false;
-    _use_trt = false;
-    _use_lite = false;
-    _use_xpu = false;
-  }
-
-  void set_path(const std::string& path) { _path = path; }
-
-  void set_enable_memory_optimization(bool enable_memory_optimization) {
-    _enable_memory_optimization = enable_memory_optimization;
-  }
-
-  void set_enable_ir_optimization(bool enable_ir_optimization) {
-    _enable_ir_optimization = enable_ir_optimization;
-  }
-
-  void set_use_trt(bool use_trt) { _use_trt = use_trt; }
-
-  void set_use_lite(bool use_lite) { _use_lite = use_lite; }
-
-  void set_use_xpu(bool use_xpu) { _use_xpu = use_xpu; }
-
-  bool enable_memory_optimization() const {
-    return _enable_memory_optimization;
-  }
-
-  bool enable_ir_optimization() const { return _enable_ir_optimization; }
-
-  bool use_trt() const { return _use_trt; }
-
-  bool use_lite() const { return _use_lite; }
-
-  bool use_xpu() const { return _use_xpu; }
-
-  void set_static_optimization(bool static_optimization = false) {
-    _static_optimization = static_optimization;
-  }
-
-  void set_force_update_static_cache(bool force_update_static_cache = false) {
-    _force_update_static_cache = force_update_static_cache;
-  }
-
-  bool static_optimization() const { return _static_optimization; }
-
-  bool force_update_static_cache() const { return _force_update_static_cache; }
-
-  std::string get_path() const { return _path; }
-
-  void dump() const {
-    LOG(INFO) << "InferEngineCreationParams: "
-              << "model_path = " << _path << ", "
-              << "enable_memory_optimization = " << _enable_memory_optimization
-              << ", "
-              << "enable_tensorrt = " << _use_trt << ", "
-              << "enable_lite = " << _use_lite << ", "
-              << "enable_xpu = " << _use_xpu << ", "
-              << "enable_ir_optimization = " << _enable_ir_optimization << ", "
-              << "static_optimization = " << _static_optimization << ", "
-              << "force_update_static_cache = " << _force_update_static_cache;
-  }
-
- private:
-  std::string _path;
-  bool _enable_memory_optimization;
-  bool _enable_ir_optimization;
-  bool _static_optimization;
-  bool _force_update_static_cache;
-  bool _use_trt;
-  bool _use_lite;
-  bool _use_xpu;
-};
-
 class AutoLock {
  public:
   explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) {
     pthread_mutex_lock(&mutex);
   }
-
   ~AutoLock() { pthread_mutex_unlock(&_mut); }
 
  private:
   pthread_mutex_t& _mut;
 };
 
-class GlobalPaddleCreateMutex {
+class GlobalCreateMutex {
 public:
   pthread_mutex_t& mutex() { return _mut; }
 
   static pthread_mutex_t& instance() {
-    static GlobalPaddleCreateMutex gmutex;
+    static GlobalCreateMutex gmutex;
     return gmutex.mutex();
   }
 
 private:
-  GlobalPaddleCreateMutex() { pthread_mutex_init(&_mut, NULL); }
-
+  GlobalCreateMutex() { pthread_mutex_init(&_mut, NULL); }
   pthread_mutex_t _mut;
 };
 
@@ -180,7 +99,7 @@ class ReloadableInferEngine : public InferEngine {
     uint64_t last_revision;
   };
 
-  virtual int load(const InferEngineCreationParams& params) = 0;
+  virtual int load(const configure::EngineDesc& conf) = 0;
 
   int proc_initialize_impl(const configure::EngineDesc& conf, bool version) {
     _reload_tag_file = conf.reloadable_meta();
@@ -190,47 +109,9 @@ class ReloadableInferEngine : public InferEngine {
     _infer_batch_size = conf.batch_infer_size();
     _infer_batch_align = conf.enable_batch_align();
 
-    bool enable_memory_optimization = false;
-    if (conf.has_enable_memory_optimization()) {
-      enable_memory_optimization = conf.enable_memory_optimization();
-    }
-
-    bool static_optimization = false;
-    if (conf.has_static_optimization()) {
-      static_optimization = conf.static_optimization();
-    }
-
-    bool force_update_static_cache = false;
-    if (conf.has_force_update_static_cache()) {
-      force_update_static_cache = conf.force_update_static_cache();
-    }
-
-    if (conf.has_enable_ir_optimization()) {
-      _infer_engine_params.set_enable_ir_optimization(
-          conf.enable_ir_optimization());
-    }
-
-    _infer_engine_params.set_path(_model_data_path);
-    if (enable_memory_optimization) {
-      _infer_engine_params.set_enable_memory_optimization(true);
-      _infer_engine_params.set_static_optimization(static_optimization);
-      _infer_engine_params.set_force_update_static_cache(
-          force_update_static_cache);
-    }
-
-    if (conf.has_use_trt()) {
-      _infer_engine_params.set_use_trt(conf.use_trt());
-    }
-
-    if (conf.has_use_lite()) {
-      _infer_engine_params.set_use_lite(conf.use_lite());
-    }
-
-    if (conf.has_use_xpu()) {
-      _infer_engine_params.set_use_xpu(conf.use_xpu());
-    }
+    _conf = conf;
 
-    if (!check_need_reload() || load(_infer_engine_params) != 0) {
+    if (!check_need_reload() || load(conf) != 0) {
       LOG(ERROR) << "Failed load model_data_path" << _model_data_path;
       return -1;
     }
@@ -258,7 +139,6 @@ class ReloadableInferEngine : public InferEngine {
     if (_infer_thread_num > 0) {
       return 0;
     }
-
     return thrd_initialize_impl();
   }
 
@@ -288,7 +168,7 @@ class ReloadableInferEngine : public InferEngine {
   }
 
   uint64_t version() const { return _version; }
-
+
   uint32_t thread_num() const { return _infer_thread_num; }
 
  private:
@@ -350,7 +230,7 @@ class ReloadableInferEngine : public InferEngine {
 
 protected:
   std::string _model_data_path;
-  InferEngineCreationParams _infer_engine_params;
+  configure::EngineDesc _conf;
 
 private:
   std::string _reload_tag_file;
@@ -389,25 +269,25 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
     return ReloadableInferEngine::proc_initialize(conf, version);
   }
 
-  virtual int load(const InferEngineCreationParams& params) {
+  virtual int load(const configure::EngineDesc& conf) {
     if (_reload_vec.empty()) {
       return 0;
     }
 
     for (uint32_t ti = 0; ti < _reload_vec.size(); ++ti) {
-      if (load_data(_reload_vec[ti], params) != 0) {
+      if (load_data(_reload_vec[ti], conf) != 0) {
         LOG(ERROR) << "Failed reload engine model: " << ti;
         return -1;
       }
     }
 
-    LOG(WARNING) << "Succ load engine, path: " << params.get_path();
+    LOG(WARNING) << "Succ load engine, path: " << conf.model_dir();
 
     return 0;
   }
 
   int load_data(ModelData<EngineCore>* md,
-                const InferEngineCreationParams& params) {
+                const configure::EngineDesc& conf) {
     uint32_t next_idx = (md->current_idx + 1) % 2;
     if (md->cores[next_idx]) {
       delete md->cores[next_idx];
     }
@@ -415,9 +295,9 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
 
     md->cores[next_idx] = new (std::nothrow) EngineCore;
 
-    params.dump();
-    if (!md->cores[next_idx] || md->cores[next_idx]->create(params) != 0) {
-      LOG(ERROR) << "Failed create model, path: " << params.get_path();
+    // params.dump();
+    if (!md->cores[next_idx] || md->cores[next_idx]->create(conf) != 0) {
+      LOG(ERROR) << "Failed create model, path: " << conf.model_dir();
       return -1;
     }
     md->current_idx = next_idx;
@@ -428,9 +308,9 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
     // memory pool to be inited in non-serving-threads
     ModelData<EngineCore>* md = new (std::nothrow) ModelData<EngineCore>;
 
-    if (!md || load_data(md, _infer_engine_params) != 0) {
+    if (!md || load_data(md, _conf) != 0) {
       LOG(ERROR) << "Failed create thread data from "
-                 << _infer_engine_params.get_path();
+                 << _conf.model_dir();
       return -1;
     }
 
@@ -486,16 +366,16 @@ class CloneDBReloadableInferEngine
     return DBReloadableInferEngine<EngineCore>::proc_initialize(conf, version);
   }
 
-  virtual int load(const InferEngineCreationParams& params) {
+  virtual int load(const configure::EngineDesc& conf) {
     // 加载进程级模型数据
     if (!_pd ||
-        DBReloadableInferEngine<EngineCore>::load_data(_pd, params) != 0) {
-      LOG(ERROR) << "Failed to create common model from [" << params.get_path()
+        DBReloadableInferEngine<EngineCore>::load_data(_pd, conf) != 0) {
+      LOG(ERROR) << "Failed to create common model from [" << conf.model_dir()
                  << "].";
       return -1;
     }
     LOG(WARNING) << "Succ load common model[" << _pd->cores[_pd->current_idx]
-                 << "], path[" << params.get_path() << "].";
+                 << "], path[" << conf.model_dir() << "].";
 
     if (DBReloadableInferEngine<EngineCore>::_reload_vec.empty()) {
       return 0;
     }
@@ -511,7 +391,7 @@ class CloneDBReloadableInferEngine
       }
     }
 
-    LOG(WARNING) << "Succ load clone model, path[" << params.get_path() << "]";
+    LOG(WARNING) << "Succ load clone model, path[" << conf.model_dir() << "]";
 
     return 0;
   }
@@ -555,18 +435,18 @@ class CloneDBReloadableInferEngine
       _pd;  // 进程级EngineCore,多个线程级EngineCore共用该对象的模型数据
 };
 
-template <typename FluidFamilyCore>
+template <typename PaddleInferenceCore>
 #ifdef WITH_TRT
-class FluidInferEngine : public DBReloadableInferEngine<FluidFamilyCore> {
+class FluidInferEngine : public DBReloadableInferEngine<PaddleInferenceCore> {
 #else
-class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
+class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore> {
 #endif
 public:  // NOLINT
   FluidInferEngine() {}
   ~FluidInferEngine() {}
 
   std::vector<std::string> GetInputNames() {
-    FluidFamilyCore* core =
-        DBReloadableInferEngine<FluidFamilyCore>::get_core();
+    PaddleInferenceCore* core =
+        DBReloadableInferEngine<PaddleInferenceCore>::get_core();
     if (!core || !core->get()) {
       LOG(ERROR) << "Failed get fluid core in GetInputHandle()";
     }
@@ -574,8 +454,8 @@ class FluidInferEngine : public CloneDBReloadableInferEngine {
   }
 
   std::vector<std::string> GetOutputNames() {
-    FluidFamilyCore* core =
-        DBReloadableInferEngine<FluidFamilyCore>::get_core();
+    PaddleInferenceCore* core =
+        DBReloadableInferEngine<PaddleInferenceCore>::get_core();
     if (!core || !core->get()) {
       LOG(ERROR) << "Failed get fluid core in GetInputHandle()";
     }
@@ -584,8 +464,8 @@ class FluidInferEngine : public CloneDBReloadableInferEngine {
 
   std::unique_ptr<paddle_infer::Tensor> GetInputHandle(
       const std::string& name) {
-    FluidFamilyCore* core =
-        DBReloadableInferEngine<FluidFamilyCore>::get_core();
+    PaddleInferenceCore* core =
+        DBReloadableInferEngine<PaddleInferenceCore>::get_core();
     if (!core || !core->get()) {
       LOG(ERROR) << "Failed get fluid core in GetInputHandle()";
     }
@@ -594,8 +474,8 @@ class FluidInferEngine : public CloneDBReloadableInferEngine {
 
   std::unique_ptr<paddle_infer::Tensor> GetOutputHandle(
      const std::string& name) {
-    FluidFamilyCore* core =
-        DBReloadableInferEngine<FluidFamilyCore>::get_core();
+    PaddleInferenceCore* core =
+        DBReloadableInferEngine<PaddleInferenceCore>::get_core();
     if (!core || !core->get()) {
       LOG(ERROR) << "Failed get fluid core in GetOutputHandle()";
     }
@@ -603,8 +483,8 @@ class FluidInferEngine : public CloneDBReloadableInferEngine {
   }
 
   int infer_impl() {
-    FluidFamilyCore* core =
-        DBReloadableInferEngine<FluidFamilyCore>::get_core();
+    PaddleInferenceCore* core =
+        DBReloadableInferEngine<PaddleInferenceCore>::get_core();
     if (!core || !core->get()) {
       LOG(ERROR) << "Failed get fluid core in infer_impl()";
       return -1;
diff --git a/paddle_inference/paddle/include/paddle_engine.h b/paddle_inference/paddle/include/paddle_engine.h
index b5b602cd..c92641f6 100644
--- a/paddle_inference/paddle/include/paddle_engine.h
+++ b/paddle_inference/paddle/include/paddle_engine.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -33,183 +33,135 @@
 using paddle_infer::Predictor;
 using paddle_infer::Tensor;
 using paddle_infer::CreatePredictor;
 
-// data interface
-class PaddleInfencceEngine {
+const static int max_batch = 32;
+const static int min_subgraph_size = 3;
+// Engine Base
+class PaddleEngineBase {
 public:
-  virtual ~FluidFamilyCore() {}
+  virtual ~PaddleEngineBase() {}
   virtual std::vector<std::string> GetInputNames() {
-    return _core->GetInputNames();
+    return _predictor->GetInputNames();
   }
 
   virtual std::unique_ptr<Tensor> GetInputHandle(const std::string& name) {
-    return _core->GetInputHandle(name);
+    return _predictor->GetInputHandle(name);
   }
 
   virtual std::vector<std::string> GetOutputNames() {
-    return _core->GetOutputNames();
+    return _predictor->GetOutputNames();
   }
 
   virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string& name) {
-    return _core->GetOutputHandle(name);
+    return _predictor->GetOutputHandle(name);
   }
 
   virtual bool Run() {
-    if (!_core->Run()) {
+    if (!_predictor->Run()) {
       LOG(ERROR) << "Failed call Run with paddle predictor";
       return false;
     }
     return true;
   }
 
-  virtual int create(const predictor::InferEngineCreationParams& params) = 0;
+  virtual int create(const configure::EngineDesc& conf) = 0;
 
-  virtual int clone(void* origin_core) {
-    if (origin_core == NULL) {
+  virtual int clone(void* predictor) {
+    if (predictor == NULL) {
       LOG(ERROR) << "origin paddle Predictor is null.";
       return -1;
     }
-    Predictor* p_predictor = (Predictor*)origin_core;
-    _core = p_predictor->Clone();
-    if (_core.get() == NULL) {
-      LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
+    Predictor* prep = static_cast<Predictor*>(predictor);
+    _predictor = prep->Clone();
+    if (_predictor.get() == NULL) {
+      LOG(ERROR) << "fail to clone paddle predictor: " << predictor;
       return -1;
     }
     return 0;
   }
-  virtual void* get() { return _core.get(); }
+  virtual void* get() { return _predictor.get(); }
 
 protected:
-  std::shared_ptr<Predictor> _core;
+  std::shared_ptr<Predictor> _predictor;
 };
 
-// infer interface
-class FluidCpuAnalysisCore : public FluidFamilyCore {
+// Paddle Inference Engine
+class PaddleInferenceEngine : public PaddleEngineBase {
 public:
-  int create(const predictor::InferEngineCreationParams& params) {
-    std::string data_path = params.get_path();
-    if (access(data_path.c_str(), F_OK) == -1) {
+  int create(const configure::EngineDesc& engine_conf) {
+    std::string model_path = engine_conf.model_dir();
+    if (access(model_path.c_str(), F_OK) == -1) {
       LOG(ERROR) << "create paddle predictor failed, path not exits: "
-                 << data_path;
+                 << model_path;
       return -1;
     }
 
     Config config;
-    config.SetParamsFile(data_path + "/__params__");
-    config.SetProgFile(data_path + "/__model__");
-    config.DisableGpu();
-    config.SetCpuMathLibraryNumThreads(1);
-
-    if (params.enable_memory_optimization()) {
-      config.EnableMemoryOptim();
+    // todo, auto config(zhangjun)
+    if (engine_conf.has_combined_model()) {
+      if (!engine_conf.combined_model()) {
+        config.SetModel(model_path);
+      } else {
+        config.SetParamsFile(model_path + "/__params__");
+        config.SetProgFile(model_path + "/__model__");
+      }
+    } else {
+      config.SetParamsFile(model_path + "/__params__");
+      config.SetProgFile(model_path + "/__model__");
     }
-
+
     config.SwitchSpecifyInputNames(true);
-    AutoLock lock(GlobalPaddleCreateMutex::instance());
-    _core = CreatePredictor(config);
-    if (NULL == _core.get()) {
-      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
-      return -1;
+    config.SetCpuMathLibraryNumThreads(1);
+    if (engine_conf.has_use_gpu() && engine_conf.use_gpu()) {
+      // 2000MB GPU memory
+      config.EnableUseGpu(2000, FLAGS_gpuid);
     }
-
-    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
-    return 0;
-  }
-};
-
-class FluidCpuAnalysisDirCore : public FluidFamilyCore {
- public:
-  int create(const predictor::InferEngineCreationParams& params) {
-    std::string data_path = params.get_path();
-    if (access(data_path.c_str(), F_OK) == -1) {
-      LOG(ERROR) << "create paddle predictor failed, path not exits: "
-                 << data_path;
-      return -1;
+
+    if (engine_conf.has_use_trt() && engine_conf.use_trt()) {
+      config.EnableTensorRtEngine(1 << 20,
+                                  max_batch,
+                                  min_subgraph_size,
+                                  Config::Precision::kFloat32,
+                                  false,
+                                  false);
+      LOG(INFO) << "create TensorRT predictor";
     }
-    Config config;
-    config.SetModel(data_path);
-    config.DisableGpu();
-    config.SwitchSpecifyInputNames(true);
-    config.SetCpuMathLibraryNumThreads(1);
-
-    if (params.enable_memory_optimization()) {
-      config.EnableMemoryOptim();
+    if (engine_conf.has_use_lite() && engine_conf.use_lite()) {
+      config.EnableLiteEngine(PrecisionType::kFloat32, true);
     }
-    if (params.enable_ir_optimization()) {
-      config.SwitchIrOptim(true);
-    } else {
+    if (engine_conf.has_use_xpu() && engine_conf.use_xpu()) {
+      // 2 MB l3 cache
+      config.EnableXpu(2 * 1024 * 1024);
+    }
+    if (engine_conf.has_enable_ir_optimization() && !engine_conf.enable_ir_optimization()) {
       config.SwitchIrOptim(false);
+    } else {
+      config.SwitchIrOptim(true);
     }
-    AutoLock lock(GlobalPaddleCreateMutex::instance());
-    _core = CreatePredictor(config);
-    if (NULL == _core.get()) {
-      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
-      return -1;
+    if (engine_conf.has_enable_memory_optimization() && engine_conf.enable_memory_optimization()) {
+      config.EnableMemoryOptim();
     }
-
-    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
-    return 0;
-  }
-};
-
-class FluidCpuAnalysisEncryptCore : public FluidFamilyCore {
- public:
-  void ReadBinaryFile(const std::string& filename, std::string* contents) {
-    std::ifstream fin(filename, std::ios::in | std::ios::binary);
-    fin.seekg(0, std::ios::end);
-    contents->clear();
-    contents->resize(fin.tellg());
-    fin.seekg(0, std::ios::beg);
-    fin.read(&(contents->at(0)), contents->size());
-    fin.close();
-  }
-
-  int create(const predictor::InferEngineCreationParams& params) {
-    std::string data_path = params.get_path();
-    if (access(data_path.c_str(), F_OK) == -1) {
-      LOG(ERROR) << "create paddle predictor failed, path note exits: "
-                 << data_path;
-      return -1;
+
+    if (false) {
+      // todo, encrypt model
+      //analysis_config.SetModelBuffer();
     }
-    std::string model_buffer, params_buffer, key_buffer;
-    ReadBinaryFile(data_path + "encrypt_model", &model_buffer);
-    ReadBinaryFile(data_path + "encrypt_params", &params_buffer);
-    ReadBinaryFile(data_path + "key", &key_buffer);
-
-    VLOG(2) << "prepare for encryption model";
-
-    auto cipher = paddle::MakeCipher("");
-    std::string real_model_buffer = cipher->Decrypt(model_buffer, key_buffer);
-    std::string real_params_buffer = cipher->Decrypt(params_buffer, key_buffer);
-
-    Config analysis_config;
-    // paddle::AnalysisConfig analysis_config;
-    analysis_config.SetModelBuffer(&real_model_buffer[0],
-                                   real_model_buffer.size(),
-                                   &real_params_buffer[0],
-                                   real_params_buffer.size());
-    analysis_config.DisableGpu();
-    analysis_config.SetCpuMathLibraryNumThreads(1);
-    if (params.enable_memory_optimization()) {
-      analysis_config.EnableMemoryOptim();
-    }
-    analysis_config.SwitchSpecifyInputNames(true);
-    AutoLock lock(GlobalPaddleCreateMutex::instance());
-    VLOG(2) << "decrypt model file sucess";
-    _core = CreatePredictor(analysis_config);
-    if (NULL == _core.get()) {
-      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
+    AutoLock lock(GlobalCreateMutex::instance());
+    _predictor = CreatePredictor(config);
+    if (NULL == _predictor.get()) {
+      LOG(ERROR) << "create paddle predictor failed, path: " << model_path;
       return -1;
     }
+    VLOG(2) << "create paddle predictor success, path: " << model_path;
     return 0;
   }
 };
 
-}  // namespace fluid_cpu
+}  // namespace inference
 }  // namespace paddle_serving
 }  // namespace baidu
diff --git a/paddle_inference/paddle/src/paddle_engine.cpp b/paddle_inference/paddle/src/paddle_engine.cpp
index 30d9ea32..c9ae46eb 100644
--- a/paddle_inference/paddle/src/paddle_engine.cpp
+++ b/paddle_inference/paddle/src/paddle_engine.cpp
@@ -17,26 +17,13 @@
 
 namespace baidu {
 namespace paddle_serving {
-namespace fluid_cpu {
+namespace inference {
 
 REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
-    ::baidu::paddle_serving::predictor::FluidInferEngine<FluidCpuAnalysisCore>,
+    ::baidu::paddle_serving::predictor::FluidInferEngine<PaddleInferenceEngine>,
     ::baidu::paddle_serving::predictor::InferEngine,
-    "FLUID_CPU_ANALYSIS");
+    "PADDLE_INFER");
 
-REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
-    ::baidu::paddle_serving::predictor::FluidInferEngine<
-        FluidCpuAnalysisDirCore>,
-    ::baidu::paddle_serving::predictor::InferEngine,
-    "FLUID_CPU_ANALYSIS_DIR");
-
-#if 1
-REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
-    ::baidu::paddle_serving::predictor::FluidInferEngine<
-        FluidCpuAnalysisEncryptCore>,
-    ::baidu::paddle_serving::predictor::InferEngine,
-    "FLUID_CPU_ANALYSIS_ENCRYPT");
-#endif
-}  // namespace fluid_cpu
+}  // namespace inference
 }  // namespace paddle_serving
 }  // namespace baidu
-- 
GitLab
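
Illustrative note (not part of the patch): the sketch below shows how a model_toolkit engine entry might look in protobuf text format once this EngineDesc layout is in place. The file name model_toolkit.prototxt, the enclosing repeated "engines" field, the "name" field and all values are assumptions chosen for illustration; only the field names from server_configure.proto and the "PADDLE_INFER" type string registered in paddle_engine.cpp come from this patch.

  # model_toolkit.prototxt (hypothetical example)
  engines {
    name: "general_infer_0"                  # assumed engine name
    type: "PADDLE_INFER"                     # factory name registered by this patch
    reloadable_meta: "./uci_housing_model/fluid_time_file"  # assumed path
    reloadable_type: "timestamp_ne"
    model_dir: "./uci_housing_model"         # renamed from model_data_path
    runtime_thread_num: 0
    batch_infer_size: 32
    enable_batch_align: 0
    enable_memory_optimization: true         # -> config.EnableMemoryOptim()
    enable_ir_optimization: false            # -> config.SwitchIrOptim(false)
    use_trt: false
    use_lite: false
    use_xpu: false
    use_gpu: false                           # new field: selects config.EnableUseGpu(2000, FLAGS_gpuid)
    combined_model: false                    # new field: false -> config.SetModel(model_dir)
    encrypted_model: false                   # new field: decryption path is still a TODO above
  }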