Commit b97ade63 authored by Wang Guibao, committed by GitHub

Merge pull request #63 from wangguibao/memory_optimzation_switch

Add a switch to make it configurable whether memory optimization is enabled
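
A minimal sketch of how the new switch can be set per engine in model_toolkit.prototxt (the field names follow the EngineDesc additions below; the engine name and model path are placeholders):

```
engines {
  name: "example_model"
  type: "FLUID_CPU_ANALYSIS_DIR"
  model_data_path: "./data/model/example"
  enable_memory_optimization: true
  static_optimization: false
  force_update_static_cache: false
}
```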
......@@ -40,6 +40,9 @@ message EngineDesc {
}
optional SparseParamServiceType sparse_param_service_type = 11;
optional string sparse_param_service_table_name = 12;
optional bool enable_memory_optimization = 13;
optional bool static_optimization = 14;
optional bool force_update_static_cache = 15;
};
// model_toolkit conf
......@@ -49,8 +52,7 @@ message ModelToolkitConf { repeated EngineDesc engines = 1; };
message ResourceConf {
required string model_toolkit_path = 1;
required string model_toolkit_file = 2;
optional string cube_config_path = 3;
optional string cube_config_file = 4;
optional string cube_config_file = 3;
};
// DAG node depency info
......
......@@ -68,6 +68,9 @@ int test_write_conf() {
engine->set_enable_batch_align(0);
engine->set_sparse_param_service_type(EngineDesc::LOCAL);
engine->set_sparse_param_service_table_name("local_kv");
engine->set_enable_memory_optimization(true);
engine->set_static_optimization(false);
engine->set_force_update_static_cache(false);
int ret = baidu::paddle_serving::configure::write_proto_conf(
&model_toolkit_conf, output_dir, model_toolkit_conf_file);
......@@ -79,6 +82,7 @@ int test_write_conf() {
ResourceConf resource_conf;
resource_conf.set_model_toolkit_path(output_dir);
resource_conf.set_model_toolkit_file("model_toolkit.prototxt");
resource_conf.set_cube_config_file("./conf/cube.conf");
ret = baidu::paddle_serving::configure::write_proto_conf(
&resource_conf, output_dir, resource_conf_file);
if (ret != 0) {
......
engines {
name: "image_classification_resnet"
type: "FLUID_CPU_NATIVE_DIR"
type: "FLUID_CPU_ANALYSIS_DIR"
reloadable_meta: "./data/model/paddle/fluid_time_file"
reloadable_type: "timestamp_ne"
model_data_path: "./data/model/paddle/fluid/SE_ResNeXt50_32x4d"
runtime_thread_num: 0
batch_infer_size: 0
enable_batch_align: 0
enable_memory_optimization: true
static_optimization: false
force_update_static_cache: false
}
engines {
name: "text_classification_bow"
type: "FLUID_CPU_ANALYSIS_DIR"
......
......@@ -128,6 +128,9 @@ engines {
enable_batch_align: 0
sparse_param_service_type: LOCAL
sparse_param_service_table_name: "local_kv"
enable_memory_optimization: true
static_optimization: false
force_update_static_cache: false
}
```
......@@ -175,6 +178,9 @@ During model loading, the Analysis API applies several optimizations to the model's computation logic
|REMOTE|Distributed large-scale sparse parameter service, with Cube as the engine|
- sparse_param_service_table_name: optional; the table name in the large-scale sparse parameter service that holds the parameters used by this model.
- enable_memory_optimization: bool, optional; whether to enable memory optimization. Only meaningful when the fluid Analysis inference API is used (see the sketch after this list). Note that for GPU inference, GPU memory is optimized instead.
- static_optimization: bool; whether to perform static memory optimization. Only meaningful when memory optimization is enabled.
- force_update_static_cache: bool; whether to force an update of the static optimization cache. Only meaningful when memory optimization is enabled.
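
For reference, a minimal sketch of how these three options map onto paddle::AnalysisConfig, following the pattern used by the Analysis engines changed in this PR; the helper name ApplyMemoryOptions and the header path are illustrative, not part of this commit:

```cpp
#include "paddle_inference_api.h"  // header path may differ across Paddle versions

// Illustrative helper (not part of this PR): applies the three EngineDesc
// switches to an AnalysisConfig. static_optimization and
// force_update_static_cache are only consulted when memory optimization
// itself is enabled; Native (non-Analysis) engines ignore all three options.
void ApplyMemoryOptions(paddle::AnalysisConfig* config,
                        bool enable_memory_optimization,
                        bool static_optimization,
                        bool force_update_static_cache) {
  if (enable_memory_optimization) {
    config->EnableMemoryOptim(static_optimization, force_update_static_cache);
  }
}
```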
## 5. Command-line configuration parameters
......
......@@ -93,7 +93,7 @@ class FluidFamilyCore {
return true;
}
virtual int create(const std::string& data_path) = 0;
virtual int create(const predictor::InferEngineCreationParams& params) = 0;
virtual int clone(void* origin_core) {
if (origin_core == NULL) {
......@@ -119,7 +119,8 @@ class FluidFamilyCore {
// infer interface
class FluidCpuAnalysisCore : public FluidFamilyCore {
public:
int create(const std::string& data_path) {
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
......@@ -131,6 +132,12 @@ class FluidCpuAnalysisCore : public FluidFamilyCore {
analysis_config.SetProgFile(data_path + "/__model__");
analysis_config.DisableGpu();
analysis_config.SetCpuMathLibraryNumThreads(1);
if (params.enable_memory_optimization()) {
analysis_config.EnableMemoryOptim(params.static_optimization(),
params.force_update_static_cache());
}
analysis_config.SwitchSpecifyInputNames(true);
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core =
......@@ -147,7 +154,8 @@ class FluidCpuAnalysisCore : public FluidFamilyCore {
class FluidCpuNativeCore : public FluidFamilyCore {
public:
int create(const std::string& data_path) {
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
......@@ -177,7 +185,8 @@ class FluidCpuNativeCore : public FluidFamilyCore {
class FluidCpuAnalysisDirCore : public FluidFamilyCore {
public:
int create(const std::string& data_path) {
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
......@@ -189,6 +198,12 @@ class FluidCpuAnalysisDirCore : public FluidFamilyCore {
analysis_config.DisableGpu();
analysis_config.SwitchSpecifyInputNames(true);
analysis_config.SetCpuMathLibraryNumThreads(1);
if (params.enable_memory_optimization()) {
analysis_config.EnableMemoryOptim(params.static_optimization(),
params.force_update_static_cache());
}
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core =
paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
......@@ -204,7 +219,8 @@ class FluidCpuAnalysisDirCore : public FluidFamilyCore {
class FluidCpuNativeDirCore : public FluidFamilyCore {
public:
int create(const std::string& data_path) {
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
......@@ -380,7 +396,8 @@ class FluidCpuWithSigmoidCore : public FluidFamilyCore {
virtual ~FluidCpuWithSigmoidCore() {}
public:
int create(const std::string& model_path) {
int create(const predictor::InferEngineCreationParams& params) {
std::string model_path = params.get_path();
size_t pos = model_path.find_last_of("/\\");
std::string conf_path = model_path.substr(0, pos);
std::string conf_file = model_path.substr(pos);
......@@ -393,7 +410,9 @@ class FluidCpuWithSigmoidCore : public FluidFamilyCore {
_core.reset(new SigmoidFluidModel);
std::string fluid_model_data_path = conf.dnn_model_path();
int ret = load_fluid_model(fluid_model_data_path);
predictor::InferEngineCreationParams new_params(params);
new_params.set_path(fluid_model_data_path);
int ret = load_fluid_model(new_params);
if (ret < 0) {
LOG(ERROR) << "fail to load fluid model.";
return -1;
......@@ -442,7 +461,8 @@ class FluidCpuWithSigmoidCore : public FluidFamilyCore {
virtual SigmoidFluidModel* get() { return _core.get(); }
virtual int load_fluid_model(const std::string& data_path) = 0;
virtual int load_fluid_model(
const predictor::InferEngineCreationParams& params) = 0;
int softmax(float x, double& o) { // NOLINT
return _core->_sigmoid_core->softmax(x, o);
......@@ -454,7 +474,8 @@ class FluidCpuWithSigmoidCore : public FluidFamilyCore {
class FluidCpuNativeDirWithSigmoidCore : public FluidCpuWithSigmoidCore {
public:
int load_fluid_model(const std::string& data_path) {
int load_fluid_model(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
......@@ -483,7 +504,8 @@ class FluidCpuNativeDirWithSigmoidCore : public FluidCpuWithSigmoidCore {
class FluidCpuAnalysisDirWithSigmoidCore : public FluidCpuWithSigmoidCore {
public:
int load_fluid_model(const std::string& data_path) {
int load_fluid_model(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
......@@ -495,6 +517,12 @@ class FluidCpuAnalysisDirWithSigmoidCore : public FluidCpuWithSigmoidCore {
analysis_config.DisableGpu();
analysis_config.SwitchSpecifyInputNames(true);
analysis_config.SetCpuMathLibraryNumThreads(1);
if (params.enable_memory_optimization()) {
analysis_config.EnableMemoryOptim(params.static_optimization(),
params.force_update_static_cache());
}
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core->_fluid_core =
paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
......
......@@ -95,7 +95,7 @@ class FluidFamilyCore {
return true;
}
virtual int create(const std::string& data_path) = 0;
virtual int create(const predictor::InferEngineCreationParams& params) = 0;
virtual int clone(void* origin_core) {
if (origin_core == NULL) {
......@@ -121,7 +121,8 @@ class FluidFamilyCore {
// infer interface
class FluidGpuAnalysisCore : public FluidFamilyCore {
public:
int create(const std::string& data_path) {
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
......@@ -133,7 +134,12 @@ class FluidGpuAnalysisCore : public FluidFamilyCore {
analysis_config.SetProgFile(data_path + "/__model__");
analysis_config.EnableUseGpu(100, FLAGS_gpuid);
analysis_config.SetCpuMathLibraryNumThreads(1);
analysis_config.EnableMemoryOptim(false, false);
if (params.enable_memory_optimization()) {
analysis_config.EnableMemoryOptim(params.static_optimization(),
params.force_update_static_cache());
}
analysis_config.SwitchSpecifyInputNames(true);
AutoLock lock(GlobalPaddleCreateMutex::instance());
......@@ -151,7 +157,8 @@ class FluidGpuAnalysisCore : public FluidFamilyCore {
class FluidGpuNativeCore : public FluidFamilyCore {
public:
int create(const std::string& data_path) {
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
......@@ -180,7 +187,8 @@ class FluidGpuNativeCore : public FluidFamilyCore {
class FluidGpuAnalysisDirCore : public FluidFamilyCore {
public:
int create(const std::string& data_path) {
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
......@@ -192,7 +200,11 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
analysis_config.EnableUseGpu(100, FLAGS_gpuid);
analysis_config.SwitchSpecifyInputNames(true);
analysis_config.SetCpuMathLibraryNumThreads(1);
analysis_config.EnableMemoryOptim(false, false);
if (params.enable_memory_optimization()) {
analysis_config.EnableMemoryOptim(params.static_optimization(),
params.force_update_static_cache());
}
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core =
......@@ -209,7 +221,8 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
class FluidGpuNativeDirCore : public FluidFamilyCore {
public:
int create(const std::string& data_path) {
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
......@@ -385,7 +398,8 @@ class FluidGpuWithSigmoidCore : public FluidFamilyCore {
virtual ~FluidGpuWithSigmoidCore() {}
public:
int create(const std::string& model_path) {
int create(const predictor::InferEngineCreationParams& params) {
std::string model_path = params.get_path();
size_t pos = model_path.find_last_of("/\\");
std::string conf_path = model_path.substr(0, pos);
std::string conf_file = model_path.substr(pos);
......@@ -398,7 +412,9 @@ class FluidGpuWithSigmoidCore : public FluidFamilyCore {
_core.reset(new SigmoidFluidModel);
std::string fluid_model_data_path = conf.dnn_model_path();
int ret = load_fluid_model(fluid_model_data_path);
predictor::InferEngineCreationParams new_params(params);
new_params.set_path(fluid_model_data_path);
int ret = load_fluid_model(new_params);
if (ret < 0) {
LOG(ERROR) << "fail to load fluid model.";
return -1;
......@@ -447,7 +463,8 @@ class FluidGpuWithSigmoidCore : public FluidFamilyCore {
virtual SigmoidFluidModel* get() { return _core.get(); }
virtual int load_fluid_model(const std::string& data_path) = 0;
virtual int load_fluid_model(
const predictor::InferEngineCreationParams& params) = 0;
int softmax(float x, double& o) { // NOLINT
return _core->_sigmoid_core->softmax(x, o);
......@@ -459,7 +476,8 @@ class FluidGpuWithSigmoidCore : public FluidFamilyCore {
class FluidGpuNativeDirWithSigmoidCore : public FluidGpuWithSigmoidCore {
public:
int load_fluid_model(const std::string& data_path) {
int load_fluid_model(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
......@@ -488,7 +506,8 @@ class FluidGpuNativeDirWithSigmoidCore : public FluidGpuWithSigmoidCore {
class FluidGpuAnalysisDirWithSigmoidCore : public FluidGpuWithSigmoidCore {
public:
int load_fluid_model(const std::string& data_path) {
int load_fluid_model(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
......@@ -500,7 +519,12 @@ class FluidGpuAnalysisDirWithSigmoidCore : public FluidGpuWithSigmoidCore {
analysis_config.EnableUseGpu(100, FLAGS_gpuid);
analysis_config.SwitchSpecifyInputNames(true);
analysis_config.SetCpuMathLibraryNumThreads(1);
analysis_config.EnableMemoryOptim(false, false);
if (params.enable_memory_optimization()) {
analysis_config.EnableMemoryOptim(params.static_optimization(),
params.force_update_static_cache());
}
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core->_fluid_core =
paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
......
......@@ -29,6 +29,55 @@ namespace predictor {
using configure::ModelToolkitConf;
class InferEngineCreationParams {
public:
InferEngineCreationParams() {
_path = "";
_enable_memory_optimization = false;
_static_optimization = false;
_force_update_static_cache = false;
}
void set_path(const std::string& path) { _path = path; }
void set_enable_memory_optimization(bool enable_memory_optimization) {
_enable_memory_optimization = enable_memory_optimization;
}
bool enable_memory_optimization() const {
return _enable_memory_optimization;
}
void set_static_optimization(bool static_optimization = false) {
_static_optimization = static_optimization;
}
void set_force_update_static_cache(bool force_update_static_cache = false) {
_force_update_static_cache = force_update_static_cache;
}
bool static_optimization() const { return _static_optimization; }
bool force_update_static_cache() const { return _force_update_static_cache; }
std::string get_path() const { return _path; }
void dump() const {
LOG(INFO) << "InferEngineCreationParams: "
<< "model_path = " << _path << ", "
<< "enable_memory_optimization = " << _enable_memory_optimization
<< ", "
<< "static_optimization = " << _static_optimization << ", "
<< "force_update_static_cache = " << _force_update_static_cache;
}
private:
std::string _path;
bool _enable_memory_optimization;
bool _static_optimization;
bool _force_update_static_cache;
};
class InferEngine {
public:
virtual ~InferEngine() {}
......@@ -75,7 +124,7 @@ class ReloadableInferEngine : public InferEngine {
typedef im::bsf::Task<Tensor, Tensor> TaskT;
virtual int load(const std::string& data_path) = 0;
virtual int load(const InferEngineCreationParams& params) = 0;
int proc_initialize_impl(const configure::EngineDesc& conf, bool version) {
_reload_tag_file = conf.reloadable_meta();
......@@ -84,7 +133,31 @@ class ReloadableInferEngine : public InferEngine {
_infer_thread_num = conf.runtime_thread_num();
_infer_batch_size = conf.batch_infer_size();
_infer_batch_align = conf.enable_batch_align();
if (!check_need_reload() || load(_model_data_path) != 0) {
bool enable_memory_optimization = false;
if (conf.has_enable_memory_optimization()) {
enable_memory_optimization = conf.enable_memory_optimization();
}
bool static_optimization = false;
if (conf.has_static_optimization()) {
static_optimization = conf.static_optimization();
}
bool force_update_static_cache = false;
if (conf.has_force_update_static_cache()) {
force_update_static_cache = conf.force_update_static_cache();
}
_infer_engine_params.set_path(_model_data_path);
if (enable_memory_optimization) {
_infer_engine_params.set_enable_memory_optimization(true);
_infer_engine_params.set_static_optimization(static_optimization);
_infer_engine_params.set_force_update_static_cache(
force_update_static_cache);
}
if (!check_need_reload() || load(_infer_engine_params) != 0) {
LOG(ERROR) << "Failed load model_data_path" << _model_data_path;
return -1;
}
......@@ -175,7 +248,7 @@ class ReloadableInferEngine : public InferEngine {
int reload() {
if (check_need_reload()) {
LOG(WARNING) << "begin reload model[" << _model_data_path << "].";
return load(_model_data_path);
return load(_infer_engine_params);
}
return 0;
}
......@@ -243,6 +316,7 @@ class ReloadableInferEngine : public InferEngine {
protected:
std::string _model_data_path;
InferEngineCreationParams _infer_engine_params;
private:
std::string _reload_tag_file;
......@@ -281,32 +355,35 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
return ReloadableInferEngine::proc_initialize(conf, version);
}
virtual int load(const std::string& model_data_dir) {
virtual int load(const InferEngineCreationParams& params) {
if (_reload_vec.empty()) {
return 0;
}
for (uint32_t ti = 0; ti < _reload_vec.size(); ++ti) {
if (load_data(_reload_vec[ti], model_data_dir) != 0) {
if (load_data(_reload_vec[ti], params) != 0) {
LOG(ERROR) << "Failed reload engine model: " << ti;
return -1;
}
}
LOG(WARNING) << "Succ load engine, path: " << model_data_dir;
LOG(WARNING) << "Succ load engine, path: " << params.get_path();
return 0;
}
int load_data(ModelData<EngineCore>* md, const std::string& data_path) {
int load_data(ModelData<EngineCore>* md,
const InferEngineCreationParams& params) {
uint32_t next_idx = (md->current_idx + 1) % 2;
if (md->cores[next_idx]) {
delete md->cores[next_idx];
}
md->cores[next_idx] = new (std::nothrow) EngineCore;
if (!md->cores[next_idx] || md->cores[next_idx]->create(data_path) != 0) {
LOG(ERROR) << "Failed create model, path: " << data_path;
params.dump();
if (!md->cores[next_idx] || md->cores[next_idx]->create(params) != 0) {
LOG(ERROR) << "Failed create model, path: " << params.get_path();
return -1;
}
md->current_idx = next_idx;
......@@ -321,8 +398,9 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
}
ModelData<EngineCore>* md = new (std::nothrow) ModelData<EngineCore>;
if (!md || load_data(md, _model_data_path) != 0) {
LOG(ERROR) << "Failed create thread data from " << _model_data_path;
if (!md || load_data(md, _infer_engine_params) != 0) {
LOG(ERROR) << "Failed create thread data from "
<< _infer_engine_params.get_path();
return -1;
}
......@@ -383,17 +461,16 @@ class CloneDBReloadableInferEngine
return DBReloadableInferEngine<EngineCore>::proc_initialize(conf, version);
}
virtual int load(const std::string& model_data_dir) {
virtual int load(const InferEngineCreationParams& params) {
// Load process-level model data
if (!_pd ||
DBReloadableInferEngine<EngineCore>::load_data(_pd, model_data_dir) !=
0) {
LOG(ERROR) << "Failed to create common model from [" << model_data_dir
DBReloadableInferEngine<EngineCore>::load_data(_pd, params) != 0) {
LOG(ERROR) << "Failed to create common model from [" << params.get_path()
<< "].";
return -1;
}
LOG(WARNING) << "Succ load common model[" << _pd->cores[_pd->current_idx]
<< "], path[" << model_data_dir << "].";
<< "], path[" << params.get_path() << "].";
if (DBReloadableInferEngine<EngineCore>::_reload_vec.empty()) {
return 0;
......@@ -409,7 +486,7 @@ class CloneDBReloadableInferEngine
}
}
LOG(WARNING) << "Succ load clone model, path[" << model_data_dir << "]";
LOG(WARNING) << "Succ load clone model, path[" << params.get_path() << "]";
return 0;
}
......
......@@ -110,12 +110,6 @@ int Resource::cube_initialize(const std::string& path,
}
int err = 0;
std::string cube_config_path = resource_conf.cube_config_path();
if (err != 0) {
LOG(ERROR) << "reade cube_config_path failed, path[" << path << "], file["
<< cube_config_path << "]";
return -1;
}
std::string cube_config_file = resource_conf.cube_config_file();
if (err != 0) {
LOG(ERROR) << "reade cube_config_file failed, path[" << path << "], file["
......@@ -124,8 +118,8 @@ int Resource::cube_initialize(const std::string& path,
}
err = CubeAPI::instance()->init(cube_config_file.c_str());
if (err != 0) {
LOG(ERROR) << "failed initialize cube, config: " << cube_config_path << "/"
<< cube_config_file << " error code : " << err;
LOG(ERROR) << "failed initialize cube, config: " << cube_config_file
<< " error code : " << err;
return -1;
}
......