diff --git a/core/general-server/op/general_infer_op.cpp b/core/general-server/op/general_infer_op.cpp
index 0d273e34e49598a0fb2b8dd18c41c2d9654162a3..c059b9607ddc3996e52c43270be1b5cb1442c2b8 100644
--- a/core/general-server/op/general_infer_op.cpp
+++ b/core/general-server/op/general_infer_op.cpp
@@ -36,6 +36,19 @@ using baidu::paddle_serving::predictor::InferManager;
 using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
 
 int GeneralInferOp::inference() {
+  VLOG(2) << "Going to run inference";
+  const std::vector<std::string> pre_node_names = pre_names();
+  if (pre_node_names.size() != 1) {
+    LOG(ERROR) << "This op(" << op_name()
+               << ") can only have one predecessor op, but received "
+               << pre_node_names.size();
+    return -1;
+  }
+  if (InferManager::instance().infer(
+          engine_name().c_str())) {
+    return -1;
+  }
+  std::cout << "Infer Success" << std::endl;
   return 0;
 }
 DEFINE_OP(GeneralInferOp);
diff --git a/core/general-server/op/general_reader_op.cpp b/core/general-server/op/general_reader_op.cpp
index 4e5a54675e4bd75b9019436d272d885bc8b7fe8f..20ab055761177fd17c2143c8347259c858ad79ef 100644
--- a/core/general-server/op/general_reader_op.cpp
+++ b/core/general-server/op/general_reader_op.cpp
@@ -20,6 +20,7 @@
 #include "core/general-server/op/general_infer_helper.h"
 #include "core/predictor/framework/infer.h"
 #include "core/predictor/framework/memory.h"
+#include "core/predictor/framework/resource.h"
 #include "core/util/include/timer.h"
 
 namespace baidu {
@@ -32,6 +33,7 @@ using baidu::paddle_serving::predictor::general_model::Tensor;
 using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::general_model::FeedInst;
 using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
+using baidu::paddle_serving::predictor::InferManager;
 
 int conf_check(const Request *req,
                const std::shared_ptr<PaddleGeneralModelConfig> &model_config) {
@@ -71,6 +73,90 @@ int conf_check(const Request *req,
 
 int GeneralReaderOp::inference() {
   // reade request from client
+  // TODO: only support one engine here
+  std::string engine_name = "general_infer_0";
+  const Request *req = dynamic_cast<const Request *>(get_request_message());
+  uint64_t log_id = req->log_id();
+  int input_var_num = 0;
+  std::vector<int64_t> elem_type;
+  std::vector<int64_t> elem_size;
+  std::vector<int64_t> capacity;
+  int var_num = req->insts(0).tensor_array_size();
+  baidu::paddle_serving::predictor::Resource &resource =
+      baidu::paddle_serving::predictor::Resource::instance();
+  std::shared_ptr<PaddleGeneralModelConfig> model_config =
+      resource.get_general_model_config();
+  elem_type.resize(var_num);
+  elem_size.resize(var_num);
+  capacity.resize(var_num);
+  for (int i = 0; i < var_num; ++i) {
+    std::string tensor_name = model_config->_feed_name[i];
+    std::cout << "START Tensor Name: " << tensor_name << std::endl;
+    auto lod_tensor = InferManager::instance().GetInputHandle(engine_name.c_str(), tensor_name.c_str());
+    std::vector<std::vector<size_t>> lod;
+    std::vector<int> shape;
+    // get lod info here
+    if (req->insts(0).tensor_array(i).lod_size() > 0) {
+      lod.resize(1);
+      for (int k = 0; k < req->insts(0).tensor_array(i).lod_size(); ++k) {
+        lod[0].push_back(req->insts(0).tensor_array(i).lod(k));
+      }
+      capacity[i] = 1;
+      for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
+        int dim = req->insts(0).tensor_array(i).shape(k);
+        VLOG(2) << "(logid=" << log_id << ") shape for var[" << i
+                << "]: " << dim;
+        capacity[i] *= dim;
+        shape.push_back(dim);
+      }
+      VLOG(2) << "(logid=" << log_id << ") var[" << i
+              << "] is tensor, capacity: " << capacity[i];
+    }
+    else {
+      capacity[i] = 1;
+      for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
+        int dim = req->insts(0).tensor_array(i).shape(k);
+        VLOG(2) << "(logid=" << log_id << ") shape for var[" << i
+                << "]: " << dim;
+        capacity[i] *= dim;
+        shape.push_back(dim);
+      }
+      VLOG(2) << "(logid=" << log_id << ") var[" << i
+              << "] is tensor, capacity: " << capacity[i];
+    }
+    lod_tensor->SetLoD(lod);
+    lod_tensor->Reshape(shape);
+    std::cout << "FINI Set Lod and Reshape, and elem type: " << elem_type[i] << std::endl;
+    // insert data here
+    if (req->insts(0).tensor_array(i).elem_type() == 0) {
+      // TODO: Copy twice here, can optimize
+      int elem_num = req->insts(0).tensor_array(i).int64_data_size();
+      std::vector<int64_t> data(elem_num);
+      int64_t* dst_ptr = data.data();
+      for (int k = 0; k < elem_num; ++k) {
+        dst_ptr[k] = req->insts(0).tensor_array(i).int64_data(k);
+      }
+      lod_tensor->CopyFromCpu(dst_ptr);
+    } else if (req->insts(0).tensor_array(i).elem_type() == 1) {
+      int elem_num = req->insts(0).tensor_array(i).float_data_size();
+      std::vector<float> data(elem_num);
+      float* dst_ptr = data.data();
+      for (int k = 0; k < elem_num; ++k) {
+        dst_ptr[k] = req->insts(0).tensor_array(i).float_data(k);
+      }
+      lod_tensor->CopyFromCpu(dst_ptr);
+    } else if (req->insts(0).tensor_array(i).elem_type() == 2) {
+      int elem_num = req->insts(0).tensor_array(i).int_data_size();
+      std::vector<int32_t> data(elem_num);
+      int32_t* dst_ptr = data.data();
+      for (int k = 0; k < elem_num; ++k) {
+        dst_ptr[k] = req->insts(0).tensor_array(i).int_data(k);
+      }
+      lod_tensor->CopyFromCpu(dst_ptr);
+    }
+    std::cout << "FINISH Tensor Name: " << tensor_name << std::endl;
+  }
   return 0;
 }
 DEFINE_OP(GeneralReaderOp);
diff --git a/core/general-server/op/general_response_op.cpp b/core/general-server/op/general_response_op.cpp
--- a/core/general-server/op/general_response_op.cpp
+++ b/core/general-server/op/general_response_op.cpp
@@ ... @@ int GeneralResponseOp::inference() {
+  const Request *req = dynamic_cast<const Request *>(get_request_message());
+  // response inst with only fetch_var_names
+  Response *res = mutable_data<Response>();
+  baidu::paddle_serving::predictor::Resource &resource =
+      baidu::paddle_serving::predictor::Resource::instance();
+  std::shared_ptr<PaddleGeneralModelConfig> model_config =
+      resource.get_general_model_config();
+  std::vector<int> capacity(req->fetch_var_names_size(), 1);
+  std::string engine_name = "general_infer_0";
+  ModelOutput *output = res->add_outputs();
+  FetchInst *fetch_inst = output->add_insts();
+  FetchInst *fetch_p = output->mutable_insts(0);
+  std::vector<std::string> outs = InferManager::instance().GetOutputNames(engine_name.c_str());
+  for (int i = 0; i < req->fetch_var_names_size(); ++i) {
+    Tensor *tensor = fetch_inst->add_tensor_array();
+    std::string tensor_name = outs[i];
+    auto lod_tensor = InferManager::instance().GetOutputHandle(engine_name.c_str(), tensor_name.c_str());
+    std::vector<int> shape = lod_tensor->shape();
+    for (int k = 0; k < shape.size(); ++k) {
+      capacity[i] *= shape[k];
+      tensor->add_shape(shape[k]);
+    }
+    auto dtype = lod_tensor->type();
+    if (dtype == paddle::PaddleDType::INT64) {
+      std::vector<int64_t> datas(capacity[i]);
+      int64_t* data_ptr = datas.data();
+      lod_tensor->CopyToCpu(data_ptr);
+      google::protobuf::RepeatedField<int64_t> tmp_data(data_ptr, data_ptr + capacity[i]);
+      tensor->mutable_int64_data()->Swap(&tmp_data);
+    } else if (dtype == paddle::PaddleDType::FLOAT32) {
+      std::vector<float> datas(capacity[i]);
+      float* data_ptr = datas.data();
+      lod_tensor->CopyToCpu(data_ptr);
+      google::protobuf::RepeatedField<float> tmp_data(data_ptr, data_ptr + capacity[i]);
+      tensor->mutable_float_data()->Swap(&tmp_data);
+    } else if (dtype == paddle::PaddleDType::INT32) {
+      std::vector<int32_t> datas(capacity[i]);
+      int32_t* data_ptr = datas.data();
+      lod_tensor->CopyToCpu(data_ptr);
+      google::protobuf::RepeatedField<int32_t> tmp_data(data_ptr, data_ptr + capacity[i]);
+      tensor->mutable_int_data()->Swap(&tmp_data);
+    }
+    std::vector<std::vector<size_t>> lod = lod_tensor->lod();
+    if (lod.size() > 0) {
+      for (int j = 0; j < lod[0].size(); ++j) {
+        tensor->add_lod(lod[0][j]);
+      }
+    }
+  }
   return 0;
 }
diff --git a/core/predictor/framework/infer.h b/core/predictor/framework/infer.h
index 74a41f6e92d731eb74ca85312377375608a3e065..cc0cb8f6593f3d55538684571bcd16554925f95e 100644
--- a/core/predictor/framework/infer.h
+++ b/core/predictor/framework/infer.h
@@ -119,6 +119,8 @@ class InferEngine {
   virtual int thrd_finalize_impl() = 0;
   virtual int thrd_clear_impl() = 0;
   virtual int proc_finalize_impl() = 0;
+  virtual std::vector<std::string> GetInputNames() = 0;
+  virtual std::vector<std::string> GetOutputNames() = 0;
   virtual std::unique_ptr<paddle_infer::Tensor> GetInputHandle(const std::string& name) = 0;
   virtual std::unique_ptr<paddle_infer::Tensor> GetOutputHandle(const std::string& name) = 0;
   virtual int infer_impl() = 0;
@@ -514,6 +516,22 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
  public:  // NOLINT
   FluidInferEngine() {}
   ~FluidInferEngine() {}
+  std::vector<std::string> GetInputNames() {
+    FluidFamilyCore* core = DBReloadableInferEngine<FluidFamilyCore>::get_core();
+    if (!core || !core->get()) {
+      LOG(ERROR) << "Failed get fluid core in GetInputNames()";
+    }
+    return core->GetInputNames();
+  }
+
+  std::vector<std::string> GetOutputNames() {
+    FluidFamilyCore* core = DBReloadableInferEngine<FluidFamilyCore>::get_core();
+    if (!core || !core->get()) {
+      LOG(ERROR) << "Failed get fluid core in GetOutputNames()";
+    }
+    return core->GetOutputNames();
+  }
+
   std::unique_ptr<paddle_infer::Tensor> GetInputHandle(const std::string& name) {
     FluidFamilyCore* core = DBReloadableInferEngine<FluidFamilyCore>::get_core();
     if (!core || !core->get()) {
@@ -677,6 +695,20 @@ class VersionedInferEngine : public InferEngine {
     return engine->infer();
   }
 
+  std::vector<std::string> GetInputNames() {
+    InferEngine* engine = default_engine();
+    if (!engine) {
+      LOG(WARNING) << "fail to get default engine";
+    }
+    return engine->GetInputNames();
+  }
+  std::vector<std::string> GetOutputNames() {
+    InferEngine* engine = default_engine();
+    if (!engine) {
+      LOG(WARNING) << "fail to get default engine";
+    }
+    return engine->GetOutputNames();
+  }
   std::unique_ptr<paddle_infer::Tensor> GetInputHandle(const std::string& name) {
     InferEngine* engine = default_engine();
     if (!engine) {
@@ -718,6 +750,21 @@ class VersionedInferEngine : public InferEngine {
     return iter->second->infer();
   }
 
+  std::vector<std::string> GetInputNames(uint64_t version) {
+    auto iter = _versions.find(version);
+    if (iter == _versions.end()) {
+      LOG(ERROR) << "Not found version engine: " << version;
+    }
+    return iter->second->GetInputNames();
+  }
+
+  std::vector<std::string> GetOutputNames(uint64_t version) {
+    auto iter = _versions.find(version);
+    if (iter == _versions.end()) {
+      LOG(ERROR) << "Not found version engine: " << version;
+    }
+    return iter->second->GetOutputNames();
+  }
   std::unique_ptr<paddle_infer::Tensor> GetInputHandle(uint64_t version,
                                                        const std::string& name) {
     auto iter = _versions.find(version);
@@ -867,6 +914,21 @@ class InferManager {
     }
     return it->second->infer();
   }
+
+  std::vector<std::string> GetInputNames(const char* model_name) {
+    auto it = _map.find(model_name);
+    if (it == _map.end()) {
+      LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
+    }
+    return it->second->GetInputNames();
+  }
+  std::vector<std::string> GetOutputNames(const char* model_name) {
+    auto it = _map.find(model_name);
+    if (it == _map.end()) {
+      LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
+    }
+    return it->second->GetOutputNames();
+  }
   std::unique_ptr<paddle_infer::Tensor> GetInputHandle(const char* model_name,
                                                        const std::string& name) {
     auto it = _map.find(model_name);
     if (it == _map.end()) {
@@ -908,6 +970,22 @@ class InferManager {
     }
     return it->second->infer(version);
   }
+  std::vector<std::string> GetInputNames(const char* model_name, uint64_t version) {
+    auto it = _map.find(model_name);
+    if (it == _map.end()) {
+      LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
+    }
+    return it->second->GetInputNames(version);
+  }
+
+  std::vector<std::string> GetOutputNames(const char* model_name, uint64_t version) {
+    auto it = _map.find(model_name);
+    if (it == _map.end()) {
+      LOG(WARNING) << "Cannot find engine in map, model name:" << model_name;
+    }
+    return it->second->GetOutputNames(version);
+  }
+
   std::unique_ptr<paddle_infer::Tensor> GetInputHandle(const char* model_name,
                                                        uint64_t version,
                                                        const std::string& name) {
     auto it = _map.find(model_name);
     if (it == _map.end()) {
diff --git a/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h b/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h
index cceec8a8d8792fffe9a30d8114e662d6529374a8..159fc948318dcdf4bae047d9e41d95c54e7ac95a 100644
--- a/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h
+++ b/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h
@@ -64,10 +64,18 @@ using paddle_infer::CreatePredictor;
 class FluidFamilyCore {
  public:
   virtual ~FluidFamilyCore() {}
+  virtual std::vector<std::string> GetInputNames() {
+    return _core->GetInputNames();
+  }
+
   virtual std::unique_ptr<Tensor> GetInputHandle(const std::string& name) {
     return _core->GetInputHandle(name);
   }
-
+
+  virtual std::vector<std::string> GetOutputNames() {
+    return _core->GetOutputNames();
+  }
+
   virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string& name) {
     return _core->GetOutputHandle(name);
   }
diff --git a/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h b/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h
index 3782c967823d07c23ba02e5ce0f388dc6b46e181..5e7efe4347ff0c53f4a86e4b55c400ca5d90b6fb 100644
--- a/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h
+++ b/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h
@@ -61,31 +61,36 @@ class GlobalPaddleCreateMutex {
   pthread_mutex_t _mut;
 };
 
-class GlobalSigmoidCreateMutex {
- public:
-  pthread_mutex_t& mutex() { return _mut; }
-  static pthread_mutex_t& instance() {
-    static GlobalSigmoidCreateMutex gmutex;
-    return gmutex.mutex();
-  }
-
- private:
-  GlobalSigmoidCreateMutex() { pthread_mutex_init(&_mut, NULL); }
-
-  pthread_mutex_t _mut;
-};
+using paddle_infer::Config;
+using paddle_infer::Predictor;
+using paddle_infer::Tensor;
+using paddle_infer::CreatePredictor;
 
 // data interface
 class FluidFamilyCore {
  public:
   virtual ~FluidFamilyCore() {}
-  virtual bool Run(const void* in_data, void* out_data) {
-    if (!_core->Run(*(std::vector<paddle::PaddleTensor>*)in_data,
-                    (std::vector<paddle::PaddleTensor>*)out_data)) {
+  virtual std::vector<std::string> GetInputNames() {
+    return _core->GetInputNames();
+  }
+
+  virtual std::unique_ptr<Tensor> GetInputHandle(const std::string& name) {
+    return _core->GetInputHandle(name);
+  }
+
+  virtual std::vector<std::string> GetOutputNames() {
+    return _core->GetOutputNames();
+  }
+
+  virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string& name) {
+    return _core->GetOutputHandle(name);
+  }
+
+  virtual bool Run() {
+    if (!_core->Run()) {
       LOG(ERROR) << "Failed call Run with paddle predictor";
       return false;
     }
-
     return true;
   }
@@ -96,8 +101,8 @@ class FluidFamilyCore {
       LOG(ERROR) << "origin paddle Predictor is null.";
       return -1;
     }
-    paddle::PaddlePredictor* p_predictor =
-        (paddle::PaddlePredictor*)origin_core;
+    Predictor* p_predictor =
+        (Predictor*)origin_core;
     _core = p_predictor->Clone();
     if (_core.get() == NULL) {
       LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
@@ -109,7 +114,7 @@ class FluidFamilyCore {
   virtual void* get() { return _core.get(); }
 
  protected:
-  std::unique_ptr<paddle::PaddlePredictor> _core;
+  std::shared_ptr<Predictor> _core;
 };
 
 // infer interface
@@ -123,51 +128,19 @@ class FluidGpuAnalysisCore : public FluidFamilyCore {
       return -1;
     }
 
-    paddle::AnalysisConfig analysis_config;
-    analysis_config.SetParamsFile(data_path + "/__params__");
-    analysis_config.SetProgFile(data_path + "/__model__");
-    analysis_config.EnableUseGpu(100, FLAGS_gpuid);
-    analysis_config.SetCpuMathLibraryNumThreads(1);
+    Config config;
+    config.SetParamsFile(data_path + "/__params__");
+    config.SetProgFile(data_path + "/__model__");
+    config.EnableUseGpu(100, FLAGS_gpuid);
+    config.SetCpuMathLibraryNumThreads(1);
 
     if (params.enable_memory_optimization()) {
-      analysis_config.EnableMemoryOptim();
+      config.EnableMemoryOptim();
     }
 
-    analysis_config.SwitchSpecifyInputNames(true);
-
+    config.SwitchSpecifyInputNames(true);
     AutoLock lock(GlobalPaddleCreateMutex::instance());
-    _core =
-        paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
-    if (NULL == _core.get()) {
-      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
-      return -1;
-    }
-
-    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
-    return 0;
-  }
-};
-
-class FluidGpuNativeCore : public FluidFamilyCore {
- public:
-  int create(const predictor::InferEngineCreationParams& params) {
-    std::string data_path = params.get_path();
-    if (access(data_path.c_str(), F_OK) == -1) {
-      LOG(ERROR) << "create paddle predictor failed, path not exits: "
-                 << data_path;
-      return -1;
-    }
-
-    paddle::NativeConfig native_config;
-    native_config.param_file = data_path + "/__params__";
-    native_config.prog_file = data_path + "/__model__";
-    native_config.use_gpu = true;
-    native_config.fraction_of_gpu_memory = 0.01;
-    native_config.device = FLAGS_gpuid;
-    AutoLock lock(GlobalPaddleCreateMutex::instance());
-    _core = paddle::CreatePaddlePredictor<paddle::NativeConfig,
-                                          paddle::PaddleEngineKind::kNative>(
-        native_config);
+    _core = CreatePredictor(config);
     if (NULL == _core.get()) {
       LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
       return -1;
@@ -188,81 +161,39 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
       return -1;
     }
 
-    paddle::AnalysisConfig analysis_config;
-    analysis_config.SetModel(data_path);
-    analysis_config.EnableUseGpu(1500, FLAGS_gpuid);
-    analysis_config.SwitchSpecifyInputNames(true);
-    analysis_config.SetCpuMathLibraryNumThreads(1);
+    Config config;
+    config.SetModel(data_path);
+    config.EnableUseGpu(1500, FLAGS_gpuid);
+    config.SwitchSpecifyInputNames(true);
+    config.SetCpuMathLibraryNumThreads(1);
 
     if (params.enable_memory_optimization()) {
-      analysis_config.EnableMemoryOptim();
+      config.EnableMemoryOptim();
     }
-
-#if 0  // todo: support flexible shape
-
-  int min_seq_len = 1;
-  int max_seq_len = 512;
-  int opt_seq_len = 128;
-  int head_number = 12;
-  int batch = 50;
-
-  std::vector<int> min_in_shape = {batch, min_seq_len, 1};
-  std::vector<int> max_in_shape = {batch, max_seq_len, 1};
-  std::vector<int> opt_in_shape = {batch, opt_seq_len, 1};
-
-  std::string input1_name = "src_text_a_ids";
-  std::string input2_name = "pos_text_a_ids";
-  std::string input3_name = "sent_text_a_ids";
-  std::string input4_name = "stack_0.tmp_0";
-
-  std::map<std::string, std::vector<int>> min_input_shape = {
-      {input1_name, min_in_shape},
-      {input2_name, min_in_shape},
-      {input3_name, min_in_shape},
-      {input4_name, {batch, head_number, min_seq_len, min_seq_len}},
-  };
-
-  std::map<std::string, std::vector<int>> max_input_shape = {
-      {input1_name, max_in_shape},
-      {input2_name, max_in_shape},
-      {input3_name, max_in_shape},
-      {input4_name, {batch, head_number, max_seq_len, max_seq_len}},
-  };
-  std::map<std::string, std::vector<int>> opt_input_shape = {
-      {input1_name, opt_in_shape},
-      {input2_name, opt_in_shape},
-      {input3_name, opt_in_shape},
-      {input4_name, {batch, head_number, opt_seq_len, opt_seq_len}},
-  };
-
-  analysis_config.SetTRTDynamicShapeInfo(
-      min_input_shape, max_input_shape, opt_input_shape);
-#endif
     int max_batch = 32;
     int min_subgraph_size = 3;
     if (params.use_trt()) {
-      analysis_config.EnableTensorRtEngine(
+      config.EnableTensorRtEngine(
           1 << 20,
           max_batch,
          min_subgraph_size,
-          paddle::AnalysisConfig::Precision::kFloat32,
+          Config::Precision::kFloat32,
          false,
          false);
       LOG(INFO) << "create TensorRT predictor";
     } else {
       if (params.enable_memory_optimization()) {
-        analysis_config.EnableMemoryOptim();
+        config.EnableMemoryOptim();
       }
       if (params.enable_ir_optimization()) {
-        analysis_config.SwitchIrOptim(true);
+        config.SwitchIrOptim(true);
       } else {
-        analysis_config.SwitchIrOptim(false);
+        config.SwitchIrOptim(false);
       }
     }
     AutoLock lock(GlobalPaddleCreateMutex::instance());
-    _core =
-        paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
+    _core = CreatePredictor(config);
     if (NULL == _core.get()) {
       LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
       return -1;
@@ -273,34 +204,6 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
   }
 };
 
-class FluidGpuNativeDirCore : public FluidFamilyCore {
- public:
-  int create(const predictor::InferEngineCreationParams& params) {
-    std::string data_path = params.get_path();
-    if (access(data_path.c_str(), F_OK) == -1) {
-      LOG(ERROR) << "create paddle predictor failed, path not exits: "
-                 << data_path;
-      return -1;
-    }
-
-    paddle::NativeConfig native_config;
-    native_config.model_dir = data_path;
-    native_config.use_gpu = true;
-    native_config.fraction_of_gpu_memory = 0.01;
-    native_config.device = FLAGS_gpuid;
-    AutoLock lock(GlobalPaddleCreateMutex::instance());
-    _core = paddle::CreatePaddlePredictor<paddle::NativeConfig,
-                                          paddle::PaddleEngineKind::kNative>(
-        native_config);
-    if (NULL == _core.get()) {
-      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
-      return -1;
-    }
-
-    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
-    return 0;
-  }
-};
 
 class Parameter {
  public:
@@ -383,214 +286,6 @@ class Parameter {
   float* _params;
 };
 
-class SigmoidModel {
- public:
-  ~SigmoidModel() {}
-  int load(const char* sigmoid_w_file,
-           const char* sigmoid_b_file,
-           float exp_max,
-           float exp_min) {
-    AutoLock lock(GlobalSigmoidCreateMutex::instance());
-    if (0 != _sigmoid_w.init(2, 1, sigmoid_w_file) || 0 != _sigmoid_w.load()) {
-      LOG(ERROR) << "load params sigmoid_w failed.";
-      return -1;
-    }
-    VLOG(2) << "load sigmoid_w [" << _sigmoid_w._params[0] << "] ["
-            << _sigmoid_w._params[1] << "].";
-    if (0 != _sigmoid_b.init(2, 1, sigmoid_b_file) || 0 != _sigmoid_b.load()) {
-      LOG(ERROR) << "load params sigmoid_b failed.";
-      return -1;
-    }
-    VLOG(2) << "load sigmoid_b [" << _sigmoid_b._params[0] << "] ["
-            << _sigmoid_b._params[1] << "].";
-    _exp_max_input = exp_max;
-    _exp_min_input = exp_min;
-    return 0;
-  }
-
-  int softmax(float x, double& o) {  // NOLINT
-    float _y0 = x * _sigmoid_w._params[0] + _sigmoid_b._params[0];
-    float _y1 = x * _sigmoid_w._params[1] + _sigmoid_b._params[1];
-    _y0 = (_y0 > _exp_max_input)
-              ? _exp_max_input
-              : ((_y0 < _exp_min_input) ? _exp_min_input : _y0);
-    _y1 = (_y1 > _exp_max_input)
-              ? _exp_max_input
-              : ((_y1 < _exp_min_input) ? _exp_min_input : _y1);
-    o = 1.0f / (1.0f + exp(_y0 - _y1));
-    return 0;
-  }
-
- public:
-  Parameter _sigmoid_w;
-  Parameter _sigmoid_b;
-  float _exp_max_input;
-  float _exp_min_input;
-};
-
-class SigmoidFluidModel {
- public:
-  int softmax(float x, double& o) {  // NOLINT
-    return _sigmoid_core->softmax(x, o);
-  }  // NOLINT
-
-  std::unique_ptr<SigmoidFluidModel> Clone() {
-    std::unique_ptr<SigmoidFluidModel> clone_model;
-    clone_model.reset(new SigmoidFluidModel());
-    clone_model->_sigmoid_core = _sigmoid_core;
-    clone_model->_fluid_core = _fluid_core->Clone();
-    return std::move(clone_model);
-  }
-
- public:
-  std::unique_ptr<paddle::PaddlePredictor> _fluid_core;
-  std::shared_ptr<SigmoidModel> _sigmoid_core;
-};
-
-class FluidGpuWithSigmoidCore : public FluidFamilyCore {
- public:
-  virtual ~FluidGpuWithSigmoidCore() {}
-
- public:
-  int create(const predictor::InferEngineCreationParams& params) {
-    std::string model_path = params.get_path();
-    size_t pos = model_path.find_last_of("/\\");
-    std::string conf_path = model_path.substr(0, pos);
-    std::string conf_file = model_path.substr(pos);
-    configure::SigmoidConf conf;
-    if (configure::read_proto_conf(conf_path, conf_file, &conf) != 0) {
-      LOG(ERROR) << "failed load model path: " << model_path;
-      return -1;
-    }
-
-    _core.reset(new SigmoidFluidModel);
-
-    std::string fluid_model_data_path = conf.dnn_model_path();
-    predictor::InferEngineCreationParams new_params(params);
-    new_params.set_path(fluid_model_data_path);
-    int ret = load_fluid_model(new_params);
-    if (ret < 0) {
-      LOG(ERROR) << "fail to load fluid model.";
-      return -1;
-    }
-    const char* sigmoid_w_file = conf.sigmoid_w_file().c_str();
-    const char* sigmoid_b_file = conf.sigmoid_b_file().c_str();
-    float exp_max = conf.exp_max_input();
-    float exp_min = conf.exp_min_input();
-    _core->_sigmoid_core.reset(new SigmoidModel);
-    LOG(INFO) << "create sigmoid core[" << _core->_sigmoid_core.get()
-              << "], use count[" << _core->_sigmoid_core.use_count() << "].";
-    ret = _core->_sigmoid_core->load(
-        sigmoid_w_file, sigmoid_b_file, exp_max, exp_min);
-    if (ret < 0) {
-      LOG(ERROR) << "fail to load sigmoid model.";
-      return -1;
-    }
-    return 0;
-  }
-
-  virtual bool Run(const void* in_data, void* out_data) {
-    if (!_core->_fluid_core->Run(
-            *(std::vector<paddle::PaddleTensor>*)in_data,
-            (std::vector<paddle::PaddleTensor>*)out_data)) {
-      LOG(ERROR) << "Failed call Run with paddle predictor";
-      return false;
-    }
-
-    return true;
-  }
-
-  virtual int clone(SigmoidFluidModel* origin_core) {
-    if (origin_core == NULL) {
-      LOG(ERROR) << "origin paddle Predictor is null.";
-      return -1;
-    }
-    _core = origin_core->Clone();
-    if (_core.get() == NULL) {
-      LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
-      return -1;
-    }
-    LOG(INFO) << "clone sigmoid core[" << _core->_sigmoid_core.get()
-              << "] use count[" << _core->_sigmoid_core.use_count() << "].";
-    return 0;
-  }
-
-  virtual SigmoidFluidModel* get() { return _core.get(); }
-
-  virtual int load_fluid_model(
-      const predictor::InferEngineCreationParams& params) = 0;
-
-  int softmax(float x, double& o) {  // NOLINT
-    return _core->_sigmoid_core->softmax(x, o);
-  }
-
- protected:
-  std::unique_ptr<SigmoidFluidModel> _core;
-};
-
-class FluidGpuNativeDirWithSigmoidCore : public FluidGpuWithSigmoidCore {
- public:
-  int load_fluid_model(const predictor::InferEngineCreationParams& params) {
-    std::string data_path = params.get_path();
-    if (access(data_path.c_str(), F_OK) == -1) {
-      LOG(ERROR) << "create paddle predictor failed, path not exits: "
-                 << data_path;
-      return -1;
-    }
-
-    paddle::NativeConfig native_config;
-    native_config.model_dir = data_path;
-    native_config.use_gpu = true;
-    native_config.fraction_of_gpu_memory = 0.01;
-    native_config.device = FLAGS_gpuid;
-    AutoLock lock(GlobalPaddleCreateMutex::instance());
-    _core->_fluid_core =
-        paddle::CreatePaddlePredictor<paddle::NativeConfig,
-                                      paddle::PaddleEngineKind::kNative>(
-            native_config);
-    if (NULL == _core.get()) {
-      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
-      return -1;
-    }
-
-    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
-    return 0;
-  }
-};
-
-class FluidGpuAnalysisDirWithSigmoidCore : public FluidGpuWithSigmoidCore {
- public:
-  int load_fluid_model(const predictor::InferEngineCreationParams& params) {
-    std::string data_path = params.get_path();
-    if (access(data_path.c_str(), F_OK) == -1) {
-      LOG(ERROR) << "create paddle predictor failed, path not exits: "
-                 << data_path;
-      return -1;
-    }
-
-    paddle::AnalysisConfig analysis_config;
-    analysis_config.SetModel(data_path);
-    analysis_config.EnableUseGpu(100, FLAGS_gpuid);
-    analysis_config.SwitchSpecifyInputNames(true);
-    analysis_config.SetCpuMathLibraryNumThreads(1);
-
-    if (params.enable_memory_optimization()) {
-      analysis_config.EnableMemoryOptim();
-    }
-
-    AutoLock lock(GlobalPaddleCreateMutex::instance());
-    _core->_fluid_core =
-        paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
-    if (NULL == _core.get()) {
-      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
-      return -1;
-    }
-
-    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
-    return 0;
-  }
-};
-
 }  // namespace fluid_gpu
 }  // namespace paddle_serving
 }  // namespace baidu
diff --git a/paddle_inference/inferencer-fluid-gpu/src/fluid_gpu_engine.cpp b/paddle_inference/inferencer-fluid-gpu/src/fluid_gpu_engine.cpp
index 7447a417338a37716eff025721126e4c817408a6..c00ea8719414f5ac324ac62e3e36128ad6035f91 100644
--- a/paddle_inference/inferencer-fluid-gpu/src/fluid_gpu_engine.cpp
+++ b/paddle_inference/inferencer-fluid-gpu/src/fluid_gpu_engine.cpp
@@ -32,28 +32,6 @@ REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
     ::baidu::paddle_serving::predictor::InferEngine,
     "FLUID_GPU_ANALYSIS_DIR");
 
-REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
-    ::baidu::paddle_serving::predictor::FluidInferEngine<
-        FluidGpuAnalysisDirWithSigmoidCore>,
-    ::baidu::paddle_serving::predictor::InferEngine,
-    "FLUID_GPU_ANALYSIS_DIR_SIGMOID");
-
-REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
-    ::baidu::paddle_serving::predictor::FluidInferEngine<FluidGpuNativeCore>,
-    ::baidu::paddle_serving::predictor::InferEngine,
-    "FLUID_GPU_NATIVE");
-
-REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
-    ::baidu::paddle_serving::predictor::FluidInferEngine<FluidGpuNativeDirCore>,
-    ::baidu::paddle_serving::predictor::InferEngine,
-    "FLUID_GPU_NATIVE_DIR");
-
-REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
-    ::baidu::paddle_serving::predictor::FluidInferEngine<
-        FluidGpuNativeDirWithSigmoidCore>,
-    ::baidu::paddle_serving::predictor::InferEngine,
-    "FLUID_GPU_NATIVE_DIR_SIGMOID");
-
 }  // namespace fluid_gpu
 }  // namespace paddle_serving
 }  // namespace baidu
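
Usage note: the sketch below (not part of the patch) shows roughly how the name/handle getters added to InferManager above are meant to be driven from an op body. The engine name "general_infer_0" comes from the patch itself; the input shape and float values are placeholders, and error handling and LoD are omitted.

  // Illustrative sketch only; assumes a single-input, single-output float model.
  std::string engine = "general_infer_0";

  // Feed: fill the first input tensor by name.
  std::vector<std::string> in_names =
      InferManager::instance().GetInputNames(engine.c_str());
  auto in = InferManager::instance().GetInputHandle(engine.c_str(), in_names[0]);
  std::vector<float> feed = {1.0f, 2.0f, 3.0f, 4.0f};  // placeholder values
  in->Reshape({1, 4});
  in->CopyFromCpu(feed.data());

  // Run the engine registered under that name.
  if (InferManager::instance().infer(engine.c_str())) {
    LOG(ERROR) << "infer failed";
  }

  // Fetch: copy the first output back to host memory.
  std::vector<std::string> out_names =
      InferManager::instance().GetOutputNames(engine.c_str());
  auto out = InferManager::instance().GetOutputHandle(engine.c_str(), out_names[0]);
  std::vector<int> out_shape = out->shape();
  int numel = 1;
  for (int d : out_shape) numel *= d;
  std::vector<float> fetch(numel);
  out->CopyToCpu(fetch.data());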