#include <algorithm>  // std::max
#include <numeric>    // std::accumulate, std::multiplies

#include "logger.h"
#include "conf_parser.h"
#include "predictor.h"

Predictor::~Predictor() {}

bool feed(paddle::PaddlePredictor *predictor, const std::vector<Tensor> &tensors) {
    LOG(INFO) << "Predictor::feed";
    auto names = predictor->GetInputNames();
    if (names.size() != tensors.size()) {
        LOG(WARNING) << "The given size " << tensors.size()
                     << " is not equal to the required size " << names.size();
        return false;
    }
    for (size_t i = 0; i < names.size(); ++i) {
        auto i_t = predictor->GetInputTensor(names[i]);
        i_t->Reshape(tensors[i].shape);
        i_t->SetLoD(tensors[i].lod);
        if (tensors[i].data.type == DataType::FLOAT32) {
            const float *temp = reinterpret_cast<const float *>(tensors[i].data.data.get());
            i_t->copy_from_cpu(temp);
        } else if (tensors[i].data.type == DataType::INT32) {
            const int32_t *temp = reinterpret_cast<const int32_t *>(tensors[i].data.data.get());
            i_t->copy_from_cpu(temp);
        } else if (tensors[i].data.type == DataType::INT64) {
            const int64_t *temp = reinterpret_cast<const int64_t *>(tensors[i].data.data.get());
            i_t->copy_from_cpu(temp);
        } else {
            LOG(ERROR) << "do not support current datatype";
            return false;
        }
    }
    return true;
}

bool fetch(paddle::PaddlePredictor *predictor, std::vector<Tensor> &tensors) {
    LOG(INFO) << "Predictor::fetch";
    auto names = predictor->GetOutputNames();
    for (auto &name : names) {
        auto o_t = predictor->GetOutputTensor(name);
        std::vector<int> s = o_t->shape();
        Tensor out;
        out.shape = s;
        out.lod = o_t->lod();
        int num = std::accumulate(s.begin(), s.end(), 1, std::multiplies<int>());
        if (o_t->type() == paddle::PaddleDType::FLOAT32) {
            out.data = DataBuf(DataType::FLOAT32, size_t(num));
            float *p_data = reinterpret_cast<float *>(out.data.data.get());
            o_t->copy_to_cpu(p_data);
        } else if (o_t->type() == paddle::PaddleDType::INT32) {
            out.data = DataBuf(DataType::INT32, size_t(num));
            int32_t *p_data = reinterpret_cast<int32_t *>(out.data.data.get());
            o_t->copy_to_cpu(p_data);
        } else if (o_t->type() == paddle::PaddleDType::INT64) {
            out.data = DataBuf(DataType::INT64, size_t(num));
            int64_t *p_data = reinterpret_cast<int64_t *>(out.data.data.get());
            o_t->copy_to_cpu(p_data);
        } else {
            LOG(ERROR) << "do not support current datatype";
            return false;
        }
        tensors.push_back(out);
    }
    return true;
}

bool Predictor::predict(const std::vector<Tensor> &inputs,
                        const std::vector<std::string> &layers,
                        std::vector<Tensor> &outputs) {
    LOG(INFO) << "Predictor::predict";
    (void)layers;
    // 1. feed input
    if (!feed(_predictor.get(), inputs)) {
        return false;
    }
    // 2. execute inference
    if (!_predictor->ZeroCopyRun()) {
        LOG(WARNING) << "fail to execute predictor";
        return false;
    }
    // 3. fetch output
    if (!fetch(_predictor.get(), outputs)) {
        return false;
    }
    return true;
}
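// Illustrative sketch (comment only): how a caller might build an input Tensor
// for the overload above, assuming the Tensor/DataBuf layout used in
// feed()/fetch() (fields shape/lod/data, DataBuf(DataType, size_t), and
// data.data.get() as the raw buffer). The shape and values are placeholders.
//
//     Tensor t;
//     t.shape = {1, 3, 224, 224};
//     t.lod = {};  // no LoD for a dense image input
//     t.data = DataBuf(DataType::FLOAT32, size_t(1 * 3 * 224 * 224));
//     float *buf = reinterpret_cast<float *>(t.data.data.get());
//     // ... fill buf with preprocessed input data ...
//     std::vector<Tensor> outputs;
//     predictor->predict({t}, {}, outputs);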
bool check_shape(const std::vector<std::vector<float>> &datas,
                 const std::vector<std::vector<int>> &shapes) {
    LOG(INFO) << "check_shape";
    if (datas.size() != shapes.size()) {
        LOG(ERROR) << "datas size: " << datas.size() << " != "
                   << "shapes size: " << shapes.size();
        return false;
    }
    for (size_t i = 0; i < datas.size(); ++i) {
        int count = 1;
        for (auto num : shapes[i]) {
            count *= num;
        }
        int data_size = static_cast<int>(datas[i].size());
        if (count != data_size) {
            LOG(ERROR) << "data[" << i << "] size " << data_size << " != "
                       << "shape[" << i << "] size " << count;
            return false;
        }
    }
    return true;
}

bool feed(paddle::PaddlePredictor *predictor,
          const std::vector<std::vector<float>> &datas,
          const std::vector<std::vector<int>> &shapes) {
    LOG(INFO) << "Predictor::feed";
    // 1. check input shape
    if (!check_shape(datas, shapes)) {
        return false;
    }
    // 2. check the given inputs against the required inputs
    auto names = predictor->GetInputNames();
    if (names.size() != datas.size()) {
        LOG(WARNING) << "The given size " << datas.size()
                     << " is not equal to the required size " << names.size();
        return false;
    }
    // 3. feed
    for (size_t i = 0; i < names.size(); ++i) {
        auto i_t = predictor->GetInputTensor(names[i]);
        i_t->Reshape(shapes[i]);
        i_t->copy_from_cpu(datas[i].data());
    }
    return true;
}

bool fetch(paddle::PaddlePredictor *predictor,
           std::vector<std::vector<float>> &datas,
           std::vector<std::vector<int>> &shapes) {
    LOG(INFO) << "Predictor::fetch";
    auto names = predictor->GetOutputNames();
    for (auto &name : names) {
        auto o_t = predictor->GetOutputTensor(name);
        std::vector<int> s = o_t->shape();
        shapes.push_back(s);
        int num = std::accumulate(s.begin(), s.end(), 1, std::multiplies<int>());
        std::vector<float> data(num);
        o_t->copy_to_cpu(data.data());
        datas.push_back(data);
    }
    return true;
}

bool Predictor::predict(const std::vector<std::vector<float>> &input_datas,
                        const std::vector<std::vector<int>> &input_shapes,
                        const std::vector<std::string> &layers,
                        std::vector<std::vector<float>> &output_datas,
                        std::vector<std::vector<int>> &output_shapes) {
    LOG(INFO) << "Predictor::predict";
    (void)layers;
    // 1. feed input
    if (!feed(_predictor.get(), input_datas, input_shapes)) {
        return false;
    }
    // 2. execute inference
    if (!_predictor->ZeroCopyRun()) {
        LOG(WARNING) << "fail to execute predictor";
        return false;
    }
    // 3. fetch output
    if (!fetch(_predictor.get(), output_datas, output_shapes)) {
        return false;
    }
    return true;
}

void init_tensorrt(const ConfParser *conf, const std::string &prefix, AnalysisConfig &config) {
    LOG(INFO) << "Predictor::init_tensorrt()";
    // 1. max_batch_size for tensorrt
    int max_batch_size = 1;
    if (!conf->get_int(prefix, "max_batch_size", max_batch_size)) {
        LOG(WARNING) << "fail to get max_batch_size from conf, set as 1";
    }
    max_batch_size = std::max(1, max_batch_size);
    // 2. workspace_size for tensorrt
    int workspace_size = 0;
    if (!conf->get_int(prefix, "workspace_size", workspace_size)) {
        LOG(WARNING) << "fail to get workspace_size from conf, set as 0";
    }
    workspace_size = std::max(0, workspace_size);
    // 3. min_subgraph_size for tensorrt
    int min_subgraph_size = 3;
    if (!conf->get_int(prefix, "min_subgraph_size", min_subgraph_size)) {
        LOG(WARNING) << "fail to get min_subgraph_size from conf, set as 3";
    }
    min_subgraph_size = std::max(0, min_subgraph_size);
    config.EnableTensorRtEngine(workspace_size, max_batch_size, min_subgraph_size);
}

void init_anakin(const ConfParser *conf, const std::string &prefix, AnalysisConfig &config) {
    LOG(INFO) << "Predictor::init_anakin()";
    // 1. max_batch_size for anakin
    int max_batch_size = 1;
    if (!conf->get_int(prefix, "max_batch_size", max_batch_size)) {
        LOG(WARNING) << "fail to get max_batch_size from conf, set as 1";
    }
    max_batch_size = std::max(1, max_batch_size);
    // 2. per-input max shapes for anakin
    std::map<std::string, std::vector<int>> anakin_max_input_dict;
    std::vector<std::string> input_names;
    if (!conf->get_strings(prefix, "input_names", input_names)) {
        LOG(WARNING) << "fail to get input_names from conf";
    }
    for (auto &n : input_names) {
        std::vector<int> shape;
        if (!conf->get_ints(prefix, n, shape)) {
            LOG(WARNING) << "fail to get the shape of " << n;
        } else {
            anakin_max_input_dict[n] = shape;
        }
    }
    config.EnableAnakinEngine(max_batch_size, anakin_max_input_dict);
    config.pass_builder()->TurnOnDebug();
}
void init_gpu(const ConfParser *conf, const std::string &prefix, int device, AnalysisConfig &config) {
    LOG(INFO) << "Predictor::init_gpu()";
    // 1. GPU memory (MB)
    uint32_t gpu_memory_mb = 1024;
    if (!conf->get_uint(prefix, "gpu_memory_mb", gpu_memory_mb)) {
        LOG(WARNING) << "fail to get gpu_memory_mb from conf, set as 1024";
    }
    config.EnableUseGpu(gpu_memory_mb, device);
    // 2. infer engine (tensorrt / anakin)
    std::string infer_engine;
    if (!conf->get_string(prefix, "infer_engine", infer_engine)) {
        LOG(WARNING) << "disable infer engine";
        return;
    } else if (infer_engine == "tensorrt") {
        init_tensorrt(conf, prefix + "tensorrt_", config);
    } else if (infer_engine == "anakin") {
        init_anakin(conf, prefix + "anakin_", config);
    } else {
        LOG(WARNING) << "unknown infer engine";
        return;
    }
}

void init_cpu(const ConfParser *conf, const std::string &prefix, AnalysisConfig &config) {
    LOG(INFO) << "Predictor::init_cpu()";
    config.DisableGpu();
    // 1. cpu_math_library (such as mkl/openblas) num_threads
    int num_threads = 1;
    if (!conf->get_int(prefix, "num_threads", num_threads)) {
        LOG(WARNING) << "fail to get num_threads from conf, set as 1";
    }
    num_threads = std::max(1, num_threads);
    config.SetCpuMathLibraryNumThreads(num_threads);
    // 2. use_mkldnn
    int use_mkldnn = -1;
    if (conf->get_int(prefix, "use_mkldnn", use_mkldnn) && use_mkldnn > 0) {
        config.EnableMKLDNN();
    }
}

bool init_model(const ConfParser *conf, const std::string &prefix, AnalysisConfig &config) {
    LOG(INFO) << "Predictor::init_model()";
    std::string prog_file;
    if (!conf->get_string(prefix, "prog_file", prog_file)) {
        LOG(WARNING) << "fail to get prog_file from conf";
    }
    std::string param_file;
    if (!conf->get_string(prefix, "param_file", param_file)) {
        LOG(WARNING) << "fail to get param_file from conf";
    }
    if (!prog_file.empty() && !param_file.empty()) {
        if (!file_exist(prog_file)) {
            LOG(FATAL) << "file: " << prog_file << " does not exist";
            return false;
        }
        if (!file_exist(param_file)) {
            LOG(FATAL) << "file: " << param_file << " does not exist";
            return false;
        }
        config.SetModel(prog_file, param_file);
        return true;
    }
    std::string model_path;
    if (!conf->get_string(prefix, "model_path", model_path)) {
        LOG(FATAL) << "fail to get model_path from conf";
        return false;
    }
    config.SetModel(model_path);
    return true;
}

void show_version_info() {
    static bool initialized = false;
    if (initialized) {
        return;
    }
    LOG(INFO) << "[date:" << __DATE__ << "]"
              << "[time:" << __TIME__ << "]";
    LOG(INFO) << "paddle " << paddle::get_version();
    initialized = true;
}
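// Conf keys consumed below (this list only mirrors the get_* calls in this
// file; the concrete file syntax is whatever ConfParser::init() accepts):
//   <prefix>          debug, device, enable_ir_optim, specify_input_name,
//                     delete_pass, prog_file, param_file, model_path
//   CPU (device < 0)  num_threads, use_mkldnn
//   GPU (device >= 0) gpu_memory_mb, infer_engine ("tensorrt" or "anakin")
//   <prefix>tensorrt_ max_batch_size, workspace_size, min_subgraph_size
//   <prefix>anakin_   max_batch_size, input_names, one shape entry per input name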
bool Predictor::init(const std::string &conf_file, const std::string &prefix) {
    LOG(INFO) << "Predictor::init()";
    show_version_info();
    std::unique_ptr<AnalysisConfig> config(new AnalysisConfig());
    std::unique_ptr<ConfParser> conf(new ConfParser());
    if (!conf->init(conf_file)) {
        LOG(FATAL) << "fail to load conf file: " << conf_file;
        return false;
    }
    // 1. debug
    if (!conf->get_int(prefix, "debug", _debug)) {
        _debug = -1;
        LOG(WARNING) << "fail to get debug from conf, set as -1";
    }
    // 2. init model
    if (!init_model(conf.get(), prefix, *config.get())) {
        LOG(FATAL) << "fail to init model";
        return false;
    }
    // 3. enable_ir_optim
    int ir_optim = -1;
    if (!conf->get_int(prefix, "enable_ir_optim", ir_optim)) {
        LOG(WARNING) << "fail to get enable_ir_optim from conf, set as false";
    }
    config->SwitchIrOptim(ir_optim > 0);
    // 4. specify_input_name
    int sp_input = -1;
    if (!conf->get_int(prefix, "specify_input_name", sp_input)) {
        LOG(WARNING) << "fail to get specify_input_name from conf, set as false";
    }
    config->SwitchSpecifyInputNames(sp_input > 0);
    // 5. use zerocopy
    config->SwitchUseFeedFetchOps(false);
    // 6. device
    int device = -1;
    if (!conf->get_int(prefix, "device", device)) {
        LOG(WARNING) << "fail to get device from conf";
        return false;
    }
    if (device < 0) {
        LOG(INFO) << "use cpu!";
        init_cpu(conf.get(), prefix, *config.get());
    } else {
        LOG(INFO) << "use gpu!";
        init_gpu(conf.get(), prefix, device, *config.get());
    }
    // 7. delete unused passes
    std::vector<std::string> passes;
    if (conf->get_strings(prefix, "delete_pass", passes)) {
        for (auto &p : passes) {
            LOG(INFO) << "delete pass: " << p;
            config->pass_builder()->DeletePass(p);
        }
    }
    // 8. create predictor
    auto predictor = CreatePaddlePredictor(*config.get());
    if (NULL == predictor) {
        LOG(ERROR) << "fail to create paddle predictor";
        return false;
    }
    _predictor = std::move(predictor);
    return true;
}

bool Predictor::init_shared(Predictor *cls) {
    LOG(INFO) << "Predictor::init_shared";
    this->_predictor = std::move(cls->_predictor->Clone());
    if (NULL == this->_predictor) {
        LOG(ERROR) << "fail to clone paddle predictor";
        return false;
    }
    return true;
}

ICNNPredict *Predictor::clone() {
    LOG(INFO) << "Predictor::clone";
    Predictor *cls = new Predictor();
    if (!cls->init_shared(this)) {
        LOG(FATAL) << "fail to call cls->init_shared";
        delete cls;
        return NULL;
    }
    return cls;
}

ICNNPredict *create_cnnpredict(const std::string &conf_file, const std::string &prefix) {
    LOG(INFO) << "create_cnnpredict";
    Predictor *predictor = new Predictor();
    if (!predictor->init(conf_file, prefix)) {
        delete predictor;
        return NULL;
    }
    return predictor;
}
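// Usage sketch (comment only): one possible call sequence for this wrapper.
// The conf path, prefix, and shapes are placeholders, error handling is
// omitted, and it assumes ICNNPredict declares the predict()/clone() methods
// implemented above.
//
//     ICNNPredict *p = create_cnnpredict("./conf/predictor.conf", "predictor_");
//     std::vector<std::vector<float>> in_datas = {/* one buffer per input */};
//     std::vector<std::vector<int>> in_shapes = {{1, 3, 224, 224}};
//     std::vector<std::vector<float>> out_datas;
//     std::vector<std::vector<int>> out_shapes;
//     p->predict(in_datas, in_shapes, {}, out_datas, out_shapes);
//
//     // Additional predictors (e.g. one per worker thread) can be created
//     // from an initialized instance:
//     ICNNPredict *p2 = p->clone();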