提交 ed52619f 编写于 作者：littletomatodonkey

Add C++ zero-copy run interface

上级 0a011e56
...@@ -41,6 +41,8 @@ public: ...@@ -41,6 +41,8 @@ public:
this->use_mkldnn = bool(stoi(config_map_["use_mkldnn"])); this->use_mkldnn = bool(stoi(config_map_["use_mkldnn"]));
this->use_zero_copy_run = bool(stoi(config_map_["use_zero_copy_run"]));
this->max_side_len = stoi(config_map_["max_side_len"]); this->max_side_len = stoi(config_map_["max_side_len"]);
this->det_db_thresh = stod(config_map_["det_db_thresh"]); this->det_db_thresh = stod(config_map_["det_db_thresh"]);
...@@ -68,6 +70,8 @@ public: ...@@ -68,6 +70,8 @@ public:
bool use_mkldnn = false; bool use_mkldnn = false;
bool use_zero_copy_run = false;
int max_side_len = 960; int max_side_len = 960;
double det_db_thresh = 0.3; double det_db_thresh = 0.3;
......
...@@ -39,8 +39,8 @@ public: ...@@ -39,8 +39,8 @@ public:
explicit DBDetector(const std::string &model_dir, const bool &use_gpu, explicit DBDetector(const std::string &model_dir, const bool &use_gpu,
const int &gpu_id, const int &gpu_mem, const int &gpu_id, const int &gpu_mem,
const int &cpu_math_library_num_threads, const int &cpu_math_library_num_threads,
const bool &use_mkldnn, const int &max_side_len, const bool &use_mkldnn, const bool &use_zero_copy_run,
const double &det_db_thresh, const int &max_side_len, const double &det_db_thresh,
const double &det_db_box_thresh, const double &det_db_box_thresh,
const double &det_db_unclip_ratio, const double &det_db_unclip_ratio,
const bool &visualize) { const bool &visualize) {
...@@ -49,6 +49,7 @@ public: ...@@ -49,6 +49,7 @@ public:
this->gpu_mem_ = gpu_mem; this->gpu_mem_ = gpu_mem;
this->cpu_math_library_num_threads_ = cpu_math_library_num_threads; this->cpu_math_library_num_threads_ = cpu_math_library_num_threads;
this->use_mkldnn_ = use_mkldnn; this->use_mkldnn_ = use_mkldnn;
this->use_zero_copy_run_ = use_zero_copy_run;
this->max_side_len_ = max_side_len; this->max_side_len_ = max_side_len;
...@@ -75,6 +76,7 @@ private: ...@@ -75,6 +76,7 @@ private:
int gpu_mem_ = 4000; int gpu_mem_ = 4000;
int cpu_math_library_num_threads_ = 4; int cpu_math_library_num_threads_ = 4;
bool use_mkldnn_ = false; bool use_mkldnn_ = false;
bool use_zero_copy_run_ = false;
int max_side_len_ = 960; int max_side_len_ = 960;
......
...@@ -38,12 +38,14 @@ public: ...@@ -38,12 +38,14 @@ public:
explicit CRNNRecognizer(const std::string &model_dir, const bool &use_gpu, explicit CRNNRecognizer(const std::string &model_dir, const bool &use_gpu,
const int &gpu_id, const int &gpu_mem, const int &gpu_id, const int &gpu_mem,
const int &cpu_math_library_num_threads, const int &cpu_math_library_num_threads,
const bool &use_mkldnn, const string &label_path) { const bool &use_mkldnn, const bool &use_zero_copy_run,
const string &label_path) {
this->use_gpu_ = use_gpu; this->use_gpu_ = use_gpu;
this->gpu_id_ = gpu_id; this->gpu_id_ = gpu_id;
this->gpu_mem_ = gpu_mem; this->gpu_mem_ = gpu_mem;
this->cpu_math_library_num_threads_ = cpu_math_library_num_threads; this->cpu_math_library_num_threads_ = cpu_math_library_num_threads;
this->use_mkldnn_ = use_mkldnn; this->use_mkldnn_ = use_mkldnn;
this->use_zero_copy_run_ = use_zero_copy_run;
this->label_list_ = Utility::ReadDict(label_path); this->label_list_ = Utility::ReadDict(label_path);
this->label_list_.push_back(" "); this->label_list_.push_back(" ");
...@@ -64,6 +66,7 @@ private: ...@@ -64,6 +66,7 @@ private:
int gpu_mem_ = 4000; int gpu_mem_ = 4000;
int cpu_math_library_num_threads_ = 4; int cpu_math_library_num_threads_ = 4;
bool use_mkldnn_ = false; bool use_mkldnn_ = false;
bool use_zero_copy_run_ = false;
std::vector<std::string> label_list_; std::vector<std::string> label_list_;
......
...@@ -48,14 +48,15 @@ int main(int argc, char **argv) { ...@@ -48,14 +48,15 @@ int main(int argc, char **argv) {
cv::Mat srcimg = cv::imread(img_path, cv::IMREAD_COLOR); cv::Mat srcimg = cv::imread(img_path, cv::IMREAD_COLOR);
DBDetector det(config.det_model_dir, config.use_gpu, config.gpu_id, DBDetector det(
config.gpu_mem, config.cpu_math_library_num_threads, config.det_model_dir, config.use_gpu, config.gpu_id, config.gpu_mem,
config.use_mkldnn, config.max_side_len, config.det_db_thresh, config.cpu_math_library_num_threads, config.use_mkldnn,
config.det_db_box_thresh, config.det_db_unclip_ratio, config.use_zero_copy_run, config.max_side_len, config.det_db_thresh,
config.visualize); config.det_db_box_thresh, config.det_db_unclip_ratio, config.visualize);
CRNNRecognizer rec(config.rec_model_dir, config.use_gpu, config.gpu_id, CRNNRecognizer rec(config.rec_model_dir, config.use_gpu, config.gpu_id,
config.gpu_mem, config.cpu_math_library_num_threads, config.gpu_mem, config.cpu_math_library_num_threads,
config.use_mkldnn, config.char_list_file); config.use_mkldnn, config.use_zero_copy_run,
config.char_list_file);
auto start = std::chrono::system_clock::now(); auto start = std::chrono::system_clock::now();
std::vector<std::vector<std::vector<int>>> boxes; std::vector<std::vector<std::vector<int>>> boxes;
......
...@@ -32,7 +32,7 @@ void DBDetector::LoadModel(const std::string &model_dir) { ...@@ -32,7 +32,7 @@ void DBDetector::LoadModel(const std::string &model_dir) {
// false for zero copy tensor // false for zero copy tensor
// true for commom tensor // true for commom tensor
config.SwitchUseFeedFetchOps(true); config.SwitchUseFeedFetchOps(!this->use_zero_copy_run_);
// true for multiple input // true for multiple input
config.SwitchSpecifyInputNames(true); config.SwitchSpecifyInputNames(true);
...@@ -61,12 +61,21 @@ void DBDetector::Run(cv::Mat &img, ...@@ -61,12 +61,21 @@ void DBDetector::Run(cv::Mat &img,
this->permute_op_.Run(&resize_img, input.data()); this->permute_op_.Run(&resize_img, input.data());
// Inference. // Inference.
paddle::PaddleTensor input_t; if (this->use_zero_copy_run_) {
input_t.shape = {1, 3, resize_img.rows, resize_img.cols}; auto input_names = this->predictor_->GetInputNames();
input_t.data = paddle::PaddleBuf(input.data(), input.size() * sizeof(float)); auto input_t = this->predictor_->GetInputTensor(input_names[0]);
input_t.dtype = PaddleDType::FLOAT32; input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
std::vector<paddle::PaddleTensor> outputs; input_t->copy_from_cpu(input.data());
this->predictor_->Run({input_t}, &outputs, 1); this->predictor_->ZeroCopyRun();
} else {
paddle::PaddleTensor input_t;
input_t.shape = {1, 3, resize_img.rows, resize_img.cols};
input_t.data =
paddle::PaddleBuf(input.data(), input.size() * sizeof(float));
input_t.dtype = PaddleDType::FLOAT32;
std::vector<paddle::PaddleTensor> outputs;
this->predictor_->Run({input_t}, &outputs, 1);
}
std::vector<float> out_data; std::vector<float> out_data;
auto output_names = this->predictor_->GetOutputNames(); auto output_names = this->predictor_->GetOutputNames();
......
...@@ -40,13 +40,21 @@ void CRNNRecognizer::Run(std::vector<std::vector<std::vector<int>>> boxes, ...@@ -40,13 +40,21 @@ void CRNNRecognizer::Run(std::vector<std::vector<std::vector<int>>> boxes,
this->permute_op_.Run(&resize_img, input.data()); this->permute_op_.Run(&resize_img, input.data());
// Inference. // Inference.
paddle::PaddleTensor input_t; if (this->use_zero_copy_run_) {
input_t.shape = {1, 3, resize_img.rows, resize_img.cols}; auto input_names = this->predictor_->GetInputNames();
input_t.data = auto input_t = this->predictor_->GetInputTensor(input_names[0]);
paddle::PaddleBuf(input.data(), input.size() * sizeof(float)); input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
input_t.dtype = PaddleDType::FLOAT32; input_t->copy_from_cpu(input.data());
std::vector<paddle::PaddleTensor> outputs; this->predictor_->ZeroCopyRun();
this->predictor_->Run({input_t}, &outputs, 1); } else {
paddle::PaddleTensor input_t;
input_t.shape = {1, 3, resize_img.rows, resize_img.cols};
input_t.data =
paddle::PaddleBuf(input.data(), input.size() * sizeof(float));
input_t.dtype = PaddleDType::FLOAT32;
std::vector<paddle::PaddleTensor> outputs;
this->predictor_->Run({input_t}, &outputs, 1);
}
std::vector<int64_t> rec_idx; std::vector<int64_t> rec_idx;
auto output_names = this->predictor_->GetOutputNames(); auto output_names = this->predictor_->GetOutputNames();
...@@ -124,7 +132,7 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) { ...@@ -124,7 +132,7 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
// false for zero copy tensor // false for zero copy tensor
// true for commom tensor // true for commom tensor
config.SwitchUseFeedFetchOps(true); config.SwitchUseFeedFetchOps(!this->use_zero_copy_run_);
// true for multiple input // true for multiple input
config.SwitchSpecifyInputNames(true); config.SwitchSpecifyInputNames(true);
......
...@@ -4,6 +4,7 @@ gpu_id 0 ...@@ -4,6 +4,7 @@ gpu_id 0
gpu_mem 4000 gpu_mem 4000
cpu_math_library_num_threads 10 cpu_math_library_num_threads 10
use_mkldnn 0 use_mkldnn 0
use_zero_copy_run 1
# det config # det config
max_side_len 960 max_side_len 960
......
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册