From ed52619f4b4791fda71ff4b22f517e04c78bd3b8 Mon Sep 17 00:00:00 2001 From: littletomatodonkey Date: Mon, 24 Aug 2020 02:37:41 +0000 Subject: [PATCH] add cpp zero copy run interface --- deploy/cpp_infer/include/config.h | 4 ++++ deploy/cpp_infer/include/ocr_det.h | 6 ++++-- deploy/cpp_infer/include/ocr_rec.h | 5 ++++- deploy/cpp_infer/src/main.cpp | 13 +++++++------ deploy/cpp_infer/src/ocr_det.cpp | 23 ++++++++++++++++------- deploy/cpp_infer/src/ocr_rec.cpp | 24 ++++++++++++++++-------- deploy/cpp_infer/tools/config.txt | 1 + 7 files changed, 52 insertions(+), 24 deletions(-) diff --git a/deploy/cpp_infer/include/config.h b/deploy/cpp_infer/include/config.h index c5257d8a..8db693b1 100644 --- a/deploy/cpp_infer/include/config.h +++ b/deploy/cpp_infer/include/config.h @@ -41,6 +41,8 @@ public: this->use_mkldnn = bool(stoi(config_map_["use_mkldnn"])); + this->use_zero_copy_run = bool(stoi(config_map_["use_zero_copy_run"])); + this->max_side_len = stoi(config_map_["max_side_len"]); this->det_db_thresh = stod(config_map_["det_db_thresh"]); @@ -68,6 +70,8 @@ public: bool use_mkldnn = false; + bool use_zero_copy_run = false; + int max_side_len = 960; double det_db_thresh = 0.3; diff --git a/deploy/cpp_infer/include/ocr_det.h b/deploy/cpp_infer/include/ocr_det.h index ed2667ee..0308d07f 100644 --- a/deploy/cpp_infer/include/ocr_det.h +++ b/deploy/cpp_infer/include/ocr_det.h @@ -39,8 +39,8 @@ public: explicit DBDetector(const std::string &model_dir, const bool &use_gpu, const int &gpu_id, const int &gpu_mem, const int &cpu_math_library_num_threads, - const bool &use_mkldnn, const int &max_side_len, - const double &det_db_thresh, + const bool &use_mkldnn, const bool &use_zero_copy_run, + const int &max_side_len, const double &det_db_thresh, const double &det_db_box_thresh, const double &det_db_unclip_ratio, const bool &visualize) { @@ -49,6 +49,7 @@ public: this->gpu_mem_ = gpu_mem; this->cpu_math_library_num_threads_ = cpu_math_library_num_threads; this->use_mkldnn_ = use_mkldnn; + this->use_zero_copy_run_ = use_zero_copy_run; this->max_side_len_ = max_side_len; @@ -75,6 +76,7 @@ private: int gpu_mem_ = 4000; int cpu_math_library_num_threads_ = 4; bool use_mkldnn_ = false; + bool use_zero_copy_run_ = false; int max_side_len_ = 960; diff --git a/deploy/cpp_infer/include/ocr_rec.h b/deploy/cpp_infer/include/ocr_rec.h index 471aeb58..520f0f28 100644 --- a/deploy/cpp_infer/include/ocr_rec.h +++ b/deploy/cpp_infer/include/ocr_rec.h @@ -38,12 +38,14 @@ public: explicit CRNNRecognizer(const std::string &model_dir, const bool &use_gpu, const int &gpu_id, const int &gpu_mem, const int &cpu_math_library_num_threads, - const bool &use_mkldnn, const string &label_path) { + const bool &use_mkldnn, const bool &use_zero_copy_run, + const string &label_path) { this->use_gpu_ = use_gpu; this->gpu_id_ = gpu_id; this->gpu_mem_ = gpu_mem; this->cpu_math_library_num_threads_ = cpu_math_library_num_threads; this->use_mkldnn_ = use_mkldnn; + this->use_zero_copy_run_ = use_zero_copy_run; this->label_list_ = Utility::ReadDict(label_path); this->label_list_.push_back(" "); @@ -64,6 +66,7 @@ private: int gpu_mem_ = 4000; int cpu_math_library_num_threads_ = 4; bool use_mkldnn_ = false; + bool use_zero_copy_run_ = false; std::vector label_list_; diff --git a/deploy/cpp_infer/src/main.cpp b/deploy/cpp_infer/src/main.cpp index 27c98e5b..1dd33b30 100644 --- a/deploy/cpp_infer/src/main.cpp +++ b/deploy/cpp_infer/src/main.cpp @@ -48,14 +48,15 @@ int main(int argc, char **argv) { cv::Mat srcimg = cv::imread(img_path, cv::IMREAD_COLOR); - DBDetector det(config.det_model_dir, config.use_gpu, config.gpu_id, - config.gpu_mem, config.cpu_math_library_num_threads, - config.use_mkldnn, config.max_side_len, config.det_db_thresh, - config.det_db_box_thresh, config.det_db_unclip_ratio, - config.visualize); + DBDetector det( + config.det_model_dir, config.use_gpu, config.gpu_id, config.gpu_mem, + config.cpu_math_library_num_threads, config.use_mkldnn, + config.use_zero_copy_run, config.max_side_len, config.det_db_thresh, + config.det_db_box_thresh, config.det_db_unclip_ratio, config.visualize); CRNNRecognizer rec(config.rec_model_dir, config.use_gpu, config.gpu_id, config.gpu_mem, config.cpu_math_library_num_threads, - config.use_mkldnn, config.char_list_file); + config.use_mkldnn, config.use_zero_copy_run, + config.char_list_file); auto start = std::chrono::system_clock::now(); std::vector>> boxes; diff --git a/deploy/cpp_infer/src/ocr_det.cpp b/deploy/cpp_infer/src/ocr_det.cpp index 36ffc9af..56fbace8 100644 --- a/deploy/cpp_infer/src/ocr_det.cpp +++ b/deploy/cpp_infer/src/ocr_det.cpp @@ -32,7 +32,7 @@ void DBDetector::LoadModel(const std::string &model_dir) { // false for zero copy tensor // true for commom tensor - config.SwitchUseFeedFetchOps(true); + config.SwitchUseFeedFetchOps(!this->use_zero_copy_run_); // true for multiple input config.SwitchSpecifyInputNames(true); @@ -61,12 +61,21 @@ void DBDetector::Run(cv::Mat &img, this->permute_op_.Run(&resize_img, input.data()); // Inference. - paddle::PaddleTensor input_t; - input_t.shape = {1, 3, resize_img.rows, resize_img.cols}; - input_t.data = paddle::PaddleBuf(input.data(), input.size() * sizeof(float)); - input_t.dtype = PaddleDType::FLOAT32; - std::vector outputs; - this->predictor_->Run({input_t}, &outputs, 1); + if (this->use_zero_copy_run_) { + auto input_names = this->predictor_->GetInputNames(); + auto input_t = this->predictor_->GetInputTensor(input_names[0]); + input_t->Reshape({1, 3, resize_img.rows, resize_img.cols}); + input_t->copy_from_cpu(input.data()); + this->predictor_->ZeroCopyRun(); + } else { + paddle::PaddleTensor input_t; + input_t.shape = {1, 3, resize_img.rows, resize_img.cols}; + input_t.data = + paddle::PaddleBuf(input.data(), input.size() * sizeof(float)); + input_t.dtype = PaddleDType::FLOAT32; + std::vector outputs; + this->predictor_->Run({input_t}, &outputs, 1); + } std::vector out_data; auto output_names = this->predictor_->GetOutputNames(); diff --git a/deploy/cpp_infer/src/ocr_rec.cpp b/deploy/cpp_infer/src/ocr_rec.cpp index 697f9e8e..a3486db4 100644 --- a/deploy/cpp_infer/src/ocr_rec.cpp +++ b/deploy/cpp_infer/src/ocr_rec.cpp @@ -40,13 +40,21 @@ void CRNNRecognizer::Run(std::vector>> boxes, this->permute_op_.Run(&resize_img, input.data()); // Inference. - paddle::PaddleTensor input_t; - input_t.shape = {1, 3, resize_img.rows, resize_img.cols}; - input_t.data = - paddle::PaddleBuf(input.data(), input.size() * sizeof(float)); - input_t.dtype = PaddleDType::FLOAT32; - std::vector outputs; - this->predictor_->Run({input_t}, &outputs, 1); + if (this->use_zero_copy_run_) { + auto input_names = this->predictor_->GetInputNames(); + auto input_t = this->predictor_->GetInputTensor(input_names[0]); + input_t->Reshape({1, 3, resize_img.rows, resize_img.cols}); + input_t->copy_from_cpu(input.data()); + this->predictor_->ZeroCopyRun(); + } else { + paddle::PaddleTensor input_t; + input_t.shape = {1, 3, resize_img.rows, resize_img.cols}; + input_t.data = + paddle::PaddleBuf(input.data(), input.size() * sizeof(float)); + input_t.dtype = PaddleDType::FLOAT32; + std::vector outputs; + this->predictor_->Run({input_t}, &outputs, 1); + } std::vector rec_idx; auto output_names = this->predictor_->GetOutputNames(); @@ -124,7 +132,7 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) { // false for zero copy tensor // true for commom tensor - config.SwitchUseFeedFetchOps(true); + config.SwitchUseFeedFetchOps(!this->use_zero_copy_run_); // true for multiple input config.SwitchSpecifyInputNames(true); diff --git a/deploy/cpp_infer/tools/config.txt b/deploy/cpp_infer/tools/config.txt index a049fc7d..40beea3a 100644 --- a/deploy/cpp_infer/tools/config.txt +++ b/deploy/cpp_infer/tools/config.txt @@ -4,6 +4,7 @@ gpu_id 0 gpu_mem 4000 cpu_math_library_num_threads 10 use_mkldnn 0 +use_zero_copy_run 1 # det config max_side_len 960 -- GitLab