提交 ed52619f 编写于 作者：littletomatodonkey

Add C++ zero-copy run interface

上级 0a011e56
...@@ -41,6 +41,8 @@ public: ...@@ -41,6 +41,8 @@ public:
this->use_mkldnn = bool(stoi(config_map_["use_mkldnn"])); this->use_mkldnn = bool(stoi(config_map_["use_mkldnn"]));
this->use_zero_copy_run = bool(stoi(config_map_["use_zero_copy_run"]));
this->max_side_len = stoi(config_map_["max_side_len"]); this->max_side_len = stoi(config_map_["max_side_len"]);
this->det_db_thresh = stod(config_map_["det_db_thresh"]); this->det_db_thresh = stod(config_map_["det_db_thresh"]);
...@@ -68,6 +70,8 @@ public: ...@@ -68,6 +70,8 @@ public:
bool use_mkldnn = false; bool use_mkldnn = false;
bool use_zero_copy_run = false;
int max_side_len = 960; int max_side_len = 960;
double det_db_thresh = 0.3; double det_db_thresh = 0.3;
......
...@@ -39,8 +39,8 @@ public: ...@@ -39,8 +39,8 @@ public:
explicit DBDetector(const std::string &model_dir, const bool &use_gpu, explicit DBDetector(const std::string &model_dir, const bool &use_gpu,
const int &gpu_id, const int &gpu_mem, const int &gpu_id, const int &gpu_mem,
const int &cpu_math_library_num_threads, const int &cpu_math_library_num_threads,
const bool &use_mkldnn, const int &max_side_len, const bool &use_mkldnn, const bool &use_zero_copy_run,
const double &det_db_thresh, const int &max_side_len, const double &det_db_thresh,
const double &det_db_box_thresh, const double &det_db_box_thresh,
const double &det_db_unclip_ratio, const double &det_db_unclip_ratio,
const bool &visualize) { const bool &visualize) {
...@@ -49,6 +49,7 @@ public: ...@@ -49,6 +49,7 @@ public:
this->gpu_mem_ = gpu_mem; this->gpu_mem_ = gpu_mem;
this->cpu_math_library_num_threads_ = cpu_math_library_num_threads; this->cpu_math_library_num_threads_ = cpu_math_library_num_threads;
this->use_mkldnn_ = use_mkldnn; this->use_mkldnn_ = use_mkldnn;
this->use_zero_copy_run_ = use_zero_copy_run;
this->max_side_len_ = max_side_len; this->max_side_len_ = max_side_len;
...@@ -75,6 +76,7 @@ private: ...@@ -75,6 +76,7 @@ private:
int gpu_mem_ = 4000; int gpu_mem_ = 4000;
int cpu_math_library_num_threads_ = 4; int cpu_math_library_num_threads_ = 4;
bool use_mkldnn_ = false; bool use_mkldnn_ = false;
bool use_zero_copy_run_ = false;
int max_side_len_ = 960; int max_side_len_ = 960;
......
...@@ -38,12 +38,14 @@ public: ...@@ -38,12 +38,14 @@ public:
explicit CRNNRecognizer(const std::string &model_dir, const bool &use_gpu, explicit CRNNRecognizer(const std::string &model_dir, const bool &use_gpu,
const int &gpu_id, const int &gpu_mem, const int &gpu_id, const int &gpu_mem,
const int &cpu_math_library_num_threads, const int &cpu_math_library_num_threads,
const bool &use_mkldnn, const string &label_path) { const bool &use_mkldnn, const bool &use_zero_copy_run,
const string &label_path) {
this->use_gpu_ = use_gpu; this->use_gpu_ = use_gpu;
this->gpu_id_ = gpu_id; this->gpu_id_ = gpu_id;
this->gpu_mem_ = gpu_mem; this->gpu_mem_ = gpu_mem;
this->cpu_math_library_num_threads_ = cpu_math_library_num_threads; this->cpu_math_library_num_threads_ = cpu_math_library_num_threads;
this->use_mkldnn_ = use_mkldnn; this->use_mkldnn_ = use_mkldnn;
this->use_zero_copy_run_ = use_zero_copy_run;
this->label_list_ = Utility::ReadDict(label_path); this->label_list_ = Utility::ReadDict(label_path);
this->label_list_.push_back(" "); this->label_list_.push_back(" ");
...@@ -64,6 +66,7 @@ private: ...@@ -64,6 +66,7 @@ private:
int gpu_mem_ = 4000; int gpu_mem_ = 4000;
int cpu_math_library_num_threads_ = 4; int cpu_math_library_num_threads_ = 4;
bool use_mkldnn_ = false; bool use_mkldnn_ = false;
bool use_zero_copy_run_ = false;
std::vector<std::string> label_list_; std::vector<std::string> label_list_;
......
...@@ -48,14 +48,15 @@ int main(int argc, char **argv) { ...@@ -48,14 +48,15 @@ int main(int argc, char **argv) {
cv::Mat srcimg = cv::imread(img_path, cv::IMREAD_COLOR); cv::Mat srcimg = cv::imread(img_path, cv::IMREAD_COLOR);
DBDetector det(config.det_model_dir, config.use_gpu, config.gpu_id, DBDetector det(
config.gpu_mem, config.cpu_math_library_num_threads, config.det_model_dir, config.use_gpu, config.gpu_id, config.gpu_mem,
config.use_mkldnn, config.max_side_len, config.det_db_thresh, config.cpu_math_library_num_threads, config.use_mkldnn,
config.det_db_box_thresh, config.det_db_unclip_ratio, config.use_zero_copy_run, config.max_side_len, config.det_db_thresh,
config.visualize); config.det_db_box_thresh, config.det_db_unclip_ratio, config.visualize);
CRNNRecognizer rec(config.rec_model_dir, config.use_gpu, config.gpu_id, CRNNRecognizer rec(config.rec_model_dir, config.use_gpu, config.gpu_id,
config.gpu_mem, config.cpu_math_library_num_threads, config.gpu_mem, config.cpu_math_library_num_threads,
config.use_mkldnn, config.char_list_file); config.use_mkldnn, config.use_zero_copy_run,
config.char_list_file);
auto start = std::chrono::system_clock::now(); auto start = std::chrono::system_clock::now();
std::vector<std::vector<std::vector<int>>> boxes; std::vector<std::vector<std::vector<int>>> boxes;
......
...@@ -32,7 +32,7 @@ void DBDetector::LoadModel(const std::string &model_dir) { ...@@ -32,7 +32,7 @@ void DBDetector::LoadModel(const std::string &model_dir) {
// false for zero copy tensor // false for zero copy tensor
// true for commom tensor // true for commom tensor
config.SwitchUseFeedFetchOps(true); config.SwitchUseFeedFetchOps(!this->use_zero_copy_run_);
// true for multiple input // true for multiple input
config.SwitchSpecifyInputNames(true); config.SwitchSpecifyInputNames(true);
...@@ -61,12 +61,21 @@ void DBDetector::Run(cv::Mat &img, ...@@ -61,12 +61,21 @@ void DBDetector::Run(cv::Mat &img,
this->permute_op_.Run(&resize_img, input.data()); this->permute_op_.Run(&resize_img, input.data());
// Inference. // Inference.
paddle::PaddleTensor input_t; if (this->use_zero_copy_run_) {
input_t.shape = {1, 3, resize_img.rows, resize_img.cols}; auto input_names = this->predictor_->GetInputNames();
input_t.data = paddle::PaddleBuf(input.data(), input.size() * sizeof(float)); auto input_t = this->predictor_->GetInputTensor(input_names[0]);
input_t.dtype = PaddleDType::FLOAT32; input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
std::vector<paddle::PaddleTensor> outputs; input_t->copy_from_cpu(input.data());
this->predictor_->Run({input_t}, &outputs, 1); this->predictor_->ZeroCopyRun();
} else {
paddle::PaddleTensor input_t;
input_t.shape = {1, 3, resize_img.rows, resize_img.cols};
input_t.data =
paddle::PaddleBuf(input.data(), input.size() * sizeof(float));
input_t.dtype = PaddleDType::FLOAT32;
std::vector<paddle::PaddleTensor> outputs;
this->predictor_->Run({input_t}, &outputs, 1);
}
std::vector<float> out_data; std::vector<float> out_data;
auto output_names = this->predictor_->GetOutputNames(); auto output_names = this->predictor_->GetOutputNames();
......
...@@ -40,13 +40,21 @@ void CRNNRecognizer::Run(std::vector<std::vector<std::vector<int>>> boxes, ...@@ -40,13 +40,21 @@ void CRNNRecognizer::Run(std::vector<std::vector<std::vector<int>>> boxes,
this->permute_op_.Run(&resize_img, input.data()); this->permute_op_.Run(&resize_img, input.data());
// Inference. // Inference.
paddle::PaddleTensor input_t; if (this->use_zero_copy_run_) {
input_t.shape = {1, 3, resize_img.rows, resize_img.cols}; auto input_names = this->predictor_->GetInputNames();
input_t.data = auto input_t = this->predictor_->GetInputTensor(input_names[0]);
paddle::PaddleBuf(input.data(), input.size() * sizeof(float)); input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
input_t.dtype = PaddleDType::FLOAT32; input_t->copy_from_cpu(input.data());
std::vector<paddle::PaddleTensor> outputs; this->predictor_->ZeroCopyRun();
this->predictor_->Run({input_t}, &outputs, 1); } else {
paddle::PaddleTensor input_t;
input_t.shape = {1, 3, resize_img.rows, resize_img.cols};
input_t.data =
paddle::PaddleBuf(input.data(), input.size() * sizeof(float));
input_t.dtype = PaddleDType::FLOAT32;
std::vector<paddle::PaddleTensor> outputs;
this->predictor_->Run({input_t}, &outputs, 1);
}
std::vector<int64_t> rec_idx; std::vector<int64_t> rec_idx;
auto output_names = this->predictor_->GetOutputNames(); auto output_names = this->predictor_->GetOutputNames();
...@@ -124,7 +132,7 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) { ...@@ -124,7 +132,7 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
// false for zero copy tensor // false for zero copy tensor
// true for commom tensor // true for commom tensor
config.SwitchUseFeedFetchOps(true); config.SwitchUseFeedFetchOps(!this->use_zero_copy_run_);
// true for multiple input // true for multiple input
config.SwitchSpecifyInputNames(true); config.SwitchSpecifyInputNames(true);
......
...@@ -4,6 +4,7 @@ gpu_id 0 ...@@ -4,6 +4,7 @@ gpu_id 0
gpu_mem 4000 gpu_mem 4000
cpu_math_library_num_threads 10 cpu_math_library_num_threads 10
use_mkldnn 0 use_mkldnn 0
use_zero_copy_run 1
# det config # det config
max_side_len 960 max_side_len 960
......
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册