From 36358fdf64339c7932114551f5f79beabaf086f9 Mon Sep 17 00:00:00 2001
From: andyjpaddle
Date: Fri, 22 Apr 2022 09:08:01 +0000
Subject: [PATCH] update cpp infer for rec

---
 deploy/cpp_infer/include/args.h        |  2 ++
 deploy/cpp_infer/include/ocr_rec.h     | 11 +++++++++--
 deploy/cpp_infer/readme.md             |  2 ++
 deploy/cpp_infer/readme_ch.md          |  2 ++
 deploy/cpp_infer/src/args.cpp          |  2 ++
 deploy/cpp_infer/src/ocr_rec.cpp       | 21 ++++++++++++---------
 deploy/cpp_infer/src/paddleocr.cpp     |  3 ++-
 deploy/cpp_infer/src/preprocess_op.cpp | 23 ++++++++++++-----------
 8 files changed, 43 insertions(+), 23 deletions(-)

diff --git a/deploy/cpp_infer/include/args.h b/deploy/cpp_infer/include/args.h
index fe5d45f6..473ff25d 100644
--- a/deploy/cpp_infer/include/args.h
+++ b/deploy/cpp_infer/include/args.h
@@ -46,6 +46,8 @@ DECLARE_int32(cls_batch_num);
 DECLARE_string(rec_model_dir);
 DECLARE_int32(rec_batch_num);
 DECLARE_string(rec_char_dict_path);
+DECLARE_int32(rec_img_h);
+DECLARE_int32(rec_img_w);
 // forward related
 DECLARE_bool(det);
 DECLARE_bool(rec);
diff --git a/deploy/cpp_infer/include/ocr_rec.h b/deploy/cpp_infer/include/ocr_rec.h
index f634b6dc..30f8efa9 100644
--- a/deploy/cpp_infer/include/ocr_rec.h
+++ b/deploy/cpp_infer/include/ocr_rec.h
@@ -45,7 +45,8 @@ public:
                           const bool &use_mkldnn, const string &label_path,
                           const bool &use_tensorrt, const std::string &precision,
-                          const int &rec_batch_num) {
+                          const int &rec_batch_num, const int &rec_img_h,
+                          const int &rec_img_w) {
     this->use_gpu_ = use_gpu;
     this->gpu_id_ = gpu_id;
     this->gpu_mem_ = gpu_mem;
@@ -54,6 +55,10 @@ public:
     this->use_tensorrt_ = use_tensorrt;
     this->precision_ = precision;
     this->rec_batch_num_ = rec_batch_num;
+    this->rec_img_h_ = rec_img_h;
+    this->rec_img_w_ = rec_img_w;
+    std::vector<int> rec_image_shape = {3, rec_img_h, rec_img_w};
+    this->rec_image_shape_ = rec_image_shape;
 
     this->label_list_ = Utility::ReadDict(label_path);
     this->label_list_.insert(this->label_list_.begin(),
@@ -86,7 +91,9 @@ private:
   bool use_tensorrt_ = false;
   std::string precision_ = "fp32";
   int rec_batch_num_ = 6;
-
+  int rec_img_h_ = 32;
+  int rec_img_w_ = 320;
+  std::vector<int> rec_image_shape_ = {3, rec_img_h_, rec_img_w_};
   // pre-process
   CrnnResizeImg resize_op_;
   Normalize normalize_op_;
diff --git a/deploy/cpp_infer/readme.md b/deploy/cpp_infer/readme.md
index 4290fbb0..66c3a4c0 100644
--- a/deploy/cpp_infer/readme.md
+++ b/deploy/cpp_infer/readme.md
@@ -323,6 +323,8 @@ More parameters are as follows,
 |rec_model_dir|string|-|Address of recognition inference model|
 |rec_char_dict_path|string|../../ppocr/utils/ppocr_keys_v1.txt|dictionary file|
 |rec_batch_num|int|6|batch size of recognition|
+|rec_img_h|int|32|image height of recognition|
+|rec_img_w|int|320|image width of recognition|
 
 * Multi-language inference is also supported in PaddleOCR, you can refer to [recognition tutorial](../../doc/doc_en/recognition_en.md) for more supported languages and models in PaddleOCR. Specifically, if you want to infer using multi-language models, you just need to modify values of `rec_char_dict_path` and `rec_model_dir`.
diff --git a/deploy/cpp_infer/readme_ch.md b/deploy/cpp_infer/readme_ch.md
index 95b28397..47c7e032 100644
--- a/deploy/cpp_infer/readme_ch.md
+++ b/deploy/cpp_infer/readme_ch.md
@@ -336,6 +336,8 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
 |rec_model_dir|string|-|识别模型inference model地址|
 |rec_char_dict_path|string|../../ppocr/utils/ppocr_keys_v1.txt|字典文件|
 |rec_batch_num|int|6|识别模型batchsize|
+|rec_img_h|int|32|识别模型输入图像高度|
+|rec_img_w|int|320|识别模型输入图像宽度|
 
 * PaddleOCR也支持多语言的预测,更多支持的语言和模型可以参考[识别文档](../../doc/doc_ch/recognition.md)中的多语言字典与模型部分,如果希望进行多语言预测,只需将修改`rec_char_dict_path`(字典文件路径)以及`rec_model_dir`(inference模型路径)字段即可。
diff --git a/deploy/cpp_infer/src/args.cpp b/deploy/cpp_infer/src/args.cpp
index 82cfb54a..fe582367 100644
--- a/deploy/cpp_infer/src/args.cpp
+++ b/deploy/cpp_infer/src/args.cpp
@@ -47,6 +47,8 @@ DEFINE_string(rec_model_dir, "", "Path of rec inference model.");
 DEFINE_int32(rec_batch_num, 6, "rec_batch_num.");
 DEFINE_string(rec_char_dict_path, "../../ppocr/utils/ppocr_keys_v1.txt",
               "Path of dictionary.");
+DEFINE_int32(rec_img_h, 32, "rec image height");
+DEFINE_int32(rec_img_w, 320, "rec image width");
 
 // ocr forward related
 DEFINE_bool(det, true, "Whether use det in forward.");
diff --git a/deploy/cpp_infer/src/ocr_rec.cpp b/deploy/cpp_infer/src/ocr_rec.cpp
index 54ed3fef..f0adfcf0 100644
--- a/deploy/cpp_infer/src/ocr_rec.cpp
+++ b/deploy/cpp_infer/src/ocr_rec.cpp
@@ -39,7 +39,9 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
     auto preprocess_start = std::chrono::steady_clock::now();
     int end_img_no = min(img_num, beg_img_no + this->rec_batch_num_);
     int batch_num = end_img_no - beg_img_no;
-    float max_wh_ratio = 0;
+    int imgH = this->rec_image_shape_[1];
+    int imgW = this->rec_image_shape_[2];
+    float max_wh_ratio = imgW * 1.0 / imgH;
     for (int ino = beg_img_no; ino < end_img_no; ino++) {
       int h = img_list[indices[ino]].rows;
       int w = img_list[indices[ino]].cols;
@@ -47,28 +49,28 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
       max_wh_ratio = max(max_wh_ratio, wh_ratio);
     }
 
-    int batch_width = 0;
+    int batch_width = imgW;
     std::vector<cv::Mat> norm_img_batch;
     for (int ino = beg_img_no; ino < end_img_no; ino++) {
       cv::Mat srcimg;
       img_list[indices[ino]].copyTo(srcimg);
       cv::Mat resize_img;
       this->resize_op_.Run(srcimg, resize_img, max_wh_ratio,
-                           this->use_tensorrt_);
+                           this->use_tensorrt_, this->rec_image_shape_);
       this->normalize_op_.Run(&resize_img, this->mean_, this->scale_,
                               this->is_scale_);
       norm_img_batch.push_back(resize_img);
       batch_width = max(resize_img.cols, batch_width);
     }
 
-    std::vector<float> input(batch_num * 3 * 32 * batch_width, 0.0f);
+    std::vector<float> input(batch_num * 3 * imgH * batch_width, 0.0f);
     this->permute_op_.Run(norm_img_batch, input.data());
     auto preprocess_end = std::chrono::steady_clock::now();
     preprocess_diff += preprocess_end - preprocess_start;
 
     // Inference.
     auto input_names = this->predictor_->GetInputNames();
     auto input_t = this->predictor_->GetInputHandle(input_names[0]);
-    input_t->Reshape({batch_num, 3, 32, batch_width});
+    input_t->Reshape({batch_num, 3, imgH, batch_width});
     auto inference_start = std::chrono::steady_clock::now();
     input_t->CopyFromCpu(input.data());
     this->predictor_->Run();
@@ -142,13 +144,14 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
       precision = paddle_infer::Config::Precision::kInt8;
     }
     config.EnableTensorRtEngine(1 << 20, 10, 3, precision, false, false);
-
+    int imgH = this->rec_image_shape_[1];
+    int imgW = this->rec_image_shape_[2];
     std::map<std::string, std::vector<int>> min_input_shape = {
-        {"x", {1, 3, 32, 10}}, {"lstm_0.tmp_0", {10, 1, 96}}};
+        {"x", {1, 3, imgH, 10}}, {"lstm_0.tmp_0", {10, 1, 96}}};
     std::map<std::string, std::vector<int>> max_input_shape = {
-        {"x", {1, 3, 32, 2000}}, {"lstm_0.tmp_0", {1000, 1, 96}}};
+        {"x", {1, 3, imgH, 2000}}, {"lstm_0.tmp_0", {1000, 1, 96}}};
     std::map<std::string, std::vector<int>> opt_input_shape = {
-        {"x", {1, 3, 32, 320}}, {"lstm_0.tmp_0", {25, 1, 96}}};
+        {"x", {1, 3, imgH, imgW}}, {"lstm_0.tmp_0", {25, 1, 96}}};
     config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
                                   opt_input_shape);
diff --git a/deploy/cpp_infer/src/paddleocr.cpp b/deploy/cpp_infer/src/paddleocr.cpp
index 861461a0..9b297432 100644
--- a/deploy/cpp_infer/src/paddleocr.cpp
+++ b/deploy/cpp_infer/src/paddleocr.cpp
@@ -41,7 +41,8 @@ PaddleOCR::PaddleOCR() {
   this->recognizer_ = new CRNNRecognizer(
       FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem,
       FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_rec_char_dict_path,
-      FLAGS_use_tensorrt, FLAGS_precision, FLAGS_rec_batch_num);
+      FLAGS_use_tensorrt, FLAGS_precision, FLAGS_rec_batch_num,
+      FLAGS_rec_img_h, FLAGS_rec_img_w);
 }
 };
diff --git a/deploy/cpp_infer/src/preprocess_op.cpp b/deploy/cpp_infer/src/preprocess_op.cpp
index 14e8bd1d..fff49ba2 100644
--- a/deploy/cpp_infer/src/preprocess_op.cpp
+++ b/deploy/cpp_infer/src/preprocess_op.cpp
@@ -41,16 +41,17 @@ void Permute::Run(const cv::Mat *im, float *data) {
 }
 
 void PermuteBatch::Run(const std::vector<cv::Mat> imgs, float *data) {
-  for (int j = 0; j < imgs.size(); j ++){
-      int rh = imgs[j].rows;
-      int rw = imgs[j].cols;
-      int rc = imgs[j].channels();
-      for (int i = 0; i < rc; ++i) {
-          cv::extractChannel(imgs[j], cv::Mat(rh, rw, CV_32FC1, data + (j * rc + i) * rh * rw), i);
-      }
+  for (int j = 0; j < imgs.size(); j++) {
+    int rh = imgs[j].rows;
+    int rw = imgs[j].cols;
+    int rc = imgs[j].channels();
+    for (int i = 0; i < rc; ++i) {
+      cv::extractChannel(
+          imgs[j], cv::Mat(rh, rw, CV_32FC1, data + (j * rc + i) * rh * rw), i);
     }
+  }
 }
-
+
 void Normalize::Run(cv::Mat *im, const std::vector<float> &mean,
                     const std::vector<float> &scale, const bool is_scale) {
   double e = 1.0;
@@ -101,8 +102,8 @@ void CrnnResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio,
   imgC = rec_image_shape[0];
   imgH = rec_image_shape[1];
   imgW = rec_image_shape[2];
-
-  imgW = int(32 * wh_ratio);
+
+  imgW = int(imgH * wh_ratio);
   float ratio = float(img.cols) / float(img.rows);
   int resize_w, resize_h;
 
@@ -111,7 +112,7 @@ void CrnnResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio,
     resize_w = imgW;
   else
     resize_w = int(ceilf(imgH * ratio));
-
+
   cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
              cv::INTER_LINEAR);
   cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0,
--
GitLab
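
The effect of the new `rec_img_h` / `rec_img_w` flags is easiest to see in the resize step this patch parameterizes. The following is a minimal, self-contained sketch (not the repository code itself) approximating `CrnnResizeImg::Run` with a configurable `rec_image_shape = {C, H, W}`; the `CrnnResizeSketch` name, the `main` driver, the input path, and the 3x48x320 shape are illustrative assumptions only.

    #include <algorithm>
    #include <cmath>
    #include <iostream>
    #include <vector>

    #include <opencv2/opencv.hpp>

    // Approximation of the CRNN resize step with a configurable
    // rec_image_shape = {C, H, W}: resize to the fixed height, stretch the
    // width up to imgH * max_wh_ratio, then right-pad so every image in a
    // batch ends up with the same width.
    cv::Mat CrnnResizeSketch(const cv::Mat &img, float max_wh_ratio,
                             const std::vector<int> &rec_image_shape) {
      int imgH = rec_image_shape[1];
      int imgW = int(imgH * max_wh_ratio);  // batch-wide target width

      float ratio = float(img.cols) / float(img.rows);
      int resize_w;
      if (std::ceil(imgH * ratio) > imgW)
        resize_w = imgW;  // clamp unusually wide crops
      else
        resize_w = int(std::ceil(imgH * ratio));

      cv::Mat resize_img;
      cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
                 cv::INTER_LINEAR);
      // Pad the right edge with a constant so the batch can be stacked into
      // one input tensor.
      cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, imgW - resize_w,
                         cv::BORDER_CONSTANT, cv::Scalar(127, 127, 127));
      return resize_img;
    }

    int main() {
      cv::Mat crop = cv::imread("word_crop.jpg");  // illustrative input path
      if (crop.empty()) return 1;

      // Mirrors the patch: max_wh_ratio starts at rec_img_w / rec_img_h and
      // grows with the widest crop in the batch.
      std::vector<int> rec_image_shape = {3, 48, 320};  // e.g. --rec_img_h=48
      float max_wh_ratio = rec_image_shape[2] * 1.0f / rec_image_shape[1];
      max_wh_ratio = std::max(max_wh_ratio, float(crop.cols) / float(crop.rows));

      cv::Mat out = CrnnResizeSketch(crop, max_wh_ratio, rec_image_shape);
      std::cout << "resized to " << out.cols << "x" << out.rows << std::endl;
      return 0;
    }

With the default {3, 32, 320} this reproduces the previously hard-coded 32/320 behavior; passing a different --rec_img_h (for example 48) lets the same binary feed recognition models trained on taller inputs.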
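
The TensorRT hunk in `CRNNRecognizer::LoadModel` follows the same idea: the dynamic-shape profile for the input tensor "x" has to use the configured height so that runtime shapes stay inside the [min, max] range. Below is a sketch of that wiring, assuming a `paddle_infer::Config` that has already been given the model path, GPU settings and the `EnableTensorRtEngine(...)` call shown in the patch; the helper name is hypothetical, since in the patch this code lives inline in LoadModel and reads the values from this->rec_image_shape_.

    #include <map>
    #include <string>
    #include <vector>

    #include "paddle_inference_api.h"

    // Register TensorRT dynamic-shape ranges for the recognition input "x",
    // mirroring the patched CRNNRecognizer::LoadModel: height is fixed to the
    // configured rec_img_h, width varies from 10 to 2000 with rec_img_w as
    // the optimum. The lstm_0.tmp_0 entries match the CRNN head used here.
    void SetRecTrtDynamicShape(paddle_infer::Config &config, int rec_img_h,
                               int rec_img_w) {
      std::map<std::string, std::vector<int>> min_input_shape = {
          {"x", {1, 3, rec_img_h, 10}}, {"lstm_0.tmp_0", {10, 1, 96}}};
      std::map<std::string, std::vector<int>> max_input_shape = {
          {"x", {1, 3, rec_img_h, 2000}}, {"lstm_0.tmp_0", {1000, 1, 96}}};
      std::map<std::string, std::vector<int>> opt_input_shape = {
          {"x", {1, 3, rec_img_h, rec_img_w}}, {"lstm_0.tmp_0", {25, 1, 96}}};
      config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
                                    opt_input_shape);
    }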