Merge remote-tracking branch 'upstream/dygraph' into dy1

c2cf5351 · qq_25193841 · 0e408d3e · e97fc679 · c2cf5351 · c2cf5351
14 changed files
--- a/README_ch.md
+++ b/README_ch.md
@@ -8,7 +8,7 @@ PaddleOCR同时支持动态图与静态图两种编程范式
 - 静态图版本：develop分支
 **近期更新**
- 2021.1.4 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题，总数142个，每周一都会更新，欢迎大家持续关注。
+- 2021.1.11 [FAQ](./doc/doc_ch/FAQ.md)新增5个高频问题，总数147个，每周一都会更新，欢迎大家持续关注。
 - 2020.12.15 更新数据合成工具[Style-Text](./StyleText/README_ch.md)，可以批量合成大量与目标场景类似的图像，在多个场景验证，效果明显提升。
 - 2020.11.25 更新半自动标注工具[PPOCRLabel](./PPOCRLabel/README_ch.md)，辅助开发者高效完成标注任务，输出格式与PP-OCR训练任务完美衔接。
 - 2020.9.22 更新PP-OCR技术文章，https://arxiv.org/abs/2009.09941
@@ -102,7 +102,7 @@ PaddleOCR同时支持动态图与静态图两种编程范式
 - FAQ
    - [【精选】OCR精选10个问题](./doc/doc_ch/FAQ.md)
    - [【理论篇】OCR通用31个问题](./doc/doc_ch/FAQ.md)
-    - [【实战篇】PaddleOCR实战101个问题](./doc/doc_ch/FAQ.md)
+    - [【实战篇】PaddleOCR实战106个问题](./doc/doc_ch/FAQ.md)
 - [技术交流群](#欢迎加入PaddleOCR技术交流群)
 - [参考文献](./doc/doc_ch/reference.md)
 - [许可证书](#许可证书)

--- a/StyleText/README.md
+++ b/StyleText/README.md
@@ -52,7 +52,7 @@ If you save the model in another location, please modify the address of the mode
 ```
 bg_generator:
-  pretrain: style_text_rec/bg_generator
+  pretrain: style_text_models/bg_generator
 ...
 text_generator:
  pretrain: style_text_models/text_generator

--- a/configs/det/det_mv3_db.yml
+++ b/configs/det/det_mv3_db.yml
@@ -102,6 +102,7 @@ Train:
    drop_last: False
    batch_size_per_card: 16
    num_workers: 8
+    use_shared_memory: False
 Eval:
  dataset:
@@ -129,3 +130,4 @@ Eval:
    drop_last: False
    batch_size_per_card: 1 # must be 1
    num_workers: 8
+    use_shared_memory: False
\ No newline at end of file
--- a/configs/rec/rec_icdar15_train.yml
+++ b/configs/rec/rec_icdar15_train.yml
@@ -76,6 +76,7 @@ Train:
    batch_size_per_card: 256
    drop_last: True
    num_workers: 8
+    use_shared_memory: False
 Eval:
  dataset:
@@ -96,3 +97,4 @@ Eval:
    drop_last: False
    batch_size_per_card: 256
    num_workers: 4
+    use_shared_memory: False
--- a/deploy/cpp_infer/include/preprocess_op.h
+++ b/deploy/cpp_infer/include/preprocess_op.h
@@ -47,18 +47,20 @@ public:
 class ResizeImgType0 {
 public:
  virtual void Run(const cv::Mat &img, cv::Mat &resize_img, int max_size_len,
-                   float &ratio_h, float &ratio_w);
+                   float &ratio_h, float &ratio_w, bool use_tensorrt);
 };
 class CrnnResizeImg {
 public:
  virtual void Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio,
+                   bool use_tensorrt = false,
                   const std::vector<int> &rec_image_shape = {3, 32, 320});
 };
 class ClsResizeImg {
 public:
  virtual void Run(const cv::Mat &img, cv::Mat &resize_img,
+                   bool use_tensorrt = false,
                   const std::vector<int> &rec_image_shape = {3, 48, 192});
 };

--- a/deploy/cpp_infer/src/main.cpp
+++ b/deploy/cpp_infer/src/main.cpp
@@ -77,7 +77,7 @@ int main(int argc, char **argv) {
  auto end = std::chrono::system_clock::now();
  auto duration =
      std::chrono::duration_cast<std::chrono::microseconds>(end - start);
-  std::cout << "Cost"
+  std::cout << "Cost  "
            << double(duration.count()) *
                   std::chrono::microseconds::period::num /
                   std::chrono::microseconds::period::den

--- a/deploy/cpp_infer/src/ocr_cls.cpp
+++ b/deploy/cpp_infer/src/ocr_cls.cpp
@@ -25,7 +25,7 @@ cv::Mat Classifier::Run(cv::Mat &img) {
  int index = 0;
  float wh_ratio = float(img.cols) / float(img.rows);
-  this->resize_op_.Run(img, resize_img, cls_image_shape);
+  this->resize_op_.Run(img, resize_img, this->use_tensorrt_, cls_image_shape);
  this->normalize_op_.Run(&resize_img, this->mean_, this->scale_,
                          this->is_scale_);

--- a/deploy/cpp_infer/src/ocr_det.cpp
+++ b/deploy/cpp_infer/src/ocr_det.cpp
@@ -61,7 +61,8 @@ void DBDetector::Run(cv::Mat &img,
  cv::Mat srcimg;
  cv::Mat resize_img;
  img.copyTo(srcimg);
-  this->resize_op_.Run(img, resize_img, this->max_side_len_, ratio_h, ratio_w);
+  this->resize_op_.Run(img, resize_img, this->max_side_len_, ratio_h, ratio_w,
+                       this->use_tensorrt_);
  this->normalize_op_.Run(&resize_img, this->mean_, this->scale_,
                          this->is_scale_);

--- a/deploy/cpp_infer/src/ocr_rec.cpp
+++ b/deploy/cpp_infer/src/ocr_rec.cpp
@@ -33,7 +33,7 @@ void CRNNRecognizer::Run(std::vector<std::vector<std::vector<int>>> boxes,
    float wh_ratio = float(crop_img.cols) / float(crop_img.rows);
-    this->resize_op_.Run(crop_img, resize_img, wh_ratio);
+    this->resize_op_.Run(crop_img, resize_img, wh_ratio, this->use_tensorrt_);
    this->normalize_op_.Run(&resize_img, this->mean_, this->scale_,
                            this->is_scale_);

--- a/deploy/cpp_infer/src/preprocess_op.cpp
+++ b/deploy/cpp_infer/src/preprocess_op.cpp
@@ -60,7 +60,8 @@ void Normalize::Run(cv::Mat *im, const std::vector<float> &mean,
 }
 void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img,
-                         int max_size_len, float &ratio_h, float &ratio_w) {
+                         int max_size_len, float &ratio_h, float &ratio_w,
+                         bool use_tensorrt) {
  int w = img.cols;
  int h = img.rows;
@@ -89,14 +90,19 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img,
    resize_w = 32;
  else
    resize_w = (resize_w / 32) * 32;
+  if (!use_tensorrt) {
    cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
    ratio_h = float(resize_h) / float(h);
    ratio_w = float(resize_w) / float(w);
+  } else {
+    cv::resize(img, resize_img, cv::Size(640, 640));
+    ratio_h = float(640) / float(h);
+    ratio_w = float(640) / float(w);
+  }
 }
 void CrnnResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio,
+                        bool use_tensorrt,
                        const std::vector<int> &rec_image_shape) {
  int imgC, imgH, imgW;
  imgC = rec_image_shape[0];
@@ -111,12 +117,27 @@ void CrnnResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio,
    resize_w = imgW;
  else
    resize_w = int(ceilf(imgH * ratio));
+  if (!use_tensorrt) {
    cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
               cv::INTER_LINEAR);
+    cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0,
+                       int(imgW - resize_img.cols), cv::BORDER_CONSTANT,
+                       {127, 127, 127});
+  } else {
+    int k = int(img.cols * 32 / img.rows);
+    if (k >= 100) {
+      cv::resize(img, resize_img, cv::Size(100, 32), 0.f, 0.f,
+                 cv::INTER_LINEAR);
+    } else {
+      cv::resize(img, resize_img, cv::Size(k, 32), 0.f, 0.f, cv::INTER_LINEAR);
+      cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, int(100 - k),
+                         cv::BORDER_CONSTANT, {127, 127, 127});
+    }
+  }
 }
 void ClsResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img,
+                       bool use_tensorrt,
                       const std::vector<int> &rec_image_shape) {
  int imgC, imgH, imgW;
  imgC = rec_image_shape[0];
@@ -130,12 +151,16 @@ void ClsResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img,
  else
    resize_w = int(ceilf(imgH * ratio));
+  if (!use_tensorrt) {
    cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
               cv::INTER_LINEAR);
    if (resize_w < imgW) {
      cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, imgW - resize_w,
                         cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
    }
+  } else {
+    cv::resize(img, resize_img, cv::Size(100, 32), 0.f, 0.f, cv::INTER_LINEAR);
+  }
 }
 } // namespace PaddleOCR
--- a/doc/doc_ch/FAQ.md
+++ b/doc/doc_ch/FAQ.md
--- a/doc/doc_ch/algorithm_overview.md
+++ b/doc/doc_ch/algorithm_overview.md
@@ -21,13 +21,13 @@ PaddleOCR开源的文本检测算法列表：
 |EAST|MobileNetV3|78.24%|79.15%|78.69%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar)|
 |DB|ResNet50_vd|86.41%|78.72%|82.38%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)|
 |DB|MobileNetV3|77.29%|73.08%|75.12%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)|
-|SAST|ResNet50_vd|91.83%|81.80%|86.52%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_icdar15_v2.0_train.tar)|
+|SAST|ResNet50_vd|91.39%|83.77%|87.42%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_icdar15_v2.0_train.tar)|
 在Total-text文本检测公开数据集上，算法效果如下：
 |模型|骨干网络|precision|recall|Hmean|下载链接|
 | --- | --- | --- | --- | --- | --- |
-|SAST|ResNet50_vd|89.05%|76.80%|82.47%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_totaltext_v2.0_train.tar)|
+|SAST|ResNet50_vd|89.63%|78.44%|83.66%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_totaltext_v2.0_train.tar)|
 **说明：** SAST模型训练额外加入了icdar2013、icdar2017、COCO-Text、ArT等公开数据集进行调优。PaddleOCR用到的经过整理格式的英文公开数据集下载：[百度云地址](https://pan.baidu.com/s/12cPnZcVuV1zn5DOd4mqjVw) (提取码: 2bpi)

--- a/doc/doc_en/algorithm_overview_en.md
+++ b/doc/doc_en/algorithm_overview_en.md
@@ -23,13 +23,13 @@ On the ICDAR2015 dataset, the text detection result is as follows:
 |EAST|MobileNetV3|78.24%|79.15%|78.69%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar)|
 |DB|ResNet50_vd|86.41%|78.72%|82.38%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)|
 |DB|MobileNetV3|77.29%|73.08%|75.12%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)|
-|SAST|ResNet50_vd|91.83%|81.80%|86.52%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_icdar15_v2.0_train.tar)|
+|SAST|ResNet50_vd|91.39%|83.77%|87.42%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_icdar15_v2.0_train.tar)|
 On Total-Text dataset, the text detection result is as follows:
 |Model|Backbone|precision|recall|Hmean|Download link|
 | --- | --- | --- | --- | --- | --- |
-|SAST|ResNet50_vd|89.05%|76.80%|82.47%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_totaltext_v2.0_train.tar)|
+|SAST|ResNet50_vd|89.63%|78.44%|83.66%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_totaltext_v2.0_train.tar)|
 **Note：** Additional data, like icdar2013, icdar2017, COCO-Text, ArT, was added to the model training of SAST. Download English public dataset in organized format used by PaddleOCR from [Baidu Drive](https://pan.baidu.com/s/12cPnZcVuV1zn5DOd4mqjVw) (download code: 2bpi).

--- a/ppocr/data/__init__.py
+++ b/ppocr/data/__init__.py
@@ -66,8 +66,10 @@ def build_dataloader(config, mode, device, logger):
    batch_size = loader_config['batch_size_per_card']
    drop_last = loader_config['drop_last']
    num_workers = loader_config['num_workers']
+    if 'use_shared_memory' in loader_config.keys():
-    use_shared_memory = False
+        use_shared_memory = loader_config['use_shared_memory']
+    else:
+        use_shared_memory = True
    if mode == "Train":
        #Distribute data to multiple cards
        batch_sampler = DistributedBatchSampler(
@@ -75,7 +77,6 @@ def build_dataloader(config, mode, device, logger):
            batch_size=batch_size,
            shuffle=False,
            drop_last=drop_last)
-        use_shared_memory = True
    else:
        #Distribute data to single card
        batch_sampler = BatchSampler(