Merge remote-tracking branch 'origin/release/2.6' into release2.6

eba1286c · qq_25193841 · 974588ea · 4f49ea33 · eba1286c · eba1286c
31 changed file
--- a/README_ch.md
+++ b/README_ch.md
@@ -27,13 +27,6 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库，助力

 ## 📣 近期更新

- 💼 **2022.9.5 飞桨智慧金融行业系列直播课** 
-  - PaddleOCR发布四大范例：印章弯曲文本检测与识别、扫描版合同关键信息抽取、通用卡证结构化信息提取、中文表格识别与属性分析
-  - 9月6日起每周二、周四19点直播，扫码免费加入微信群获取直播链接，与行业专家深度交流 
-    <div align="center">
-    <img src="https://user-images.githubusercontent.com/50011306/188440561-d99fce4f-f6ef-4ec0-be7e-47a70b91633a.jpg"  width = "150" height = "150" />
-    </div>
-
 - **🔥2022.8.24 发布 PaddleOCR [release/2.6](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.6)**

  - 发布[PP-Structurev2](./ppstructure/README_ch.md)，系统功能性能全面升级，适配中文场景，新增支持[版面复原](./ppstructure/recovery/README_ch.md)，支持**一行命令完成PDF转Word**；
@@ -83,9 +76,10 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库，助力
 <a name="开源社区"></a>
 ## 👫 开源社区
 - **📑项目合作：** 如果您是企业开发者且有明确的OCR垂类应用需求，填写[问卷](https://paddle.wjx.cn/vj/QwF7GKw.aspx)后可免费与官方团队展开不同层次的合作。
- **👫加入社区：** 微信扫描二维码并填写问卷之后，加入交流群领取福利
-  - **获取PaddleOCR最新发版解说《OCR超强技术详解与产业应用实战》系列直播课回放链接**
-  - **10G重磅OCR学习大礼包：**《动手学OCR》电子书，配套讲解视频和notebook项目；66篇OCR相关顶会前沿论文打包放送，包括CVPR、AAAI、IJCAI、ICCV等；PaddleOCR历次发版直播课视频；OCR社区优秀开发者项目分享视频。
+- **👫加入社区：** 微信扫描二维码并填写问卷之后，加入交流群领取20G重磅OCR学习大礼包
+  - **包括《动手学OCR》电子书** ，配套讲解视频和notebook项目；PaddleOCR历次发版直播课视频；
+  - **OCR场景应用模型集合：** 包含数码管、液晶屏、车牌、高精度SVTR模型、手写体识别等垂类模型，覆盖通用、制造、金融、交通行业的主要OCR垂类应用。
+  - PDF2Word应用程序；OCR社区优秀开发者项目分享视频。
 - **🏅️社区项目**：[社区项目](./doc/doc_ch/thirdparty.md)文档中包含了社区用户**使用PaddleOCR开发的各种工具、应用**以及**为PaddleOCR贡献的功能、优化的文档与代码**等，是官方为社区开发者打造的荣誉墙，也是帮助优质项目宣传的广播站。                                                                                                                      
 - **🎁社区常规赛**：社区常规赛是面向OCR开发者的积分赛事，覆盖文档、代码、模型和应用四大类型，以季度为单位评选并发放奖励，赛题详情与报名方法可参考[链接](https://github.com/PaddlePaddle/PaddleOCR/issues/4982)。


--- a/configs/rec/PP-OCRv3/ch_PP-OCRv3_rec.yml
+++ b/configs/rec/PP-OCRv3/ch_PP-OCRv3_rec.yml
@@ -88,6 +88,7 @@ Train:
        prob: 0.5
        ext_data_num: 2
        image_shape: [48, 320, 3]
+        max_text_length: *max_text_length
    - RecAug:
    - MultiLabelEncode:
    - RecResizeImg:

--- a/configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml
+++ b/configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml
@@ -162,6 +162,7 @@ Train:
        prob: 0.5
        ext_data_num: 2
        image_shape: [48, 320, 3]
+        max_text_length: *max_text_length
    - RecAug:
    - MultiLabelEncode:
    - RecResizeImg:

--- a/configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml
+++ b/configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml
@@ -88,6 +88,7 @@ Train:
        prob: 0.5
        ext_data_num: 2
        image_shape: [48, 320, 3]
+        max_text_length: *max_text_length
    - RecAug:
    - MultiLabelEncode:
    - RecResizeImg:

--- a/deploy/cpp_infer/include/args.h
+++ b/deploy/cpp_infer/include/args.h
@@ -54,6 +54,7 @@ DECLARE_string(table_model_dir);
 DECLARE_int32(table_max_len);
 DECLARE_int32(table_batch_num);
 DECLARE_string(table_char_dict_path);
+DECLARE_bool(merge_no_span_structure);
 // forward related
 DECLARE_bool(det);
 DECLARE_bool(rec);

--- a/deploy/cpp_infer/include/paddlestructure.h
+++ b/deploy/cpp_infer/include/paddlestructure.h
@@ -54,15 +54,12 @@ private:
             std::vector<double> &time_info_det,
             std::vector<double> &time_info_rec,
             std::vector<double> &time_info_cls);
-  std::string
-  rebuild_table(std::vector<std::string> rec_html_tags,
-                std::vector<std::vector<std::vector<int>>> rec_boxes,
-                std::vector<OCRPredictResult> &ocr_result);
+  std::string rebuild_table(std::vector<std::string> rec_html_tags,
+                            std::vector<std::vector<int>> rec_boxes,
+                            std::vector<OCRPredictResult> &ocr_result);

-  float iou(std::vector<std::vector<int>> &box1,
-            std::vector<std::vector<int>> &box2);
-  float dis(std::vector<std::vector<int>> &box1,
-            std::vector<std::vector<int>> &box2);
+  float iou(std::vector<int> &box1, std::vector<int> &box2);
+  float dis(std::vector<int> &box1, std::vector<int> &box2);

  static bool comparison_dis(const std::vector<float> &dis1,
                             const std::vector<float> &dis2) {

--- a/deploy/cpp_infer/include/postprocess_op.h
+++ b/deploy/cpp_infer/include/postprocess_op.h
@@ -92,14 +92,13 @@ private:

 class TablePostProcessor {
 public:
-  void init(std::string label_path);
-  void
-  Run(std::vector<float> &loc_preds, std::vector<float> &structure_probs,
-      std::vector<float> &rec_scores, std::vector<int> &loc_preds_shape,
-      std::vector<int> &structure_probs_shape,
-      std::vector<std::vector<std::string>> &rec_html_tag_batch,
-      std::vector<std::vector<std::vector<std::vector<int>>>> &rec_boxes_batch,
-      std::vector<int> &width_list, std::vector<int> &height_list);
+  void init(std::string label_path, bool merge_no_span_structure = true);
+  void Run(std::vector<float> &loc_preds, std::vector<float> &structure_probs,
+           std::vector<float> &rec_scores, std::vector<int> &loc_preds_shape,
+           std::vector<int> &structure_probs_shape,
+           std::vector<std::vector<std::string>> &rec_html_tag_batch,
+           std::vector<std::vector<std::vector<int>>> &rec_boxes_batch,
+           std::vector<int> &width_list, std::vector<int> &height_list);

 private:
  std::vector<std::string> label_list_;

--- a/deploy/cpp_infer/include/structure_table.h
+++ b/deploy/cpp_infer/include/structure_table.h
@@ -44,7 +44,8 @@ public:
      const int &gpu_mem, const int &cpu_math_library_num_threads,
      const bool &use_mkldnn, const string &label_path,
      const bool &use_tensorrt, const std::string &precision,
-      const int &table_batch_num, const int &table_max_len) {
+      const int &table_batch_num, const int &table_max_len,
+      const bool &merge_no_span_structure) {
    this->use_gpu_ = use_gpu;
    this->gpu_id_ = gpu_id;
    this->gpu_mem_ = gpu_mem;
@@ -55,7 +56,7 @@ public:
    this->table_batch_num_ = table_batch_num;
    this->table_max_len_ = table_max_len;

-    this->post_processor_.init(label_path);
+    this->post_processor_.init(label_path, merge_no_span_structure);
    LoadModel(model_dir);
  }

@@ -65,7 +66,7 @@ public:
  void Run(std::vector<cv::Mat> img_list,
           std::vector<std::vector<std::string>> &rec_html_tags,
           std::vector<float> &rec_scores,
-           std::vector<std::vector<std::vector<std::vector<int>>>> &rec_boxes,
+           std::vector<std::vector<std::vector<int>>> &rec_boxes,
           std::vector<double> &times);

 private:

--- a/deploy/cpp_infer/include/utility.h
+++ b/deploy/cpp_infer/include/utility.h
@@ -42,6 +42,7 @@ struct OCRPredictResult {

 struct StructurePredictResult {
  std::vector<int> box;
+  std::vector<std::vector<int>> cell_box;
  std::string type;
  std::vector<OCRPredictResult> text_res;
  std::string html;
@@ -56,6 +57,10 @@ public:
                              const std::vector<OCRPredictResult> &ocr_result,
                              const std::string &save_path);

+  static void VisualizeBboxes(const cv::Mat &srcimg,
+                              const StructurePredictResult &structure_result,
+                              const std::string &save_path);
+
  template <class ForwardIterator>
  inline static size_t argmax(ForwardIterator first, ForwardIterator last) {
    return std::distance(first, std::max_element(first, last));
@@ -81,6 +86,9 @@ public:

  static void sorted_boxes(std::vector<OCRPredictResult> &ocr_result);

+  static std::vector<int> xyxyxyxy2xyxy(std::vector<std::vector<int>> &box);
+  static std::vector<int> xyxyxyxy2xyxy(std::vector<int> &box);
+
 private:
  static bool comparison_box(const OCRPredictResult &result1,
                             const OCRPredictResult &result2) {

--- a/deploy/cpp_infer/readme.md
+++ b/deploy/cpp_infer/readme.md
@@ -350,6 +350,7 @@ More parameters are as follows,
 |table_model_dir|string|-|Address of table recognition inference model|
 |table_char_dict_path|string|../../ppocr/utils/dict/table_structure_dict.txt|dictionary file|
 |table_max_len|int|488|The size of the long side of the input image of the table recognition model, the final input image size of the network is（table_max_len，table_max_len）|
+|merge_no_span_structure|bool|true|Whether to merge `<td>` and `</td>` into `<td></td>`|


 * Multi-language inference is also supported in PaddleOCR, you can refer to [recognition tutorial](../../doc/doc_en/recognition_en.md) for more supported languages and models in PaddleOCR. Specifically, if you want to infer using multi-language models, you just need to modify values of `rec_char_dict_path` and `rec_model_dir`.

--- a/deploy/cpp_infer/readme_ch.md
+++ b/deploy/cpp_infer/readme_ch.md
@@ -359,6 +359,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
 |table_model_dir|string|-|表格识别模型inference model地址|
 |table_char_dict_path|string|../../ppocr/utils/dict/table_structure_dict.txt|字典文件|
 |table_max_len|int|488|表格识别模型输入图像长边大小，最终网络输入图像大小为（table_max_len，table_max_len）|
+|merge_no_span_structure|bool|true|是否合并 `<td>` 和 `</td>` 为 `<td></td>`|


 * PaddleOCR也支持多语言的预测，更多支持的语言和模型可以参考[识别文档](../../doc/doc_ch/recognition.md)中的多语言字典与模型部分，如果希望进行多语言预测，只需将修改`rec_char_dict_path`（字典文件路径）以及`rec_model_dir`（inference模型路径）字段即可。

--- a/deploy/cpp_infer/src/args.cpp
+++ b/deploy/cpp_infer/src/args.cpp
@@ -55,8 +55,10 @@ DEFINE_int32(rec_img_w, 320, "rec image width");
 DEFINE_string(table_model_dir, "", "Path of table struture inference model.");
 DEFINE_int32(table_max_len, 488, "max len size of input image.");
 DEFINE_int32(table_batch_num, 1, "table_batch_num.");
+DEFINE_bool(merge_no_span_structure, true,
+            "Whether merge <td> and </td> to <td></td>");
 DEFINE_string(table_char_dict_path,
-              "../../ppocr/utils/dict/table_structure_dict.txt",
+              "../../ppocr/utils/dict/table_structure_dict_ch.txt",
              "Path of dictionary.");

 // ocr forward related

--- a/deploy/cpp_infer/src/main.cpp
+++ b/deploy/cpp_infer/src/main.cpp
@@ -120,6 +120,7 @@ void structure(std::vector<cv::String> &cv_all_img_names) {
      engine.structure(cv_all_img_names, false, FLAGS_table);
  for (int i = 0; i < cv_all_img_names.size(); i++) {
    cout << "predict img: " << cv_all_img_names[i] << endl;
+    cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
    for (int j = 0; j < structure_results[i].size(); j++) {
      std::cout << j << "\ttype: " << structure_results[i][j].type
                << ", region: [";
@@ -129,6 +130,11 @@ void structure(std::vector<cv::String> &cv_all_img_names) {
                << structure_results[i][j].box[3] << "], res: ";
      if (structure_results[i][j].type == "table") {
        std::cout << structure_results[i][j].html << std::endl;
+        std::string file_name = Utility::basename(cv_all_img_names[i]);
+
+        Utility::VisualizeBboxes(srcimg, structure_results[i][j],
+                                 FLAGS_output + "/" + std::to_string(j) + "_" +
+                                     file_name);
      } else {
        Utility::print_result(structure_results[i][j].text_res);
      }

--- a/deploy/cpp_infer/src/ocr_cls.cpp
+++ b/deploy/cpp_infer/src/ocr_cls.cpp
@@ -112,6 +112,11 @@ void Classifier::LoadModel(const std::string &model_dir) {
        precision = paddle_infer::Config::Precision::kInt8;
      }
      config.EnableTensorRtEngine(1 << 20, 10, 3, precision, false, false);
+      if (!Utility::PathExists("./trt_cls_shape.txt")){
+        config.CollectShapeRangeInfo("./trt_cls_shape.txt");
+      } else { 
+        config.EnableTunedTensorRtDynamicShape("./trt_cls_shape.txt", true);
+      }
    }
  } else {
    config.DisableGpu();

--- a/deploy/cpp_infer/src/ocr_det.cpp
+++ b/deploy/cpp_infer/src/ocr_det.cpp
@@ -32,49 +32,12 @@ void DBDetector::LoadModel(const std::string &model_dir) {
      if (this->precision_ == "int8") {
        precision = paddle_infer::Config::Precision::kInt8;
      }
-      config.EnableTensorRtEngine(1 << 20, 1, 20, precision, false, false);
-      std::map<std::string, std::vector<int>> min_input_shape = {
-          {"x", {1, 3, 50, 50}},
-          {"conv2d_92.tmp_0", {1, 120, 20, 20}},
-          {"conv2d_91.tmp_0", {1, 24, 10, 10}},
-          {"conv2d_59.tmp_0", {1, 96, 20, 20}},
-          {"nearest_interp_v2_1.tmp_0", {1, 256, 10, 10}},
-          {"nearest_interp_v2_2.tmp_0", {1, 256, 20, 20}},
-          {"conv2d_124.tmp_0", {1, 256, 20, 20}},
-          {"nearest_interp_v2_3.tmp_0", {1, 64, 20, 20}},
-          {"nearest_interp_v2_4.tmp_0", {1, 64, 20, 20}},
-          {"nearest_interp_v2_5.tmp_0", {1, 64, 20, 20}},
-          {"elementwise_add_7", {1, 56, 2, 2}},
-          {"nearest_interp_v2_0.tmp_0", {1, 256, 2, 2}}};
-      std::map<std::string, std::vector<int>> max_input_shape = {
-          {"x", {1, 3, 1536, 1536}},
-          {"conv2d_92.tmp_0", {1, 120, 400, 400}},
-          {"conv2d_91.tmp_0", {1, 24, 200, 200}},
-          {"conv2d_59.tmp_0", {1, 96, 400, 400}},
-          {"nearest_interp_v2_1.tmp_0", {1, 256, 200, 200}},
-          {"nearest_interp_v2_2.tmp_0", {1, 256, 400, 400}},
-          {"conv2d_124.tmp_0", {1, 256, 400, 400}},
-          {"nearest_interp_v2_3.tmp_0", {1, 64, 400, 400}},
-          {"nearest_interp_v2_4.tmp_0", {1, 64, 400, 400}},
-          {"nearest_interp_v2_5.tmp_0", {1, 64, 400, 400}},
-          {"elementwise_add_7", {1, 56, 400, 400}},
-          {"nearest_interp_v2_0.tmp_0", {1, 256, 400, 400}}};
-      std::map<std::string, std::vector<int>> opt_input_shape = {
-          {"x", {1, 3, 640, 640}},
-          {"conv2d_92.tmp_0", {1, 120, 160, 160}},
-          {"conv2d_91.tmp_0", {1, 24, 80, 80}},
-          {"conv2d_59.tmp_0", {1, 96, 160, 160}},
-          {"nearest_interp_v2_1.tmp_0", {1, 256, 80, 80}},
-          {"nearest_interp_v2_2.tmp_0", {1, 256, 160, 160}},
-          {"conv2d_124.tmp_0", {1, 256, 160, 160}},
-          {"nearest_interp_v2_3.tmp_0", {1, 64, 160, 160}},
-          {"nearest_interp_v2_4.tmp_0", {1, 64, 160, 160}},
-          {"nearest_interp_v2_5.tmp_0", {1, 64, 160, 160}},
-          {"elementwise_add_7", {1, 56, 40, 40}},
-          {"nearest_interp_v2_0.tmp_0", {1, 256, 40, 40}}};
-
-      config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
-                                    opt_input_shape);
+      config.EnableTensorRtEngine(1 << 30, 1, 20, precision, false, false);
+      if (!Utility::PathExists("./trt_det_shape.txt")){
+        config.CollectShapeRangeInfo("./trt_det_shape.txt");
+      } else { 
+        config.EnableTunedTensorRtDynamicShape("./trt_det_shape.txt", true);
+      }
    }
  } else {
    config.DisableGpu();

--- a/deploy/cpp_infer/src/ocr_rec.cpp
+++ b/deploy/cpp_infer/src/ocr_rec.cpp
@@ -148,19 +148,12 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
        precision = paddle_infer::Config::Precision::kInt8;
      }
      config.EnableTensorRtEngine(1 << 20, 10, 15, precision, false, false);
-      int imgH = this->rec_image_shape_[1];
-      int imgW = this->rec_image_shape_[2];
-      std::map<std::string, std::vector<int>> min_input_shape = {
-          {"x", {1, 3, imgH, 10}}, {"lstm_0.tmp_0", {10, 1, 96}}};
-      std::map<std::string, std::vector<int>> max_input_shape = {
-          {"x", {this->rec_batch_num_, 3, imgH, 2500}},
-          {"lstm_0.tmp_0", {1000, 1, 96}}};
-      std::map<std::string, std::vector<int>> opt_input_shape = {
-          {"x", {this->rec_batch_num_, 3, imgH, imgW}},
-          {"lstm_0.tmp_0", {25, 1, 96}}};
-
-      config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
-                                    opt_input_shape);
+      if (!Utility::PathExists("./trt_rec_shape.txt")){
+        config.CollectShapeRangeInfo("./trt_rec_shape.txt");
+      } else { 
+        config.EnableTunedTensorRtDynamicShape("./trt_rec_shape.txt", true);
+      }
+      
    }
  } else {
    config.DisableGpu();

--- a/deploy/cpp_infer/src/paddlestructure.cpp
+++ b/deploy/cpp_infer/src/paddlestructure.cpp
@@ -27,7 +27,7 @@ PaddleStructure::PaddleStructure() {
        FLAGS_table_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem,
        FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_table_char_dict_path,
        FLAGS_use_tensorrt, FLAGS_precision, FLAGS_table_batch_num,
-        FLAGS_table_max_len);
+        FLAGS_table_max_len, FLAGS_merge_no_span_structure);
  }
 };

@@ -42,7 +42,7 @@ PaddleStructure::structure(std::vector<cv::String> cv_all_img_names,
  std::vector<std::vector<StructurePredictResult>> structure_results;

  if (!Utility::PathExists(FLAGS_output) && FLAGS_det) {
-    mkdir(FLAGS_output.c_str(), 0777);
+    Utility::CreateDir(FLAGS_output);
  }
  for (int i = 0; i < cv_all_img_names.size(); ++i) {
    std::vector<StructurePredictResult> structure_result;
@@ -84,7 +84,7 @@ void PaddleStructure::table(cv::Mat img,
  // predict structure
  std::vector<std::vector<std::string>> structure_html_tags;
  std::vector<float> structure_scores(1, 0);
-  std::vector<std::vector<std::vector<std::vector<int>>>> structure_boxes;
+  std::vector<std::vector<std::vector<int>>> structure_boxes;
  std::vector<double> structure_imes;
  std::vector<cv::Mat> img_list;
  img_list.push_back(img);
@@ -103,20 +103,15 @@ void PaddleStructure::table(cv::Mat img,
    this->det(img_list[i], ocr_result, time_info_det);
    // crop image
    std::vector<cv::Mat> rec_img_list;
+    std::vector<int> ocr_box;
    for (int j = 0; j < ocr_result.size(); j++) {
-      int x_collect[4] = {ocr_result[j].box[0][0], ocr_result[j].box[1][0],
-                          ocr_result[j].box[2][0], ocr_result[j].box[3][0]};
-      int y_collect[4] = {ocr_result[j].box[0][1], ocr_result[j].box[1][1],
-                          ocr_result[j].box[2][1], ocr_result[j].box[3][1]};
-      int left = int(*std::min_element(x_collect, x_collect + 4));
-      int right = int(*std::max_element(x_collect, x_collect + 4));
-      int top = int(*std::min_element(y_collect, y_collect + 4));
-      int bottom = int(*std::max_element(y_collect, y_collect + 4));
-      std::vector<int> box{max(0, left - expand_pixel),
-                           max(0, top - expand_pixel),
-                           min(img_list[i].cols, right + expand_pixel),
-                           min(img_list[i].rows, bottom + expand_pixel)};
-      cv::Mat crop_img = Utility::crop_image(img_list[i], box);
+      ocr_box = Utility::xyxyxyxy2xyxy(ocr_result[j].box); // [left, top, right, bottom] of the detected quad
+      ocr_box[0] = max(0, ocr_box[0] - expand_pixel); // grow the crop by expand_pixel, clamped to the image
+      ocr_box[1] = max(0, ocr_box[1] - expand_pixel);
+      ocr_box[2] = min(img_list[i].cols, ocr_box[2] + expand_pixel);
+      ocr_box[3] = min(img_list[i].rows, ocr_box[3] + expand_pixel);
+
+      cv::Mat crop_img = Utility::crop_image(img_list[i], ocr_box);
      rec_img_list.push_back(crop_img);
    }
    // rec
@@ -125,38 +120,37 @@ void PaddleStructure::table(cv::Mat img,
    html = this->rebuild_table(structure_html_tags[i], structure_boxes[i],
                               ocr_result);
    structure_result.html = html;
+    structure_result.cell_box = structure_boxes[i];
    structure_result.html_score = structure_scores[i];
  }
 };

-std::string PaddleStructure::rebuild_table(
-    std::vector<std::string> structure_html_tags,
-    std::vector<std::vector<std::vector<int>>> structure_boxes,
-    std::vector<OCRPredictResult> &ocr_result) {
+std::string
+PaddleStructure::rebuild_table(std::vector<std::string> structure_html_tags,
+                               std::vector<std::vector<int>> structure_boxes,
+                               std::vector<OCRPredictResult> &ocr_result) {
  // match text in same cell
  std::vector<std::vector<string>> matched(structure_boxes.size(),
                                           std::vector<std::string>());

+  std::vector<int> ocr_box;
+  std::vector<int> structure_box;
  for (int i = 0; i < ocr_result.size(); i++) {
+    ocr_box = Utility::xyxyxyxy2xyxy(ocr_result[i].box);
+    ocr_box[0] -= 1;
+    ocr_box[1] -= 1;
+    ocr_box[2] += 1;
+    ocr_box[3] += 1;
    std::vector<std::vector<float>> dis_list(structure_boxes.size(),
                                             std::vector<float>(3, 100000.0));
    for (int j = 0; j < structure_boxes.size(); j++) {
-      int x_collect[4] = {ocr_result[i].box[0][0], ocr_result[i].box[1][0],
-                          ocr_result[i].box[2][0], ocr_result[i].box[3][0]};
-      int y_collect[4] = {ocr_result[i].box[0][1], ocr_result[i].box[1][1],
-                          ocr_result[i].box[2][1], ocr_result[i].box[3][1]};
-      int left = int(*std::min_element(x_collect, x_collect + 4));
-      int right = int(*std::max_element(x_collect, x_collect + 4));
-      int top = int(*std::min_element(y_collect, y_collect + 4));
-      int bottom = int(*std::max_element(y_collect, y_collect + 4));
-      std::vector<std::vector<int>> box(2, std::vector<int>(2, 0));
-      box[0][0] = left - 1;
-      box[0][1] = top - 1;
-      box[1][0] = right + 1;
-      box[1][1] = bottom + 1;
-
-      dis_list[j][0] = this->dis(box, structure_boxes[j]);
-      dis_list[j][1] = 1 - this->iou(box, structure_boxes[j]);
+      if (structure_boxes[j].size() == 8) { // 8 values = 4-point polygon; index with j, the cell being compared
+        structure_box = Utility::xyxyxyxy2xyxy(structure_boxes[j]);
+      } else {
+        structure_box = structure_boxes[j];
+      }
+      dis_list[j][0] = this->dis(ocr_box, structure_box);
+      dis_list[j][1] = 1 - this->iou(ocr_box, structure_box);
      dis_list[j][2] = j;
    }
    // find min dis idx
@@ -164,6 +158,7 @@ std::string PaddleStructure::rebuild_table(
              PaddleStructure::comparison_dis);
    matched[dis_list[0][2]].push_back(ocr_result[i].text);
  }
+
  // get pred html
  std::string html_str = "";
  int td_tag_idx = 0;
@@ -221,19 +216,18 @@ std::string PaddleStructure::rebuild_table(
  return html_str;
 }

-float PaddleStructure::iou(std::vector<std::vector<int>> &box1,
-                           std::vector<std::vector<int>> &box2) {
-  int area1 = max(0, box1[1][0] - box1[0][0]) * max(0, box1[1][1] - box1[0][1]);
-  int area2 = max(0, box2[1][0] - box2[0][0]) * max(0, box2[1][1] - box2[0][1]);
+float PaddleStructure::iou(std::vector<int> &box1, std::vector<int> &box2) {
+  int area1 = max(0, box1[2] - box1[0]) * max(0, box1[3] - box1[1]);
+  int area2 = max(0, box2[2] - box2[0]) * max(0, box2[3] - box2[1]);

  // computing the sum_area
  int sum_area = area1 + area2;

  // find the each point of intersect rectangle
-  int x1 = max(box1[0][0], box2[0][0]);
-  int y1 = max(box1[0][1], box2[0][1]);
-  int x2 = min(box1[1][0], box2[1][0]);
-  int y2 = min(box1[1][1], box2[1][1]);
+  int x1 = max(box1[0], box2[0]);
+  int y1 = max(box1[1], box2[1]);
+  int x2 = min(box1[2], box2[2]);
+  int y2 = min(box1[3], box2[3]);

  // judge if there is an intersect
  if (y1 >= y2 || x1 >= x2) {
@@ -244,17 +238,16 @@ float PaddleStructure::iou(std::vector<std::vector<int>> &box1,
  }
 }

-float PaddleStructure::dis(std::vector<std::vector<int>> &box1,
-                           std::vector<std::vector<int>> &box2) {
-  int x1_1 = box1[0][0];
-  int y1_1 = box1[0][1];
-  int x2_1 = box1[1][0];
-  int y2_1 = box1[1][1];
+float PaddleStructure::dis(std::vector<int> &box1, std::vector<int> &box2) {
+  int x1_1 = box1[0];
+  int y1_1 = box1[1];
+  int x2_1 = box1[2];
+  int y2_1 = box1[3];

-  int x1_2 = box2[0][0];
-  int y1_2 = box2[0][1];
-  int x2_2 = box2[1][0];
-  int y2_2 = box2[1][1];
+  int x1_2 = box2[0];
+  int y1_2 = box2[1];
+  int x2_2 = box2[2];
+  int y2_2 = box2[3];

  float dis =
      abs(x1_2 - x1_1) + abs(y1_2 - y1_1) + abs(x2_2 - x2_1) + abs(y2_2 - y2_1);

--- a/deploy/cpp_infer/src/postprocess_op.cpp
+++ b/deploy/cpp_infer/src/postprocess_op.cpp
@@ -352,8 +352,21 @@ std::vector<std::vector<std::vector<int>>> DBPostProcessor::FilterTagDetRes(
  return root_points;
 }

-void TablePostProcessor::init(std::string label_path) {
+void TablePostProcessor::init(std::string label_path, // loads the label dictionary read by Utility::ReadDict
+                              bool merge_no_span_structure) { // true: use "<td></td>" in place of "<td>"
   this->label_list_ = Utility::ReadDict(label_path);
+  if (merge_no_span_structure) {
+    this->label_list_.push_back("<td></td>"); // NOTE(review): appended unconditionally - confirm the dictionary never already contains it
+    std::vector<std::string>::iterator it;
+    for (it = this->label_list_.begin(); it != this->label_list_.end();) { // no ++it here: erase() already yields the next iterator
+      if (*it == "<td>") {
+        it = this->label_list_.erase(it); // drop every "<td>" entry
+      } else {
+        ++it;
+      }
+    }
+  }
+  // add_special_char: put the begin token first and the end token last
   this->label_list_.insert(this->label_list_.begin(), this->beg);
   this->label_list_.push_back(this->end);
  }
@@ -363,12 +376,12 @@ void TablePostProcessor::Run(
    std::vector<float> &rec_scores, std::vector<int> &loc_preds_shape,
    std::vector<int> &structure_probs_shape,
    std::vector<std::vector<std::string>> &rec_html_tag_batch,
-    std::vector<std::vector<std::vector<std::vector<int>>>> &rec_boxes_batch,
+    std::vector<std::vector<std::vector<int>>> &rec_boxes_batch,
    std::vector<int> &width_list, std::vector<int> &height_list) {
  for (int batch_idx = 0; batch_idx < structure_probs_shape[0]; batch_idx++) {
    // image tags and boxs
    std::vector<std::string> rec_html_tags;
-    std::vector<std::vector<std::vector<int>>> rec_boxes;
+    std::vector<std::vector<int>> rec_boxes;

    float score = 0.f;
    int count = 0;
@@ -378,7 +391,7 @@ void TablePostProcessor::Run(
    // step
    for (int step_idx = 0; step_idx < structure_probs_shape[1]; step_idx++) {
      std::string html_tag;
-      std::vector<std::vector<int>> rec_box;
+      std::vector<int> rec_box;
      // html tag
      int step_start_idx = (batch_idx * structure_probs_shape[1] + step_idx) *
                           structure_probs_shape[2];
@@ -399,17 +412,19 @@ void TablePostProcessor::Run(
      count += 1;
      score += char_score;
      rec_html_tags.push_back(html_tag);
+
      // box
      if (html_tag == "<td>" || html_tag == "<td" || html_tag == "<td></td>") {
-        for (int point_idx = 0; point_idx < loc_preds_shape[2];
-             point_idx += 2) {
-          std::vector<int> point(2, 0);
+        for (int point_idx = 0; point_idx < loc_preds_shape[2]; point_idx++) {
          step_start_idx = (batch_idx * structure_probs_shape[1] + step_idx) *
                               loc_preds_shape[2] +
                           point_idx;
-          point[0] = int(loc_preds[step_start_idx] * width_list[batch_idx]);
-          point[1] =
-              int(loc_preds[step_start_idx + 1] * height_list[batch_idx]);
+          float point = loc_preds[step_start_idx];
+          if (point_idx % 2 == 0) {
+            point = int(point * width_list[batch_idx]);
+          } else {
+            point = int(point * height_list[batch_idx]);
+          }
          rec_box.push_back(point);
        }
        rec_boxes.push_back(rec_box);

--- a/deploy/cpp_infer/src/structure_table.cpp
+++ b/deploy/cpp_infer/src/structure_table.cpp
@@ -20,7 +20,7 @@ void StructureTableRecognizer::Run(
    std::vector<cv::Mat> img_list,
    std::vector<std::vector<std::string>> &structure_html_tags,
    std::vector<float> &structure_scores,
-    std::vector<std::vector<std::vector<std::vector<int>>>> &structure_boxes,
+    std::vector<std::vector<std::vector<int>>> &structure_boxes,
    std::vector<double> &times) {
  std::chrono::duration<float> preprocess_diff =
      std::chrono::steady_clock::now() - std::chrono::steady_clock::now();
@@ -89,8 +89,7 @@ void StructureTableRecognizer::Run(
    auto postprocess_start = std::chrono::steady_clock::now();
    std::vector<std::vector<std::string>> structure_html_tag_batch;
    std::vector<float> structure_score_batch;
-    std::vector<std::vector<std::vector<std::vector<int>>>>
-        structure_boxes_batch;
+    std::vector<std::vector<std::vector<int>>> structure_boxes_batch;
    this->post_processor_.Run(loc_preds, structure_probs, structure_score_batch,
                              predict_shape0, predict_shape1,
                              structure_html_tag_batch, structure_boxes_batch,

--- a/deploy/cpp_infer/src/utility.cpp
+++ b/deploy/cpp_infer/src/utility.cpp
@@ -65,6 +65,37 @@ void Utility::VisualizeBboxes(const cv::Mat &srcimg,
            << std::endl;
 }

+void Utility::VisualizeBboxes(const cv::Mat &srcimg, // draws every cell_box of structure_result on a copy of srcimg
+                              const StructurePredictResult &structure_result,
+                              const std::string &save_path) { // save_path: where the annotated image is written
+  cv::Mat img_vis;
+  srcimg.copyTo(img_vis); // draw on a copy so the caller's image is left untouched
+  for (int n = 0; n < structure_result.cell_box.size(); n++) {
+    if (structure_result.cell_box[n].size() == 8) { // 8 values: four (x, y) corner points
+      cv::Point rook_points[4];
+      for (int m = 0; m < structure_result.cell_box[n].size(); m += 2) { // consume the values as x, y pairs
+        rook_points[m / 2] =
+            cv::Point(int(structure_result.cell_box[n][m]),
+                      int(structure_result.cell_box[n][m + 1]));
+      }
+      const cv::Point *ppt[1] = {rook_points};
+      int npt[] = {4};
+      cv::polylines(img_vis, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0); // one closed 4-point contour, green, thickness 2
+    } else if (structure_result.cell_box[n].size() == 4) { // 4 values: two opposite corners
+      cv::Point rook_points[2];
+      rook_points[0] = cv::Point(int(structure_result.cell_box[n][0]),
+                                 int(structure_result.cell_box[n][1]));
+      rook_points[1] = cv::Point(int(structure_result.cell_box[n][2]),
+                                 int(structure_result.cell_box[n][3]));
+      cv::rectangle(img_vis, rook_points[0], rook_points[1], CV_RGB(0, 255, 0),
+                    2, 8, 0);
+    } // boxes of any other length are not drawn
+  }
+
+  cv::imwrite(save_path, img_vis); // the return value is not checked
+  std::cout << "The table visualized image saved in " + save_path << std::endl;
+}
+
 // list all files under a directory
 void Utility::GetAllFiles(const char *dir_name,
                          std::vector<std::string> &all_inputs) {
@@ -268,13 +299,46 @@ cv::Mat Utility::crop_image(cv::Mat &img, std::vector<int> &area) {

 void Utility::sorted_boxes(std::vector<OCRPredictResult> &ocr_result) {
  std::sort(ocr_result.begin(), ocr_result.end(), Utility::comparison_box);
-
-  for (int i = 0; i < ocr_result.size() - 1; i++) {
-    if (abs(ocr_result[i + 1].box[0][1] - ocr_result[i].box[0][1]) < 10 &&
-        (ocr_result[i + 1].box[0][0] < ocr_result[i].box[0][0])) {
-      std::swap(ocr_result[i], ocr_result[i + 1]);
+  if (ocr_result.size() > 0) { // size() is unsigned, so size() - 1 would wrap around on an empty vector
+    for (int i = 0; i < ocr_result.size() - 1; i++) {
+      for (int j = i; j >= 0; j--) { // walk element i + 1 down the already-ordered prefix, including the pair (0, 1)
+        if (abs(ocr_result[j + 1].box[0][1] - ocr_result[j].box[0][1]) < 10 &&
+            (ocr_result[j + 1].box[0][0] < ocr_result[j].box[0][0])) {
+          std::swap(ocr_result[j], ocr_result[j + 1]); // swap the pair that was just compared
+        }
+      }
    }
  }
 }

+std::vector<int> Utility::xyxyxyxy2xyxy(std::vector<std::vector<int>> &box) { // four [x, y] points -> [left, top, right, bottom]
+  int x_collect[4] = {box[0][0], box[1][0], box[2][0], box[3][0]}; // x of each point; box[0..3] are read without a size check
+  int y_collect[4] = {box[0][1], box[1][1], box[2][1], box[3][1]}; // y of each point
+  int left = int(*std::min_element(x_collect, x_collect + 4));
+  int right = int(*std::max_element(x_collect, x_collect + 4));
+  int top = int(*std::min_element(y_collect, y_collect + 4));
+  int bottom = int(*std::max_element(y_collect, y_collect + 4));
+  std::vector<int> box1(4, 0);
+  box1[0] = left; // smallest x
+  box1[1] = top; // smallest y
+  box1[2] = right; // largest x
+  box1[3] = bottom; // largest y
+  return box1;
+}
+
+std::vector<int> Utility::xyxyxyxy2xyxy(std::vector<int> &box) { // flat x, y, x, y, ... list of 4 points -> [left, top, right, bottom]
+  int x_collect[4] = {box[0], box[2], box[4], box[6]}; // even indices are x; box[0..7] are read without a size check
+  int y_collect[4] = {box[1], box[3], box[5], box[7]}; // odd indices are y
+  int left = int(*std::min_element(x_collect, x_collect + 4));
+  int right = int(*std::max_element(x_collect, x_collect + 4));
+  int top = int(*std::min_element(y_collect, y_collect + 4));
+  int bottom = int(*std::max_element(y_collect, y_collect + 4));
+  std::vector<int> box1(4, 0);
+  box1[0] = left; // smallest x
+  box1[1] = top; // smallest y
+  box1[2] = right; // largest x
+  box1[3] = bottom; // largest y
+  return box1;
+}
+
 } // namespace PaddleOCR
\ No newline at end of file
--- a/deploy/lite/config.txt
+++ b/deploy/lite/config.txt
@@ -5,4 +5,4 @@ det_db_unclip_ratio  1.6
 det_db_use_dilate 0
 det_use_polygon_score 1
 use_direction_classify  1
-rec_image_height  32
\ No newline at end of file
+rec_image_height  48
\ No newline at end of file
--- a/deploy/lite/readme.md
+++ b/deploy/lite/readme.md
@@ -99,6 +99,8 @@ The following table also provides a series of models that can be deployed on mob

 |Version|Introduction|Model size|Detection model|Text Direction model|Recognition model|Paddle-Lite branch|
 |---|---|---|---|---|---|---|
+|PP-OCRv3|extra-lightweight chinese OCR optimized model|16.2M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.nb)|v2.10|
+|PP-OCRv3(slim)|extra-lightweight chinese OCR optimized model|5.9M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.nb)|v2.10|
 |PP-OCRv2|extra-lightweight chinese OCR optimized model|11M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_infer_opt.nb)|v2.10|
 |PP-OCRv2(slim)|extra-lightweight chinese OCR optimized model|4.6M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_slim_opt.nb)|v2.10|

@@ -134,17 +136,16 @@ Introduction to paddle_lite_opt parameters:
 The following takes the ultra-lightweight Chinese model of PaddleOCR as an example to introduce the use of the compiled opt file to complete the conversion of the inference model to the Paddle-Lite optimized model

 ```
-# 【[Recommendation] Download the Chinese and English inference model of PP-OCRv2
-wget  https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_quant_infer.tar && tar xf  ch_PP-OCRv2_det_slim_quant_infer.tar
-wget  https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_quant_infer.tar && tar xf  ch_PP-OCRv2_rec_slim_quant_infer.tar
+# [Recommendation] Download the Chinese and English inference models of PP-OCRv3
+wget  https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar && tar xf  ch_PP-OCRv3_det_slim_infer.tar
+wget  https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar && tar xf  ch_PP-OCRv3_rec_slim_infer.tar
 wget  https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_cls_slim_infer.tar && tar xf  ch_ppocr_mobile_v2.0_cls_slim_infer.tar
 # Convert detection model
-./opt --model_file=./ch_PP-OCRv2_det_slim_quant_infer/inference.pdmodel  --param_file=./ch_PP-OCRv2_det_slim_quant_infer/inference.pdiparams  --optimize_out=./ch_PP-OCRv2_det_slim_opt --valid_targets=arm  --optimize_out_type=naive_buffer
+paddle_lite_opt --model_file=./ch_PP-OCRv3_det_slim_infer/inference.pdmodel  --param_file=./ch_PP-OCRv3_det_slim_infer/inference.pdiparams  --optimize_out=./ch_PP-OCRv3_det_slim_opt --valid_targets=arm  --optimize_out_type=naive_buffer
 # Convert recognition model
-./opt --model_file=./ch_PP-OCRv2_rec_slim_quant_infer/inference.pdmodel  --param_file=./ch_PP-OCRv2_rec_slim_quant_infer/inference.pdiparams  --optimize_out=./ch_PP-OCRv2_rec_slim_opt --valid_targets=arm  --optimize_out_type=naive_buffer
+paddle_lite_opt --model_file=./ch_PP-OCRv3_rec_slim_infer/inference.pdmodel  --param_file=./ch_PP-OCRv3_rec_slim_infer/inference.pdiparams  --optimize_out=./ch_PP-OCRv3_rec_slim_opt --valid_targets=arm  --optimize_out_type=naive_buffer
 # Convert angle classifier model
-./opt --model_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdmodel  --param_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdiparams  --optimize_out=./ch_ppocr_mobile_v2.0_cls_slim_opt --valid_targets=arm  --optimize_out_type=naive_buffer
-
+paddle_lite_opt --model_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdmodel  --param_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdiparams  --optimize_out=./ch_ppocr_mobile_v2.0_cls_slim_opt --valid_targets=arm  --optimize_out_type=naive_buffer
 ```

 After the conversion is successful, there will be more files ending with `.nb` in the inference model directory, which is the successfully converted model file.
@@ -197,15 +198,15 @@ Some preparatory work is required first.
 cp ../../../cxx/lib/libpaddle_light_api_shared.so ./debug/
 ```

-Prepare the test image, taking PaddleOCR/doc/imgs/11.jpg as an example, copy the image file to the demo/cxx/ocr/debug/ folder. Prepare the model files optimized by the lite opt tool, ch_det_mv3_db_opt.nb, ch_rec_mv3_crnn_opt.nb, and place them under the demo/cxx/ocr/debug/ folder.
+Prepare the test image, taking PaddleOCR/doc/imgs/11.jpg as an example, and copy the image file to the demo/cxx/ocr/debug/ folder. Prepare the model files optimized by the lite opt tool, ch_PP-OCRv3_det_slim_opt.nb, ch_PP-OCRv3_rec_slim_opt.nb, and ch_ppocr_mobile_v2.0_cls_slim_opt.nb, and place them under the demo/cxx/ocr/debug/ folder.

 The structure of the OCR demo is as follows after the above command is executed:

 ```
 demo/cxx/ocr/
 |-- debug/  
-|   |--ch_PP-OCRv2_det_slim_opt.nb           Detection model
-|   |--ch_PP-OCRv2_rec_slim_opt.nb           Recognition model
+|   |--ch_PP-OCRv3_det_slim_opt.nb           Detection model
+|   |--ch_PP-OCRv3_rec_slim_opt.nb           Recognition model
 |   |--ch_ppocr_mobile_v2.0_cls_slim_opt.nb           Text direction classification model
 |   |--11.jpg                           Image for OCR
 |   |--ppocr_keys_v1.txt                Dictionary file
@@ -240,7 +241,7 @@ det_db_thresh  0.3        # Used to filter the binarized image of DB prediction,
 det_db_box_thresh  0.5    # DDB post-processing filter box threshold, if there is a missing box detected, it can be reduced as appropriate
 det_db_unclip_ratio  1.6  # Indicates the compactness of the text box, the smaller the value, the closer the text box to the text
 use_direction_classify  0  # Whether to use the direction classifier, 0 means not to use, 1 means to use
-rec_image_height  32      # The height of the input image of the recognition model, the PP-OCRv3 model needs to be set to 48, and the PP-OCRv2 model needs to be set to 32
+rec_image_height  48      # The height of the input image of the recognition model, the PP-OCRv3 model needs to be set to 48, and the PP-OCRv2 model needs to be set to 32
 ```

 5. Run Model on phone
@@ -260,14 +261,14 @@ After the above steps are completed, you can use adb to push the file to the pho
 export LD_LIBRARY_PATH=${PWD}:$LD_LIBRARY_PATH
 # The use of ocr_db_crnn is:
 # ./ocr_db_crnn Mode Detection model file Orientation classifier model file Recognition model file  Hardware  Precision  Threads Batchsize  Test image path Dictionary file path
- ./ocr_db_crnn system ch_PP-OCRv2_det_slim_opt.nb  ch_PP-OCRv2_rec_slim_opt.nb  ch_ppocr_mobile_v2.0_cls_slim_opt.nb  arm8 INT8 10 1  ./11.jpg  config.txt  ppocr_keys_v1.txt  True
+ ./ocr_db_crnn system ch_PP-OCRv3_det_slim_opt.nb  ch_PP-OCRv3_rec_slim_opt.nb  ch_ppocr_mobile_v2.0_cls_slim_opt.nb  arm8 INT8 10 1  ./11.jpg  config.txt  ppocr_keys_v1.txt  True
 # precision can be INT8 for quantitative model or FP32 for normal model.

 # Only using detection model
-./ocr_db_crnn  det ch_PP-OCRv2_det_slim_opt.nb arm8 INT8 10 1 ./11.jpg  config.txt
+./ocr_db_crnn  det ch_PP-OCRv3_det_slim_opt.nb arm8 INT8 10 1 ./11.jpg  config.txt

 # Only using recognition model
-./ocr_db_crnn  rec ch_PP-OCRv2_rec_slim_opt.nb arm8 INT8 10 1 word_1.jpg ppocr_keys_v1.txt config.txt
+./ocr_db_crnn  rec ch_PP-OCRv3_rec_slim_opt.nb arm8 INT8 10 1 word_1.jpg ppocr_keys_v1.txt config.txt
 ```

 If you modify the code, you need to recompile and push to the phone.

--- a/deploy/lite/readme_ch.md
+++ b/deploy/lite/readme_ch.md
@@ -97,6 +97,8 @@ Paddle-Lite 提供了多种策略来自动优化原始的模型，其中包括

 |模型版本|模型简介|模型大小|检测模型|文本方向分类模型|识别模型|Paddle-Lite版本|
 |---|---|---|---|---|---|---|
+|PP-OCRv3|蒸馏版超轻量中文OCR移动端模型|16.2M|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_infer_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.nb)|v2.10|
+|PP-OCRv3(slim)|蒸馏版超轻量中文OCR移动端模型|5.9M|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.nb)|v2.10|
 |PP-OCRv2|蒸馏版超轻量中文OCR移动端模型|11M|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_infer_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_infer_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_infer_opt.nb)|v2.10|
 |PP-OCRv2(slim)|蒸馏版超轻量中文OCR移动端模型|4.6M|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_slim_opt.nb)|v2.10|

@@ -131,16 +133,16 @@ paddle_lite_opt 参数介绍：
 下面以PaddleOCR的超轻量中文模型为例，介绍使用编译好的opt文件完成inference模型到Paddle-Lite优化模型的转换。

 ```
-# 【推荐】 下载 PP-OCRv2版本的中英文 inference模型
-wget  https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_quant_infer.tar && tar xf  ch_PP-OCRv2_det_slim_quant_infer.tar
-wget  https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_quant_infer.tar && tar xf  ch_PP-OCRv2_rec_slim_quant_infer.tar
+# 【推荐】 下载 PP-OCRv3版本的中英文 inference模型
+wget  https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar && tar xf  ch_PP-OCRv3_det_slim_infer.tar
+wget  https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar && tar xf  ch_PP-OCRv3_rec_slim_infer.tar
 wget  https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_cls_slim_infer.tar && tar xf  ch_ppocr_mobile_v2.0_cls_slim_infer.tar
 # 转换检测模型
-./opt --model_file=./ch_PP-OCRv2_det_slim_quant_infer/inference.pdmodel  --param_file=./ch_PP-OCRv2_det_slim_quant_infer/inference.pdiparams  --optimize_out=./ch_PP-OCRv2_det_slim_opt --valid_targets=arm  --optimize_out_type=naive_buffer
+paddle_lite_opt --model_file=./ch_PP-OCRv3_det_slim_infer/inference.pdmodel  --param_file=./ch_PP-OCRv3_det_slim_infer/inference.pdiparams  --optimize_out=./ch_PP-OCRv3_det_slim_opt --valid_targets=arm  --optimize_out_type=naive_buffer
 # 转换识别模型
-./opt --model_file=./ch_PP-OCRv2_rec_slim_quant_infer/inference.pdmodel  --param_file=./ch_PP-OCRv2_rec_slim_quant_infer/inference.pdiparams  --optimize_out=./ch_PP-OCRv2_rec_slim_opt --valid_targets=arm  --optimize_out_type=naive_buffer
+paddle_lite_opt --model_file=./ch_PP-OCRv3_rec_slim_infer/inference.pdmodel  --param_file=./ch_PP-OCRv3_rec_slim_infer/inference.pdiparams  --optimize_out=./ch_PP-OCRv3_rec_slim_opt --valid_targets=arm  --optimize_out_type=naive_buffer
 # 转换方向分类器模型
-./opt --model_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdmodel  --param_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdiparams  --optimize_out=./ch_ppocr_mobile_v2.0_cls_slim_opt --valid_targets=arm  --optimize_out_type=naive_buffer
+paddle_lite_opt --model_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdmodel  --param_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdiparams  --optimize_out=./ch_ppocr_mobile_v2.0_cls_slim_opt --valid_targets=arm  --optimize_out_type=naive_buffer

 ```

@@ -194,15 +196,15 @@ wget  https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_cls
 ```

 准备测试图像，以`PaddleOCR/doc/imgs/11.jpg`为例，将测试的图像复制到`demo/cxx/ocr/debug/`文件夹下。
- 准备lite opt工具优化后的模型文件，比如使用`ch_PP-OCRv2_det_slim_opt.ch_PP-OCRv2_rec_slim_rec.nb, ch_ppocr_mobile_v2.0_cls_slim_opt.nb`，模型文件放置在`demo/cxx/ocr/debug/`文件夹下。
+ 准备lite opt工具优化后的模型文件，比如使用`ch_PP-OCRv3_det_slim_opt.nb, ch_PP-OCRv3_rec_slim_opt.nb, ch_ppocr_mobile_v2.0_cls_slim_opt.nb`，模型文件放置在`demo/cxx/ocr/debug/`文件夹下。

 执行完成后，ocr文件夹下将有如下文件格式：

 ```
 demo/cxx/ocr/
 |-- debug/  
-|   |--ch_PP-OCRv2_det_slim_opt.nb           优化后的检测模型文件
-|   |--ch_PP-OCRv2_rec_slim_opt.nb           优化后的识别模型文件
+|   |--ch_PP-OCRv3_det_slim_opt.nb           优化后的检测模型文件
+|   |--ch_PP-OCRv3_rec_slim_opt.nb           优化后的识别模型文件
 |   |--ch_ppocr_mobile_v2.0_cls_slim_opt.nb           优化后的文字方向分类器模型文件
 |   |--11.jpg                           待测试图像
 |   |--ppocr_keys_v1.txt                中文字典文件
@@ -239,7 +241,7 @@ det_db_thresh  0.3        # 用于过滤DB预测的二值化图像，设置为0.
 det_db_box_thresh  0.5    # 检测器后处理过滤box的阈值，如果检测存在漏框情况，可酌情减小
 det_db_unclip_ratio  1.6  # 表示文本框的紧致程度，越小则文本框更靠近文本
 use_direction_classify  0  # 是否使用方向分类器，0表示不使用，1表示使用
-rec_image_height  32      # 识别模型输入图像的高度，PP-OCRv3模型设置为48，PP-OCRv2模型需要设置为32
+rec_image_height  48      # 识别模型输入图像的高度，PP-OCRv3模型设置为48，PP-OCRv2模型需要设置为32
 ```

 5. 启动调试
@@ -259,13 +261,13 @@ rec_image_height  32      # 识别模型输入图像的高度，PP-OCRv3模型
 export LD_LIBRARY_PATH=${PWD}:$LD_LIBRARY_PATH
 # 开始使用，ocr_db_crnn可执行文件的使用方式为:
 # ./ocr_db_crnn 预测模式  检测模型文件 方向分类器模型文件  识别模型文件 运行硬件 运行精度 线程数  batchsize  测试图像路径  参数配置路径  字典文件路径 是否使用benchmark参数
- ./ocr_db_crnn system  ch_PP-OCRv2_det_slim_opt.nb  ch_PP-OCRv2_rec_slim_opt.nb  ch_ppocr_mobile_v2.0_cls_slim_opt.nb  arm8 INT8 10 1  ./11.jpg  config.txt  ppocr_keys_v1.txt  True
+ ./ocr_db_crnn system  ch_PP-OCRv3_det_slim_opt.nb  ch_PP-OCRv3_rec_slim_opt.nb  ch_ppocr_mobile_v2.0_cls_slim_opt.nb  arm8 INT8 10 1  ./11.jpg  config.txt  ppocr_keys_v1.txt  True

 # 仅使用文本检测模型，使用方式如下：
-./ocr_db_crnn  det ch_PP-OCRv2_det_slim_opt.nb arm8 INT8 10 1 ./11.jpg  config.txt
+./ocr_db_crnn  det ch_PP-OCRv3_det_slim_opt.nb arm8 INT8 10 1 ./11.jpg  config.txt

 # 仅使用文本识别模型，使用方式如下：
-./ocr_db_crnn  rec ch_PP-OCRv2_rec_slim_opt.nb arm8 INT8 10 1 word_1.jpg ppocr_keys_v1.txt config.txt
+./ocr_db_crnn  rec ch_PP-OCRv3_rec_slim_opt.nb arm8 INT8 10 1 word_1.jpg ppocr_keys_v1.txt config.txt
 ```

 如果对代码做了修改，则需要重新编译并push到手机上。

--- a/deploy/slim/quantization/README.md
+++ b/deploy/slim/quantization/README.md
@@ -22,7 +22,7 @@
 ### 1. 安装PaddleSlim

 ```bash
-pip3 install paddleslim==2.2.2
+pip3 install paddleslim==2.3.2
 ```

 ### 2. 准备训练好的模型
@@ -32,18 +32,7 @@ PaddleOCR提供了一系列训练好的[模型](../../../doc/doc_ch/models_list.
 ### 3. 量化训练
 量化训练包括离线量化训练和在线量化训练，在线量化训练效果更好，需加载预训练模型，在定义好量化策略后即可对模型进行量化。

-
-量化训练的代码位于slim/quantization/quant.py 中，比如训练检测模型，训练指令如下：
-```bash
-python deploy/slim/quantization/quant.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model='your trained model'   Global.save_model_dir=./output/quant_model
-
-# 比如下载提供的训练模型
-wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar
-tar -xf ch_ppocr_mobile_v2.0_det_train.tar
-python deploy/slim/quantization/quant.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./ch_ppocr_mobile_v2.0_det_train/best_accuracy   Global.save_model_dir=./output/quant_model
-```
-
-模型蒸馏和模型量化可以同时使用，以PPOCRv3检测模型为例：
+量化训练的代码位于slim/quantization/quant.py 中，比如训练检测模型，以PPOCRv3检测模型为例，训练指令如下：
 ```
 # 下载检测预训练模型：
 wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar
@@ -58,7 +47,7 @@ python deploy/slim/quantization/quant.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_
 在得到量化训练保存的模型后，我们可以将其导出为inference_model，用于预测部署：

 ```bash
-python deploy/slim/quantization/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_inference_dir=./output/quant_inference_model
+python deploy/slim/quantization/export_model.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_inference_dir=./output/quant_inference_model
 ```

 ### 5. 量化模型部署

--- a/deploy/slim/quantization/README_en.md
+++ b/deploy/slim/quantization/README_en.md
@@ -25,7 +25,7 @@ After training, if you want to further compress the model size and accelerate th
 ### 1. Install PaddleSlim

 ```bash
-pip3 install paddleslim==2.2.2
+pip3 install paddleslim==2.3.2
 ```


@@ -39,18 +39,7 @@ Quantization training includes offline quantization training and online quantiza
 Online quantization training is more effective. It is necessary to load the pre-trained model.
 After the quantization strategy is defined, the model can be quantified.

-The code for quantization training is located in `slim/quantization/quant.py`. For example, to train a detection model, the training instructions are as follows:
-```bash
-python deploy/slim/quantization/quant.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model='your trained model'   Global.save_model_dir=./output/quant_model
-
-# download provided model
-wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar
-tar -xf ch_ppocr_mobile_v2.0_det_train.tar
-python deploy/slim/quantization/quant.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./ch_ppocr_mobile_v2.0_det_train/best_accuracy   Global.save_model_dir=./output/quant_model
-```
-
-
-Model distillation and model quantization can be used at the same time, taking the PPOCRv3 detection model as an example:
+The code for quantization training is located in `slim/quantization/quant.py`. For example, the training instructions of slim PPOCRv3 detection model are as follows:
 ```
 # download provided model
 wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar
@@ -66,7 +55,7 @@ If you want to quantify the text recognition model, you can modify the configura
 Once we got the model after pruning and fine-tuning, we can export it as an inference model for the deployment of predictive tasks:

 ```bash
-python deploy/slim/quantization/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_inference_dir=./output/quant_inference_model
+python deploy/slim/quantization/export_model.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_inference_dir=./output/quant_inference_model
 ```

 ### 5. Deploy

--- a/deploy/slim/quantization/export_model.py
+++ b/deploy/slim/quantization/export_model.py
@@ -151,17 +151,24 @@ def main():

    arch_config = config["Architecture"]

-    arch_config = config["Architecture"]
+    if arch_config["algorithm"] == "SVTR" and arch_config["Head"][
+            "name"] != 'MultiHead':
+        input_shape = config["Eval"]["dataset"]["transforms"][-2][
+            'SVTRRecResizeImg']['image_shape']
+    else:
+        input_shape = None

    if arch_config["algorithm"] in ["Distillation", ]:  # distillation model
        archs = list(arch_config["Models"].values())
        for idx, name in enumerate(model.model_name_list):
            sub_model_save_path = os.path.join(save_path, name, "inference")
            export_single_model(model.model_list[idx], archs[idx],
-                                sub_model_save_path, logger, quanter)
+                                sub_model_save_path, logger, input_shape,
+                                quanter)
    else:
        save_path = os.path.join(save_path, "inference")
-        export_single_model(model, arch_config, save_path, logger, quanter)
+        export_single_model(model, arch_config, save_path, logger, input_shape,
+                            quanter)


 if __name__ == "__main__":

--- a/doc/doc_en/algorithm_sdmgr_en.md
+++ b/doc/doc_en/algorithm_sdmgr_en.md
@@ -31,7 +31,7 @@ The prediction result is saved as `./output/sdmgr_kie/predicts_kie.txt`, and the
 The visualization results are shown in the figure below:

 <div align="center">
-    <img src="./imgs/0.png" width="800">
+    <img src="../../ppstructure/docs/imgs/sdmgr_result.png" width="800">
 </div>

 ## 2. Model Training

--- a/doc/doc_en/config_en.md
+++ b/doc/doc_en/config_en.md
@@ -28,7 +28,7 @@ Take rec_chinese_lite_train_v2.0.yml as an example
 |      epoch_num           |    Maximum training epoch number             |       500        |                \                 |
 |      log_smooth_window   |    Log queue length, the median value in the queue each time will be printed           |       20          |                \                 |
 |      print_batch_step    |    Set print log interval         |       10          |                \                 |
-|      save_model_dir      |    Set model save path        |  output/{算法名称}  |                \                 |
+|      save_model_dir      |    Set model save path        |  output/{algorithm_name}  |                \                 |
 |      save_epoch_step     |    Set model save interval        |       3           |                \                 |
 |      eval_batch_step     |    Set the model evaluation interval        | 2000 or [1000, 2000]        | running evaluation every 2000 iters or evaluation is run every 2000 iterations after the 1000th iteration   |
 |      cal_metric_during_train     |    Set whether to evaluate the metric during the training process. At this time, the metric of the model under the current batch is evaluated        |       true         |                \                 |
@@ -245,4 +245,4 @@ For more supported languages, please refer to : [Multi-language model](https://g

 The multi-language model training method is the same as the Chinese model. The training data set is 100w synthetic data. A small amount of fonts and test data can be downloaded using the following two methods.
 * [Baidu Netdisk](https://pan.baidu.com/s/1bS_u207Rm7YbY33wOECKDA),Extraction code:frgi.
-* [Google drive](https://drive.google.com/file/d/18cSWX7wXSy4G0tbKJ0d9PuIaiwRLHpjA/view)
\ No newline at end of file
+* [Google drive](https://drive.google.com/file/d/18cSWX7wXSy4G0tbKJ0d9PuIaiwRLHpjA/view)
--- a/ppocr/losses/rec_aster_loss.py
+++ b/ppocr/losses/rec_aster_loss.py
@@ -27,10 +27,9 @@ class CosineEmbeddingLoss(nn.Layer):
        self.epsilon = 1e-12

    def forward(self, x1, x2, target):
-        similarity = paddle.sum(
-            x1 * x2, dim=-1) / (paddle.norm(
-                x1, axis=-1) * paddle.norm(
-                    x2, axis=-1) + self.epsilon)
+        similarity = paddle.sum(x1 * x2, axis=-1) / (paddle.norm(
+            x1, axis=-1) * paddle.norm(
+                x2, axis=-1) + self.epsilon)
        one_list = paddle.full_like(target, fill_value=1)
        out = paddle.mean(
            paddle.where(

--- a/ppstructure/utility.py
+++ b/ppstructure/utility.py
@@ -32,7 +32,7 @@ def init_args():
    parser.add_argument(
        "--table_char_dict_path",
        type=str,
-        default="../ppocr/utils/dict/table_structure_dict.txt")
+        default="../ppocr/utils/dict/table_structure_dict_ch.txt")
    # params for layout
    parser.add_argument("--layout_model_dir", type=str)
    parser.add_argument(

--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -38,7 +38,6 @@ def init_args():
    parser.add_argument("--ir_optim", type=str2bool, default=True)
    parser.add_argument("--use_tensorrt", type=str2bool, default=False)
    parser.add_argument("--min_subgraph_size", type=int, default=15)
-    parser.add_argument("--shape_info_filename", type=str, default=None)
    parser.add_argument("--precision", type=str, default="fp32")
    parser.add_argument("--gpu_mem", type=int, default=500)

@@ -226,23 +225,22 @@ def create_predictor(args, mode, logger):
                    use_calib_mode=False)

                # collect shape
-                if args.shape_info_filename is not None:
-                    if not os.path.exists(args.shape_info_filename):
-                        config.collect_shape_range_info(
-                            args.shape_info_filename)
-                        logger.info(
-                            f"collect dynamic shape info into : {args.shape_info_filename}"
-                        )
-                    else:
-                        logger.info(
-                            f"dynamic shape info file( {args.shape_info_filename} ) already exists, not need to generate again."
-                        )
-                    config.enable_tuned_tensorrt_dynamic_shape(
-                        args.shape_info_filename, True)
+                trt_shape_f = os.path.join(model_dir, f"{mode}_trt_dynamic_shape.txt")
+
+                if not os.path.exists(trt_shape_f):
+                    config.collect_shape_range_info(trt_shape_f)
+                    logger.info(
+                        f"collect dynamic shape info into : {trt_shape_f}")
                else:
                    logger.info(
-                        f"when using tensorrt, dynamic shape is a suggested option, you can use '--shape_info_filename=shape.txt' for offline dygnamic shape tuning"
+                        f"dynamic shape info file( {trt_shape_f} ) already exists, not need to generate again."
                    )
+                try:
+                    config.enable_tuned_tensorrt_dynamic_shape(trt_shape_f,
+                                                               True)
+                except Exception as E:
+                    logger.info(E)
+                    logger.info("Please keep your paddlepaddle-gpu >= 2.3.0!")

        elif args.use_xpu:
            config.enable_xpu(10 * 1024 * 1024)