Commit df64d404 authored by andyjpaddle

Merge branch 'dygraph' of https://github.com/PaddlePaddle/PaddleOCR into dygraph

......@@ -31,4 +31,4 @@ paddleocr.egg-info/
/deploy/android_demo/app/.cxx/
/deploy/android_demo/app/cache/
test_tipc/web/models/
test_tipc/web/node_modules/
test_tipc/web/node_modules/
\ No newline at end of file
......@@ -2285,7 +2285,7 @@ class MainWindow(QMainWindow):
'''
Table Recognition
'''
from paddleocr.ppstructure.table.predict_table import to_excel
from paddleocr import to_excel
import time
......@@ -2309,7 +2309,7 @@ class MainWindow(QMainWindow):
# ONLY SUPPORT ONE TABLE in one image
hasTable = False
for region in res:
if region['type'] == 'Table':
if region['type'] == 'table':
if region['res']['boxes'] is None:
msg = 'Cannot recognize the detection box in ' + self.filePath + '. Please adjust it manually'
QMessageBox.information(self, "Information", msg)
......@@ -2335,10 +2335,7 @@ class MainWindow(QMainWindow):
bbox = np.array(region['res']['boxes'][i])
rec_text = region['res']['rec_res'][i][0]
# polys to rectangles
x1, y1 = np.min(bbox[:, 0]), np.min(bbox[:, 1])
x2, y2 = np.max(bbox[:, 0]), np.max(bbox[:, 1])
rext_bbox = [[x1, y1], [x2, y1], [x2, y2], [x1, y2]]
rext_bbox = [[bbox[0], bbox[1]], [bbox[2], bbox[1]], [bbox[2], bbox[3]], [bbox[0], bbox[3]]]
# save bbox to shape
shape = Shape(label=rec_text, line_color=DEFAULT_LINE_COLOR, key_cls=None)
......@@ -2452,13 +2449,6 @@ class MainWindow(QMainWindow):
export PPLabel and CSV to JSON (PubTabNet)
'''
import pandas as pd
from libs.dataPartitionDialog import DataPartitionDialog
# data partition user input
partitionDialog = DataPartitionDialog(parent=self)
partitionDialog.exec()
if partitionDialog.getStatus() == False:
return
# automatically save annotations
self.saveFilestate()
......@@ -2481,28 +2471,19 @@ class MainWindow(QMainWindow):
labeldict[file] = eval(label)
else:
labeldict[file] = []
# read table recognition output
TableRec_excel_dir = os.path.join(
self.lastOpenDir, 'tableRec_excel_output')
train_split, val_split, test_split = partitionDialog.getDataPartition()
# validate the split percentages
if train_split + val_split + test_split > 100:
msg = "The sum of training, validation and testing data should not exceed 100%"
QMessageBox.information(self, "Information", msg)
return
print(train_split, val_split, test_split)
train_split, val_split, test_split = float(train_split) / 100., float(val_split) / 100., float(test_split) / 100.
train_id = int(len(labeldict) * train_split)
val_id = int(len(labeldict) * (train_split + val_split))
print('Data partition: train:', train_id,
'validation:', val_id - train_id,
'test:', len(labeldict) - val_id)
TableRec_excel_dir = os.path.join(self.lastOpenDir, 'tableRec_excel_output')
json_results = []
imgid = 0
# save txt
fid = open(
"{}/gt.txt".format(self.lastOpenDir), "w", encoding='utf-8')
for image_path in labeldict.keys():
# load csv annotations
filename, _ = os.path.splitext(os.path.basename(image_path))
csv_path = os.path.join(TableRec_excel_dir, filename + '.xlsx')
csv_path = os.path.join(
TableRec_excel_dir, filename + '.xlsx')
if not os.path.exists(csv_path):
continue
......@@ -2521,28 +2502,31 @@ class MainWindow(QMainWindow):
cells = []
for anno in labeldict[image_path]:
tokens = list(anno['transcription'])
obb = anno['points']
hbb = OBB2HBB(np.array(obb)).tolist()
cells.append({'tokens': tokens, 'bbox': hbb})
# data split
if imgid < train_id:
split = 'train'
elif imgid < val_id:
split = 'val'
else:
split = 'test'
# save dict
html = {'structure': {'tokens': token_list}, 'cell': cells}
json_results.append({'filename': os.path.basename(image_path), 'split': split, 'imgid': imgid, 'html': html})
imgid += 1
# save json
with open("{}/annotation.json".format(self.lastOpenDir), "w", encoding='utf-8') as fid:
fid.write(json.dumps(json_results, ensure_ascii=False))
msg = 'JSON successfully saved in {}/annotation.json'.format(self.lastOpenDir)
cells.append({
'tokens': tokens,
'bbox': anno['points']
})
# build the annotation dict
html = {
'structure': {
'tokens': token_list
},
'cells': cells
}
d = {
'filename': os.path.basename(image_path),
'html': html
}
# rebuild the HTML from the PP-Structure style label
d['gt'] = rebuild_html_from_ppstructure_label(d)
fid.write('{}\n'.format(
json.dumps(
d, ensure_ascii=False)))
# convert to PP-Structure label format
fid.close()
msg = 'JSON successfully saved in {}/gt.txt'.format(self.lastOpenDir)
QMessageBox.information(self, "Information", msg)
def autolcm(self):
......
English | [简体中文](README_ch.md)
# PPOCRLabel
# PPOCRLabelv2
PPOCRLabel is a semi-automatic graphic annotation tool suitable for the OCR field, with a built-in PP-OCR model to automatically detect and re-recognize data. It is written in Python3 and PyQt5, supporting rectangular box, table, and multi-point annotation modes. Annotations can be directly used for the training of PP-OCR detection and recognition models.
PPOCRLabelv2 is a semi-automatic graphic annotation tool suitable for the OCR field, with a built-in PP-OCR model to automatically detect and re-recognize data. It is written in Python3 and PyQt5, supporting rectangular box, table, irregular text, and key information annotation modes. Annotations can be directly used for the training of PP-OCR detection and recognition models.
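For reference, the tool is distributed on PyPI and started through the console script declared in the setup.py change further down in this diff; a minimal launch sketch (the `--lang ch` switch for the Chinese UI is assumed from the PPOCRLabel README):
```bash
# install and launch PPOCRLabel (assumed flags; see the PPOCRLabel README)
pip install PPOCRLabel
PPOCRLabel --lang ch
```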
<img src="./data/gif/steps_en.gif" width="100%"/>
| regular text annotation | table annotation |
| :-------------------------------------------------: | :--------------------------------------------: |
| <img src="./data/gif/steps_en.gif" width="80%"/> | <img src="./data/gif/table.gif" width="100%"/> |
| **irregular text annotation** | **key information annotation** |
| <img src="./data/gif/multi-point.gif" width="80%"/> | <img src="./data/gif/kie.gif" width="100%"/> |
### Recent Update
......
[English](README.md) | 简体中文
# PPOCRLabel
# PPOCRLabelv2
PPOCRLabel is a semi-automatic graphic annotation tool for the OCR field, with a built-in PP-OCR model for automatic annotation and re-recognition of data. It is written in Python3 and PyQt5, supports rectangular-box and four-point annotation modes, and its exported formats can be used directly for training PaddleOCR detection and recognition models.
<img src="./data/gif/steps.gif" width="100%"/>
| regular text annotation | table annotation |
| :-------------------------------------------------: | :--------------------------------------------: |
| <img src="./data/gif/steps_en.gif" width="80%"/> | <img src="./data/gif/table.gif" width="100%"/> |
| **irregular text annotation** | **key information annotation** |
| <img src="./data/gif/multi-point.gif" width="80%"/> | <img src="./data/gif/kie.gif" width="100%"/> |
#### Recent Updates
- 2022.05: **Added table annotation**; see `2.2 Table Annotation` below for usage (by [whjdark](https://github.com/peterh0323); [Evezerest](https://github.com/Evezerest))
......
......@@ -627,7 +627,7 @@ class Canvas(QWidget):
# adaptive BBOX label & index font size
if self.pixmap:
h, w = self.pixmap.size().height(), self.pixmap.size().width()
fontsize = int(max(h, w) / 96)
fontsize = int(max(h, w) / 48)
for s in self.shapes:
s.fontsize = fontsize
......
try:
from PyQt5.QtGui import *
from PyQt5.QtCore import *
from PyQt5.QtWidgets import *
except ImportError:
from PyQt4.QtGui import *
from PyQt4.QtCore import *
from libs.utils import newIcon
import time
import datetime
import json
import cv2
import numpy as np
BB = QDialogButtonBox
class DataPartitionDialog(QDialog):
def __init__(self, parent=None):
super().__init__()
self.parent = parent
self.title = 'DATA PARTITION'
self.train_ratio = 70
self.val_ratio = 15
self.test_ratio = 15
self.initUI()
def initUI(self):
self.setWindowTitle(self.title)
self.setWindowModality(Qt.ApplicationModal)
self.flag_accept = True
if self.parent.lang == 'ch':
msg = "导出JSON前请保存所有图像的标注且关闭EXCEL!"
else:
msg = "Please save all the annotations and close the EXCEL before exporting JSON!"
info_msg = QLabel(msg, self)
info_msg.setWordWrap(True)
info_msg.setStyleSheet("color: red")
info_msg.setFont(QFont('Arial', 12))
train_lbl = QLabel('Train split: ', self)
train_lbl.setFont(QFont('Arial', 15))
val_lbl = QLabel('Valid split: ', self)
val_lbl.setFont(QFont('Arial', 15))
test_lbl = QLabel('Test split: ', self)
test_lbl.setFont(QFont('Arial', 15))
self.train_input = QLineEdit(self)
self.train_input.setFont(QFont('Arial', 15))
self.val_input = QLineEdit(self)
self.val_input.setFont(QFont('Arial', 15))
self.test_input = QLineEdit(self)
self.test_input.setFont(QFont('Arial', 15))
self.train_input.setText(str(self.train_ratio))
self.val_input.setText(str(self.val_ratio))
self.test_input.setText(str(self.test_ratio))
validator = QIntValidator(0, 100)
self.train_input.setValidator(validator)
self.val_input.setValidator(validator)
self.test_input.setValidator(validator)
gridlayout = QGridLayout()
gridlayout.addWidget(info_msg, 0, 0, 1, 2)
gridlayout.addWidget(train_lbl, 1, 0)
gridlayout.addWidget(val_lbl, 2, 0)
gridlayout.addWidget(test_lbl, 3, 0)
gridlayout.addWidget(self.train_input, 1, 1)
gridlayout.addWidget(self.val_input, 2, 1)
gridlayout.addWidget(self.test_input, 3, 1)
bb = BB(BB.Ok | BB.Cancel, Qt.Horizontal, self)
bb.button(BB.Ok).setIcon(newIcon('done'))
bb.button(BB.Cancel).setIcon(newIcon('undo'))
bb.accepted.connect(self.validate)
bb.rejected.connect(self.cancel)
gridlayout.addWidget(bb, 4, 0, 1, 2)
self.setLayout(gridlayout)
self.show()
def validate(self):
self.flag_accept = True
self.accept()
def cancel(self):
self.flag_accept = False
self.reject()
def getStatus(self):
return self.flag_accept
def getDataPartition(self):
self.train_ratio = int(self.train_input.text())
self.val_ratio = int(self.val_input.text())
self.test_ratio = int(self.test_input.text())
return self.train_ratio, self.val_ratio, self.test_ratio
def closeEvent(self, event):
self.flag_accept = False
self.reject()
......@@ -176,18 +176,6 @@ def boxPad(box, imgShape, pad : int) -> np.array:
return box
def OBB2HBB(obb) -> np.array:
"""
Convert Oriented Bounding Box to Horizontal Bounding Box.
"""
hbb = np.zeros(4, dtype=np.int32)
hbb[0] = min(obb[:, 0])
hbb[1] = min(obb[:, 1])
hbb[2] = max(obb[:, 0])
hbb[3] = max(obb[:, 1])
return hbb
def expand_list(merged, html_list):
'''
Fill blanks according to merged cells
......@@ -232,6 +220,26 @@ def convert_token(html_list):
return token_list
def rebuild_html_from_ppstructure_label(label_info):
"""Rebuild a full HTML table string from a PP-Structure style label dict."""
from html import escape
html_code = label_info['html']['structure']['tokens'].copy()
# positions of cell-opening tags: '<td>' for plain cells, '>' for
# '<td' tokens that carry attributes and close with a bare '>'
to_insert = [
i for i, tag in enumerate(html_code) if tag in ('<td>', '>')
]
# walk backwards so earlier insertion indices stay valid
for i, cell in zip(to_insert[::-1], label_info['html']['cells'][::-1]):
if cell['tokens']:
# escape single characters; longer tokens are markup (e.g. '<b>')
cell = [
escape(token) if len(token) == 1 else token
for token in cell['tokens']
]
cell = ''.join(cell)
html_code.insert(i + 1, cell)
html_code = ''.join(html_code)
html_code = '<html><body><table>{}</table></body></html>'.format(
html_code)
return html_code
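To make the helper's behavior concrete, here is a minimal sketch with a hand-written two-cell label dict (made-up data in the same shape as the `gt.txt` entries written by the export code above):
```python
# made-up PP-Structure style label for illustration only
label_info = {
    'html': {
        'structure': {'tokens': ['<tr>', '<td>', '</td>', '<td>', '</td>', '</tr>']},
        'cells': [{'tokens': ['N', 'o']}, {'tokens': ['1', '2']}],
    }
}
print(rebuild_html_from_ppstructure_label(label_info))
# -> <html><body><table><tr><td>No</td><td>12</td></tr></table></html>
```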
def stepsInfo(lang='en'):
if lang == 'ch':
msg = "1. 安装与运行:使用上述命令安装与运行程序。\n" \
......
......@@ -33,7 +33,7 @@ setup(
package_dir={'PPOCRLabel': ''},
include_package_data=True,
entry_points={"console_scripts": ["PPOCRLabel= PPOCRLabel.PPOCRLabel:main"]},
version='1.0.2',
version='2.1.1',
install_requires=requirements,
license='Apache License 2.0',
description='PPOCRLabel is a semi-automatic graphic annotation tool suitable for the OCR field, with a built-in PP-OCR model to automatically detect and re-recognize data. It is written in Python3 and PyQt5, supporting rectangular box annotation and four-point annotation modes. Annotations can be directly used for the training of PP-OCR detection and recognition models',
......
......@@ -26,17 +26,19 @@ PaddleOCR aims to create multilingual, awesome, leading, and practical OCR tools
</div>
## Recent updates
- **🔥2022.8.24 Release PaddleOCR [release/2.6](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.6)**
- Release [PP-Structurev2](./ppstructure/), with functions and performance fully upgraded, adapted to Chinese scenes, and new support for [Layout Recovery](./ppstructure/recovery) and **a one-line command to convert PDF to Word**;
- [Layout Analysis](./ppstructure/layout) optimization: model storage reduced by 95% and speed increased 11 times, with an average CPU time cost of only 41 ms;
- [Table Recognition](./ppstructure/table) optimization: 3 optimization strategies designed, improving model accuracy by 6% at comparable time consumption;
- [Key Information Extraction](./ppstructure/kie) optimization: a vision-independent model structure designed, increasing the accuracy of semantic entity recognition by 2.8% and of relation extraction by 9.1%.
- **🔥2022.7 Release [OCR scene application collection](./applications/README_en.md)**
- Release **9 vertical models** covering digital tubes, LCD screens, license plates, handwriting recognition, the high-precision SVTR model, and more, spanning the main OCR vertical applications in the general, manufacturing, finance, and transportation industries.
- **🔥2022.5.9 Release PaddleOCR [release/2.5](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.5)**
- Release [PP-OCRv3](./doc/doc_en/ppocr_introduction_en.md#pp-ocrv3): at comparable speed, accuracy in Chinese scenes improves by a further 5% over PP-OCRv2, in English scenes by 11%, and the average recognition accuracy of the 80-language multilingual models by more than 5%.
- Release [PPOCRLabelv2](./PPOCRLabel): add annotation functions for the table recognition task, the key information extraction task, and irregular text images.
- Release the interactive e-book [*"Dive into OCR"*](./doc/doc_en/ocr_book_en.md), covering cutting-edge theory and code practice of full-stack OCR technology.
- 2021.12.21 Release PaddleOCR [release/2.4](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.4)
- Release 1 text detection algorithm (PSENet) and 3 text recognition algorithms (NRTR, SEED, SAR).
- Release 1 key information extraction algorithm (SDMGR, [tutorial](./ppstructure/docs/kie_en.md)) and 3 [DocVQA](./ppstructure/vqa) algorithms (LayoutLM, LayoutLMv2, LayoutXLM).
- 2021.9.7 Release PaddleOCR [release/2.3](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.3)
- Release [PP-OCRv2](./doc/doc_en/ppocr_introduction_en.md#pp-ocrv2): the inference speed of PP-OCRv2 is 220% higher than that of PP-OCR server on CPU devices, and its F-score is 7% higher than that of PP-OCR mobile.
- 2021.8.3 Release PaddleOCR [release/2.2](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.2)
- Release a new structured document analysis toolkit, [PP-Structure](./ppstructure/README.md), supporting layout analysis and table recognition (one-click export of table images to Excel files); a minimal usage sketch follows this list.
- [more](./doc/doc_en/update_en.md)
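As a quick orientation for the PP-Structure toolkit referenced above, a minimal usage sketch with the exported Python API (the input image path is an example; models are downloaded on first run):
```python
import cv2
from paddleocr import PPStructure, save_structure_res

# run layout analysis + table recognition on one image
engine = PPStructure(show_log=True)
img = cv2.imread('table.jpg')  # example input
result = engine(img)

# save region crops and recognized tables (as .xlsx) under ./output/table/
save_structure_res(result, './output', 'table')
for region in result:
    print(region['type'], region['bbox'])  # e.g. 'table', [x1, y1, x2, y2]
```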
......@@ -45,7 +47,9 @@ PaddleOCR aims to create multilingual, awesome, leading, and practical OCR tools
PaddleOCR supports a variety of cutting-edge OCR-related algorithms, develops the industrial featured models/solutions [PP-OCR](./doc/doc_en/ppocr_introduction_en.md) and [PP-Structure](./ppstructure/README.md) on this basis, and covers the whole process of data production, model training, compression, inference, and deployment.
![](./doc/features_en.png)
<div align="center">
<img src="https://user-images.githubusercontent.com/25809855/186171245-40abc4d7-904f-4949-ade1-250f86ed3a90.png">
</div>
> It is recommended to start with the “quick experience” in the document tutorial
......@@ -113,18 +117,19 @@ PaddleOCR support a variety of cutting-edge algorithms related to OCR, and devel
- [Quick Start](./ppstructure/docs/quickstart_en.md)
- [Model Zoo](./ppstructure/docs/models_list_en.md)
- [Model training](./doc/doc_en/training_en.md)
- [Layout Parser](./ppstructure/layout/README.md)
- [Layout Analysis](./ppstructure/layout/README.md)
- [Table Recognition](./ppstructure/table/README.md)
- [DocVQA](./ppstructure/vqa/README.md)
- [Key Information Extraction](./ppstructure/docs/kie_en.md)
- [Key Information Extraction](./ppstructure/kie/README.md)
- [Inference and Deployment](./deploy/README.md)
- [Python Inference](./ppstructure/docs/inference_en.md)
- [C++ Inference]()
- [Serving](./deploy/pdserving/README.md)
- [Academic algorithms](./doc/doc_en/algorithms_en.md)
- [C++ Inference](./deploy/cpp_infer/readme.md)
- [Serving](./deploy/hubserving/readme_en.md)
- [Academic Algorithms](./doc/doc_en/algorithm_overview_en.md)
- [Text detection](./doc/doc_en/algorithm_overview_en.md)
- [Text recognition](./doc/doc_en/algorithm_overview_en.md)
- [End-to-end](./doc/doc_en/algorithm_overview_en.md)
- [End-to-end OCR](./doc/doc_en/algorithm_overview_en.md)
- [Table Recognition](./doc/doc_en/algorithm_overview_en.md)
- [Key Information Extraction](./doc/doc_en/algorithm_overview_en.md)
- [Add New Algorithms to PaddleOCR](./doc/doc_en/add_new_algorithm_en.md)
- Data Annotation and Synthesis
- [Semi-automatic Annotation Tool: PPOCRLabel](./PPOCRLabel/README.md)
......@@ -135,9 +140,9 @@ PaddleOCR support a variety of cutting-edge algorithms related to OCR, and devel
- [General OCR Datasets(Chinese/English)](doc/doc_en/dataset/datasets_en.md)
- [HandWritten_OCR_Datasets(Chinese)](doc/doc_en/dataset/handwritten_datasets_en.md)
- [Various OCR Datasets(multilingual)](doc/doc_en/dataset/vertical_and_multilingual_datasets_en.md)
- [layout analysis](doc/doc_en/dataset/layout_datasets_en.md)
- [table recognition](doc/doc_en/dataset/table_datasets_en.md)
- [DocVQA](doc/doc_en/dataset/docvqa_datasets_en.md)
- [Layout Analysis](doc/doc_en/dataset/layout_datasets_en.md)
- [Table Recognition](doc/doc_en/dataset/table_datasets_en.md)
- [Key Information Extraction](doc/doc_en/dataset/kie_datasets_en.md)
- [Code Structure](./doc/doc_en/tree_en.md)
- [Visualization](#Visualization)
- [Community](#Community)
......@@ -176,7 +181,7 @@ PaddleOCR support a variety of cutting-edge algorithms related to OCR, and devel
</details>
<details open>
<summary>PP-Structure</summary>
<summary>PP-Structurev2</summary>
- layout analysis + table recognition
<div align="center">
......@@ -185,12 +190,28 @@ PaddleOCR support a variety of cutting-edge algorithms related to OCR, and devel
- SER (Semantic entity recognition)
<div align="center">
<img src="./ppstructure/docs/vqa/result_ser/zh_val_0_ser.jpg" width="800">
<img src="https://user-images.githubusercontent.com/25809855/186094456-01a1dd11-1433-4437-9ab2-6480ac94ec0a.png" width="600">
</div>
<div align="center">
<img src="https://user-images.githubusercontent.com/14270174/185310636-6ce02f7c-790d-479f-b163-ea97a5a04808.jpg" width="600">
</div>
<div align="center">
<img src="https://user-images.githubusercontent.com/14270174/185539517-ccf2372a-f026-4a7c-ad28-c741c770f60a.png" width="600">
</div>
- RE (Relation Extraction)
<div align="center">
<img src="./ppstructure/docs/vqa/result_re/zh_val_21_re.jpg" width="800">
<img src="https://user-images.githubusercontent.com/25809855/186094813-3a8e16cc-42e5-4982-b9f4-0134dfb5688d.png" width="600">
</div>
<div align="center">
<img src="https://user-images.githubusercontent.com/14270174/185393805-c67ff571-cf7e-4217-a4b0-8b396c4f22bb.jpg" width="600">
</div>
<div align="center">
<img src="https://user-images.githubusercontent.com/14270174/185540080-0431e006-9235-4b6d-b63d-0b3c6e1de48f.jpg" width="600">
</div>
</details>
......
......@@ -27,28 +27,20 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力
## 近期更新
- **🔥 2022.5.11–13, 8:30 pm nightly: three-day live course "OCR Technology Deep Dive and Industrial Application in Practice"**
- Day 1 (May 11): unveiling PP-OCRv3, the strongest open-source OCR system
- Day 2 (May 12): hands-on PP-OCRv3 training and deployment across cloud, edge, and device
- Day 3 (May 13): end-to-end walkthrough and practice of industrial OCR applications
- **🔥 2022.8.24 Release of PaddleOCR [release/2.6](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.6)**
- Release of [PP-Structurev2](./ppstructure/), with functions and performance fully upgraded, adapted to Chinese scenes, new support for [Layout Recovery](./ppstructure/recovery), and **a one-line command to convert PDF to Word**;
- [Layout Analysis](./ppstructure/layout) optimization: model storage reduced by 95% and speed increased 11 times, with an average CPU time cost of only 41 ms;
- [Table Recognition](./ppstructure/table) optimization: 3 optimization strategies designed, improving model accuracy by 6% at unchanged prediction time;
- [Key Information Extraction](./ppstructure/kie) optimization: a vision-independent model structure designed, raising semantic entity recognition accuracy by 2.8% and relation extraction accuracy by 9.1%.
Scan the QR code to sign up now!
<div align="center">
<img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/dygraph/doc/joinus.PNG" width = "150" height = "150" />
</div>
- **🔥 2022.5.9 Release of PaddleOCR [release/2.5](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.5)**
- **🔥 2022.8 Release of the [OCR scene application collection](./applications)**
- Includes **9 vertical models** such as digital tube, LCD screen, license plate, high-precision SVTR, and handwriting recognition, covering the main OCR vertical applications in the general, manufacturing, finance, and transportation industries.
- **2022.5.9 Release of PaddleOCR [release/2.5](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.5)**
- Release of [PP-OCRv3](./doc/doc_ch/ppocr_introduction.md#pp-ocrv3): at comparable speed, Chinese scenes improve by a further 5% over PP-OCRv2, English scenes by 11%, and the average recognition accuracy of the 80-language multilingual models by more than 5%;
- Release of the semi-automatic annotation tool [PPOCRLabelv2](./PPOCRLabel): adds annotation for table-text images, key information extraction tasks, and irregular-text images;
- Release of the industrial OCR deployment toolset: covers 22 training/deployment software-hardware environments and approaches, meeting 90% of enterprises' training and deployment needs;
- Release of the interactive open-source OCR e-book ["Dive into OCR"](./doc/doc_ch/ocr_book.md), covering cutting-edge theory and code practice of full-stack OCR technology, with companion teaching videos.
- 2021.12.21 Release of PaddleOCR [release/2.4](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.4)
- Add 1 text detection algorithm ([PSENet](./doc/doc_ch/algorithm_det_psenet.md)) and 3 text recognition algorithms ([NRTR](./doc/doc_ch/algorithm_rec_nrtr.md), [SEED](./doc/doc_ch/algorithm_rec_seed.md), [SAR](./doc/doc_ch/algorithm_rec_sar.md));
- Add 1 key information extraction algorithm ([SDMGR](./ppstructure/docs/kie.md)) and 3 [DocVQA](./ppstructure/vqa) algorithms (LayoutLM, LayoutLMv2, LayoutXLM) for document structuring.
- 2021.9.7 Release of PaddleOCR [release/2.3](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.3)
- Release of [PP-OCRv2](./doc/doc_ch/ppocr_introduction.md#pp-ocrv2): CPU inference speed improves by 220% over PP-OCR server; accuracy improves by 7% over PP-OCR mobile.
- 2021.8.3 Release of PaddleOCR [release/2.2](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.2)
- Release of the document structure analysis toolkit [PP-Structure](./ppstructure/README_ch.md), supporting layout analysis and table recognition (with Excel export).
> [More](./doc/doc_ch/update.md)
......@@ -56,7 +48,9 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力
Supports a variety of cutting-edge OCR-related algorithms, builds the industrial featured models/solutions [PP-OCR](./doc/doc_ch/ppocr_introduction.md) and [PP-Structure](./ppstructure/README_ch.md) on this basis, and covers the whole pipeline of data production, model training, compression, inference, and deployment.
![](./doc/features.png)
<div align="center">
<img src="https://user-images.githubusercontent.com/25809855/186170862-b8f80f6c-fee7-4b26-badc-de9c327c76ce.png">
</div>
> It is recommended to experience the above features starting from the Quick Start in the documentation tutorials.
......@@ -71,24 +65,22 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力
## "Dive into OCR" E-book
- ["Dive into OCR" e-book 📚](./doc/doc_ch/ocr_book.md)
## Scene Applications
- PaddleOCR scene applications cover the main OCR vertical applications in the general, manufacturing, finance, and transportation industries. On top of the general capabilities of PP-OCR and PP-Structure, they demonstrate scene-data fine-tuning, model optimization methods, and data augmentation in notebook form, offering demonstrations and inspiration for developers to land OCR applications quickly. See the [README](./applications) for details.
<a name="开源社区"></a>
## Open-source Community
- **Project collaboration 📑:** If you are an enterprise developer with a clear OCR vertical application need, fill in the [questionnaire](https://paddle.wjx.cn/vj/QwF7GKw.aspx) to start cooperation with the official team at different levels, free of charge.
- **Join the community 👬:** Scan the QR code on WeChat and fill in the questionnaire to join the group chat and receive benefits:
- **Links to the live course "OCR Technology Deep Dive and Industrial Application in Practice", 8:30 pm nightly on May 11–13**
- **Replay links for the release-walkthrough live course series "OCR Technology Deep Dive and Industrial Application in Practice"**
- **A 10 GB OCR learning gift pack:** the "Dive into OCR" e-book with companion lecture videos and notebook projects; 66 bundled OCR papers from top conferences including CVPR, AAAI, IJCAI, and ICCV; videos of past PaddleOCR release live courses; and project-sharing videos from outstanding community developers.
- **Community contributions** 🏅️: The [community contributions](./doc/doc_ch/thirdparty.md) document collects the **tools and applications built with PaddleOCR** by community users, as well as **features contributed to PaddleOCR and improved documentation and code**. It is the official wall of honor for community developers and a broadcast station that helps promote quality projects.
- **Community projects** 🏅️: The [community projects](./doc/doc_ch/thirdparty.md) document collects the **tools and applications built with PaddleOCR** by community users, as well as **features contributed to PaddleOCR and improved documentation and code**. It is the official wall of honor for community developers and a broadcast station that helps promote quality projects.
- **Community regular competition** 🎁: A point-based event for OCR developers covering four categories (documentation, code, models, and applications), evaluated and rewarded quarterly; see this [link](https://github.com/PaddlePaddle/PaddleOCR/issues/4982) for topics and registration.
<div align="center">
<img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/dygraph/doc/joinus.PNG" width = "200" height = "200" />
<img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/dygraph/doc/joinus.PNG" width = "150" height = "150" />
</div>
<a name="模型下载"></a>
## PP-OCR Series Model List (Updating)
......@@ -96,14 +88,21 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力
| ------------------------------------- | ----------------------- | --------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| Chinese & English ultra-lightweight PP-OCRv3 model (16.2M) | ch_PP-OCRv3_xx | Mobile & Server | [inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar) / [training model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar) | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [training model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar) / [training model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_train.tar) |
| English ultra-lightweight PP-OCRv3 model (13.4M) | en_PP-OCRv3_xx | Mobile & Server | [inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar) / [training model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_distill_train.tar) | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [training model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar) / [training model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_train.tar) |
| Chinese & English ultra-lightweight PP-OCRv2 model (13.0M) | ch_PP-OCRv2_xx | Mobile & Server | [inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) / [training model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar) | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar) / [training model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_train.tar) |
| Chinese & English ultra-lightweight PP-OCR mobile model (9.4M) | ch_ppocr_mobile_v2.0_xx | Mobile & Server | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar) | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_pre.tar) |
| Chinese & English general PP-OCR server model (143.4M) | ch_ppocr_server_v2.0_xx | Server | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar) | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_pre.tar) |
For more model downloads (including multilingual models), see [PP-OCR series model downloads](./doc/doc_ch/models_list.md); for document analysis models, see [PP-Structure series model downloads](./ppstructure/docs/models_list.md).
- For more downloads of the ultra-lightweight OCR series (including multilingual models), see [PP-OCR series model downloads](./doc/doc_ch/models_list.md); for document analysis models, see [PP-Structure series model downloads](./ppstructure/docs/models_list.md)
### PaddleOCR Scene Application Models
| Industry | Category | Highlights | Documentation | Model Download |
| ---- | ------------ | ---------------------------------- | ------------------------------------------------------------ | --------------------------------------------- |
| Manufacturing | Digital tube recognition | Digital tube data synthesis, tuning for missed recognitions | [Digital tube character recognition for optical power meters](./applications/光功率计数码管字符识别/光功率计数码管字符识别.md) | [Download link](./applications/README.md#模型下载) |
| Finance | General form recognition | Multimodal general form structured extraction | [Multimodal form recognition](./applications/多模态表单识别.md) | [Download link](./applications/README.md#模型下载) |
| Transportation | License plate recognition | Multi-angle image processing, lightweight model, on-device deployment | [Lightweight license plate recognition](./applications/轻量级车牌识别.md) | [Download link](./applications/README.md#模型下载) |
- For more OCR vertical application models for the manufacturing, finance, and transportation industries (e.g. electricity meters, LCD screens, high-precision SVTR models), see [scene application model downloads](./applications)
<a name="文档教程"></a>
## Documentation Tutorials
- [Environment Preparation](./doc/doc_ch/environment.md)
......@@ -120,7 +119,7 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力
- [Knowledge Distillation](./doc/doc_ch/knowledge_distillation.md)
- [Inference and Deployment](./deploy/README_ch.md)
- [Python Inference](./doc/doc_ch/inference_ppocr.md)
- [C++ Inference](./deploy/cpp_infer/readme.md)
- [C++ Inference](./deploy/cpp_infer/readme_ch.md)
- [Serving](./deploy/pdserving/README_CN.md)
- [On-device Deployment](./deploy/lite/readme.md)
- [Paddle2ONNX Model Conversion and Prediction](./deploy/paddle2onnx/readme.md)
......@@ -132,16 +131,17 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力
- [Model Training](./doc/doc_ch/training.md)
- [Layout Analysis](./ppstructure/layout/README_ch.md)
- [Table Recognition](./ppstructure/table/README_ch.md)
- [Key Information Extraction](./ppstructure/docs/kie.md)
- [DocVQA](./ppstructure/vqa/README_ch.md)
- [Key Information Extraction](./ppstructure/kie/README_ch.md)
- [Inference and Deployment](./deploy/README_ch.md)
- [Python Inference](./ppstructure/docs/inference.md)
- [C++ Inference]()
- [Serving](./deploy/pdserving/README_CN.md)
- [Cutting-edge Algorithms and Models 🚀](./doc/doc_ch/algorithm.md)
- [Text Detection Algorithms](./doc/doc_ch/algorithm_overview.md#11-%E6%96%87%E6%9C%AC%E6%A3%80%E6%B5%8B%E7%AE%97%E6%B3%95)
- [Text Recognition Algorithms](./doc/doc_ch/algorithm_overview.md#12-%E6%96%87%E6%9C%AC%E8%AF%86%E5%88%AB%E7%AE%97%E6%B3%95)
- [End-to-end Algorithms](./doc/doc_ch/algorithm_overview.md#2-%E6%96%87%E6%9C%AC%E8%AF%86%E5%88%AB%E7%AE%97%E6%B3%95)
- [C++ Inference](./deploy/cpp_infer/readme_ch.md)
- [Serving](./deploy/hubserving/readme.md)
- [Cutting-edge Algorithms and Models 🚀](./doc/doc_ch/algorithm_overview.md)
- [Text Detection Algorithms](./doc/doc_ch/algorithm_overview.md)
- [Text Recognition Algorithms](./doc/doc_ch/algorithm_overview.md)
- [End-to-end OCR Algorithms](./doc/doc_ch/algorithm_overview.md)
- [Table Recognition Algorithms](./doc/doc_ch/algorithm_overview.md)
- [Key Information Extraction Algorithms](./doc/doc_ch/algorithm_overview.md)
- [Add New Algorithms to PaddleOCR](./doc/doc_ch/add_new_algorithm.md)
- [Scene Applications](./applications)
- Data Annotation and Synthesis
......@@ -155,7 +155,7 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力
- [Vertical and Multilingual OCR Datasets](doc/doc_ch/dataset/vertical_and_multilingual_datasets.md)
- [Layout Analysis Datasets](doc/doc_ch/dataset/layout_datasets.md)
- [Table Recognition Datasets](doc/doc_ch/dataset/table_datasets.md)
- [DocVQA Datasets](doc/doc_ch/dataset/docvqa_datasets.md)
- [Key Information Extraction Datasets](doc/doc_ch/dataset/kie_datasets.md)
- [Code Structure](./doc/doc_ch/tree.md)
- [Visualization](#效果展示)
- ["Dive into OCR" e-book 📚](./doc/doc_ch/ocr_book.md)
......@@ -214,14 +214,30 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力
- SER (Semantic Entity Recognition)
<div align="center">
<img src="./ppstructure/docs/vqa/result_ser/zh_val_0_ser.jpg" width="800">
<img src="https://user-images.githubusercontent.com/14270174/185310636-6ce02f7c-790d-479f-b163-ea97a5a04808.jpg" width="600">
</div>
<div align="center">
<img src="https://user-images.githubusercontent.com/14270174/185539517-ccf2372a-f026-4a7c-ad28-c741c770f60a.png" width="600">
</div>
<div align="center">
<img src="https://user-images.githubusercontent.com/25809855/186094456-01a1dd11-1433-4437-9ab2-6480ac94ec0a.png" width="600">
</div>
- RE (Relation Extraction)
<div align="center">
<img src="./ppstructure/docs/vqa/result_re/zh_val_21_re.jpg" width="800">
<img src="https://user-images.githubusercontent.com/14270174/185393805-c67ff571-cf7e-4217-a4b0-8b396c4f22bb.jpg" width="600">
</div>
<div align="center">
<img src="https://user-images.githubusercontent.com/14270174/185540080-0431e006-9235-4b6d-b63d-0b3c6e1de48f.jpg" width="600">
</div>
<div align="center">
<img src="https://user-images.githubusercontent.com/25809855/186094813-3a8e16cc-42e5-4982-b9f4-0134dfb5688d.png" width="600">
</div>
</details>
<a name="许可证书"></a>
......
......@@ -16,5 +16,6 @@ from .paddleocr import *
__version__ = paddleocr.VERSION
__all__ = [
'PaddleOCR', 'PPStructure', 'draw_ocr', 'draw_structure_result',
'save_structure_res', 'download_with_progressbar'
'save_structure_res', 'download_with_progressbar', 'sorted_layout_boxes',
'convert_info_docx', 'to_excel'
]
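With these new exports, the table helper used by PPOCRLabel earlier in this diff can be imported from the package root. A minimal sketch, assuming `to_excel(html_table, excel_path)` writes an HTML table string to an .xlsx file as in ppstructure's table predictor:
```python
from paddleocr import to_excel

# write a tiny HTML table string to an Excel file (paths are examples)
html_table = '<html><body><table><tr><td>No</td><td>12269563</td></tr></table></html>'
to_excel(html_table, './table_demo.xlsx')
```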
......@@ -20,10 +20,10 @@ PaddleOCR场景应用覆盖通用,制造、金融、交通行业的主要OCR
### General
| Category | Highlights | Model Download | Tutorial |
| ---------------------- | ------------ | -------------- | --------------------------------------- |
| High-precision Chinese recognition model SVTR | 3% more accurate than the PP-OCRv3 recognition model; suitable for data mining or scenarios with low demands on prediction efficiency. | [Model Download](#2) | [中文](./高精度中文识别模型.md)/English |
| Handwriting recognition | New glyph support | | |
| Category | Highlights | Model Download | Tutorial | Example |
| ---------------------- | ------------------------------------------------------------ | -------------- | --------------------------------------- | ------------------------------------------------------------ |
| High-precision Chinese recognition model SVTR | 3% more accurate than the PP-OCRv3 recognition model;<br />suitable for data mining or scenarios with low demands on prediction efficiency. | [Model Download](#2) | [中文](./高精度中文识别模型.md)/English | <img src="../doc/ppocr_v3/svtr_tiny.png" width=200> |
| Handwriting recognition | New glyph support | [Model Download](#2) | [中文](./手写文字识别.md)/English | <img src="https://ai-studio-static-online.cdn.bcebos.com/7a8865b2836f42d382e7c3fdaedc4d307d797fa2bcd0466e9f8b7705efff5a7b" width = "200" height = "100" /> |
<a name="12"></a>
......@@ -42,14 +42,14 @@ PaddleOCR场景应用覆盖通用,制造、金融、交通行业的主要OCR
### Finance
| Category | Highlights | Model Download | Tutorial | Example |
| -------------- | ------------------------ | -------------- | ----------------------------------- | ------------------------------------------------------------ |
| Form VQA | Multimodal general form structured extraction | [Model Download](#2) | [中文](./多模态表单识别.md)/English | <img src="https://ai-studio-static-online.cdn.bcebos.com/a3b25766f3074d2facdf88d4a60fc76612f51992fd124cf5bd846b213130665b" width = "200" height = "200" /> |
| VAT invoice | Coming soon | | | |
| Seal detection and recognition | End-to-end curved text recognition | | | |
| General card recognition | General structured extraction | | | |
| ID card recognition | Structured extraction, image shading | | | |
| Contract comparison | Dense text detection, NLP pipeline | | | |
| Category | Highlights | Model Download | Tutorial | Example |
| -------------- | ----------------------------- | -------------- | ------------------------------------- | ------------------------------------------------------------ |
| Form VQA | Multimodal general form structured extraction | [Model Download](#2) | [中文](./多模态表单识别.md)/English | <img src="https://ai-studio-static-online.cdn.bcebos.com/a3b25766f3074d2facdf88d4a60fc76612f51992fd124cf5bd846b213130665b" width = "200" height = "200" /> |
| VAT invoice | Key information extraction; SER and RE task training | [Model Download](#2) | [中文](./发票关键信息抽取.md)/English | <img src="https://user-images.githubusercontent.com/14270174/185393805-c67ff571-cf7e-4217-a4b0-8b396c4f22bb.jpg" width = "200" /> |
| Seal detection and recognition | End-to-end curved text recognition | | | |
| General card recognition | General structured extraction | | | |
| ID card recognition | Structured extraction, image shading | | | |
| Contract comparison | Dense text detection, NLP pipeline | | | |
<a name="14"></a>
......
English | [简体中文](README.md)
# Application
PaddleOCR scene applications cover the main OCR vertical applications in the general, manufacturing, finance, and transportation industries. On the basis of the general capabilities of PP-OCR and PP-Structure, they demonstrate scene-data fine-tuning, model optimization methods, and data augmentation in notebook form, providing demonstrations and inspiration for developers to land OCR applications quickly.
- [Tutorial](#1)
- [General](#11)
- [Manufacturing](#12)
- [Finance](#13)
- [Transportation](#14)
- [Model Download](#2)
<a name="1"></a>
## Tutorial
<a name="11"></a>
### General
| Case | Feature | Model Download | Tutorial | Example |
| ---------------------------------------------- | ---------------- | -------------------- | --------------------------------------- | ------------------------------------------------------------ |
| High-precision Chinese recognition model SVTR | New model | [Model Download](#2) | [中文](./高精度中文识别模型.md)/English | <img src="../doc/ppocr_v3/svtr_tiny.png" width=200> |
| Chinese handwriting recognition | New font support | [Model Download](#2) | [中文](./手写文字识别.md)/English | <img src="https://ai-studio-static-online.cdn.bcebos.com/7a8865b2836f42d382e7c3fdaedc4d307d797fa2bcd0466e9f8b7705efff5a7b" width = "200" height = "100" /> |
<a name="12"></a>
### Manufacturing
| Case | Feature | Model Download | Tutorial | Example |
| ------------------------------ | ------------------------------------------------------------ | -------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ |
| Digital tube | Digital tube data synthesis, recognition model fine-tuning | [Model Download](#2) | [中文](./光功率计数码管字符识别/光功率计数码管字符识别.md)/English | <img src="https://ai-studio-static-online.cdn.bcebos.com/7d5774a273f84efba5b9ce7fd3f86e9ef24b6473e046444db69fa3ca20ac0986" width = "200" height = "100" /> |
| LCD screen | Detection model distillation, serving deployment | [Model Download](#2) | [中文](./液晶屏读数识别.md)/English | <img src="https://ai-studio-static-online.cdn.bcebos.com/901ab741cb46441ebec510b37e63b9d8d1b7c95f63cc4e5e8757f35179ae6373" width = "200" height = "100" /> |
| Packaging production date | Dot-matrix character synthesis, recognition of over-exposed and over-dark text | [Model Download](#2) | [中文](./包装生产日期识别.md)/English | <img src="https://ai-studio-static-online.cdn.bcebos.com/d9e0533cc1df47ffa3bbe99de9e42639a3ebfa5bce834bafb1ca4574bf9db684" width = "200" height = "100" /> |
| PCB text recognition | Small size text detection and recognition | [Model Download](#2) | [中文](./PCB字符识别/PCB字符识别.md)/English | <img src="https://ai-studio-static-online.cdn.bcebos.com/95d8e95bf1ab476987f2519c0f8f0c60a0cdc2c444804ed6ab08f2f7ab054880" width = "200" height = "100" /> |
| Meter text recognition | High-resolution image detection fine-tuning | [Model Download](#2) | | |
| LCD character defect detection | Non-text character recognition | | | |
<a name="13"></a>
### Finance
| Case | Feature | Model Download | Tutorial | Example |
| ----------------------------------- | -------------------------------------------------- | -------------------- | ------------------------------------- | ------------------------------------------------------------ |
| Form visual question and answer | Multimodal general form structured extraction | [Model Download](#2) | [中文](./多模态表单识别.md)/English | <img src="https://ai-studio-static-online.cdn.bcebos.com/a3b25766f3074d2facdf88d4a60fc76612f51992fd124cf5bd846b213130665b" width = "200" height = "200" /> |
| VAT invoice | Key information extraction, SER, RE task fine-tune | [Model Download](#2) | [中文](./发票关键信息抽取.md)/English | <img src="https://user-images.githubusercontent.com/14270174/185393805-c67ff571-cf7e-4217-a4b0-8b396c4f22bb.jpg" width = "200" /> |
| Seal detection and recognition | End-to-end curved text recognition | | | |
| Universal card recognition | Universal structured extraction | | | |
| ID card recognition | Structured extraction, image shading | | | |
| Contract key information extraction | Dense text detection, NLP concatenation | | | |
<a name="14"></a>
### Transportation
| Case | Feature | Model Download | Tutorial | Example |
| ----------------------------------------------- | ------------------------------------------------------------ | -------------------- | ----------------------------------- | ------------------------------------------------------------ |
| License plate recognition | Multi-angle images, lightweight models, edge-side deployment | [Model Download](#2) | [中文](./轻量级车牌识别.md)/English | <img src="https://ai-studio-static-online.cdn.bcebos.com/76b6a0939c2c4cf49039b6563c4b28e241e11285d7464e799e81c58c0f7707a7" width = "200" height = "100" /> |
| Driver's license/driving license identification | coming soon | | | |
| Express text recognition | coming soon | | | |
<a name="2"></a>
## Model Download
- For international developers: we are building a way to download these trained models. Since the current tutorials are in Chinese, if you are proficient in both Chinese and English, or willing to polish English documents, please let us know in a [discussion](https://github.com/PaddlePaddle/PaddleOCR/discussions).
- For Chinese developers: to download the trained application models for the above scenarios, scan the QR code below with WeChat, follow the PaddlePaddle official account and fill in the questionnaire, then join the official PaddleOCR group to get 20 GB of OCR learning materials (including the "Dive into OCR" e-book, course videos, application models, and more)
<div align="center">
<img src="https://ai-studio-static-online.cdn.bcebos.com/dd721099bd50478f9d5fb13d8dd00fad69c22d6848244fd3a1d3980d7fefc63e" width = "150" height = "150" />
</div>
If you are an enterprise developer and have not found a suitable solution in the above scenarios, you can fill in the [OCR Application Cooperation Survey Questionnaire](https://paddle.wjx.cn/vj/QwF7GKw.aspx) to carry out different levels of cooperation with the official team **for free**, including but not limited to problem abstraction, determination of technical solutions, project Q&A, and joint research and development. If you have already used PaddleOCR in your project, you can also fill out this questionnaire to promote it jointly with PaddlePaddle and raise your company's technical profile. Looking forward to your submission!
<a href="https://trackgit.com">
<img src="https://us-central1-trackgit-analytics.cloudfunctions.net/token/ping/l6u6aszdfexs2jnrlil6" alt="trackgit-views" />
</a>
# Invoice Key Information Extraction Based on VI-LayoutXLM
- [1. Background and Significance](#1-项目背景及意义)
- [2. Project Content](#2-项目内容)
- [3. Environment Setup](#3-安装环境)
- [4. Key Information Extraction](#4-关键信息抽取)
- [4.1 Text Detection](#41-文本检测)
- [4.2 Text Recognition](#42-文本识别)
- [4.3 Semantic Entity Recognition](#43-语义实体识别)
- [4.4 Relation Extraction](#44-关系抽取)
## 1. Background and Significance
Key information extraction is widely used in document scenarios, for example extracting the name and address from an ID card, or the name and contact details from an express waybill. Traditional template-matching solutions require designing and adapting a template for every scenario, which is cumbersome and not robust. To address this, we use the key information extraction solution from the PaddleOCR suite to extract key information in the VAT invoice scenario.
## 2. Project Content
Based on the open-source PaddleOCR suite, this project takes the VI-LayoutXLM multimodal key information extraction model as its base, adapts it to the VAT invoice scenario, and extracts the key information of that scenario.
## 3. Environment Setup
```bash
# First, clone the official PaddleOCR project and install the dependencies
# (run the clone on the first run only)
git clone https://gitee.com/PaddlePaddle/PaddleOCR.git
cd PaddleOCR
# install PaddleOCR's dependencies
pip install -r requirements.txt
# install the dependencies of the key information extraction task
pip install -r ./ppstructure/kie/requirements.txt
```
## 4. Key Information Extraction
Key information extraction from document images consists of three parts: (1) text detection, (2) text recognition, and (3) the key information extraction method itself, i.e. semantic entity recognition and/or relation extraction. Each part is introduced below; a rough sketch of the overall pipeline follows this paragraph.
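The sketch below only shows the data flow between the three stages; the stage functions are placeholders standing in for real models, not PaddleOCR APIs:
```python
# Hypothetical sketch of the three-stage document KIE pipeline described above.
def detect_text(image):
    raise NotImplementedError  # stage (1): a text detection model

def recognize_text(image, box):
    raise NotImplementedError  # stage (2): a text recognition model

def extract_entities(boxes, texts, image):
    raise NotImplementedError  # stage (3): SER and/or RE

def run_kie(image):
    boxes = detect_text(image)
    texts = [recognize_text(image, box) for box in boxes]
    return extract_entities(boxes, texts, image)
```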
### 4.1 Text Detection
This tutorial focuses on training and prediction of the key information extraction model for invoices, so during key information extraction we test directly with the annotated text detection and recognition results. If you want to customize a text detection model for this scenario and complete the end-to-end key information extraction pipeline, refer to the [text detection model training tutorial](../doc/doc_ch/detection.md): prepare data in the training data format and fine-tune a vertical text detection model for this scenario.
### 4.2 Text Recognition
This tutorial focuses on training and prediction of the key information extraction model for invoices, so during key information extraction we test directly with the provided text detection and recognition annotations. If you want to customize a text recognition model for this scenario and complete the end-to-end key information extraction pipeline, refer to the [text recognition model training tutorial](../doc/doc_ch/recognition.md): prepare data in the training data format and fine-tune a vertical text recognition model for this scenario.
### 4.3 Semantic Entity Recognition
Semantic entity recognition (SER) determines the category of a given text line, e.g. `姓名` (name) or `住址` (address). PaddleOCR provides a VI-LayoutXLM-based multimodal semantic entity recognition method that fuses text, position, and layout information. Compared with the LayoutXLM multimodal model, it removes the visual backbone used for feature extraction, introduces a text-line sorting method that follows reading order, and trains with the UDML joint mutual distillation method, ultimately surpassing LayoutXLM in both accuracy and speed. For more on the VI-LayoutXLM algorithm and its accuracy metrics, see the [VI-LayoutXLM algorithm introduction](../doc/doc_ch/algorithm_kie_vi_layoutxlm.md).
#### 4.3.1 Data Preparation
Taking the invoice scenario as an example, we first annotate the key fields as `question-answer` key-value pairs: for instance, if the number (No) is 12270830, the `No` field is labeled as question and `12270830` as answer, as shown in the figure below.
<div align="center">
<img src="https://user-images.githubusercontent.com/14270174/185381131-76b6e260-04fe-46d9-baca-6bdd7fe0d0ce.jpg" width="800">
</div>
**Note:**
* If no detection boxes for **non-key-information content** were annotated when the text detection data was labeled, they do not need to be annotated for the key information extraction task either, as in the figure above; if such boxes were annotated, their label should be set to other.
* Annotation is done at the text-line level; there is no need to annotate the positions of individual characters.
A pre-processed VAT invoice dataset can be downloaded here: [VAT invoice dataset download link](https://aistudio.baidu.com/aistudio/datasetdetail/165561).
Download the invoice dataset and extract it into the train_data directory; the directory structure is as follows.
```
train_data
|--zzsfp
|---class_list.txt
|---imgs/
|---train.json
|---val.json
```
Here `class_list.txt` is the category list containing the three classes `other`, `question`, and `answer` (case-insensitive). Under the `imgs` directory, `train.json` and `val.json` are the annotation files of the training and evaluation sets, respectively; the training set contains 30 images and the validation set 8. Part of an annotation line is shown below.
```py
b33.jpg [{"transcription": "No", "label": "question", "points": [[2882, 472], [3026, 472], [3026, 588], [2882, 588]], }, {"transcription": "12269563", "label": "answer", "points": [[3066, 448], [3598, 448], [3598, 576], [3066, 576]], ]}]
```
Compared with OCR detection annotations, only the `label` field is added.
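A minimal loading sketch for such a label file (assuming the usual tab-separated `filename\t<json>` layout of PPOCRLabel outputs):
```python
import json

def load_kie_labels(label_path):
    # each line: "<image name>\t<json list of {transcription, label, points}>"
    labels = {}
    with open(label_path, 'r', encoding='utf-8') as f:
        for line in f:
            name, anno = line.rstrip('\n').split('\t', 1)
            labels[name] = json.loads(anno)
    return labels
```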
#### 4.3.2 Training
The VI-LayoutXLM configuration is [ser_vi_layoutxlm_xfund_zh_udml.yml](../configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh_udml.yml); the data paths, number of classes, and class list file need to be modified:
```yml
Architecture:
model_type: &model_type "kie"
name: DistillationModel
algorithm: Distillation
Models:
Teacher:
pretrained:
freeze_params: false
return_all_feats: true
model_type: *model_type
algorithm: &algorithm "LayoutXLM"
Transform:
Backbone:
name: LayoutXLMForSer
pretrained: True
# one of base or vi
mode: vi
checkpoints:
# define the number of classes
num_classes: &num_classes 5
...
PostProcess:
name: DistillationSerPostProcess
model_name: ["Student", "Teacher"]
key: backbone_out
# define the class list file
class_path: &class_path train_data/zzsfp/class_list.txt
Train:
dataset:
name: SimpleDataSet
# define the training data directory and label file
data_dir: train_data/zzsfp/imgs
label_file_list:
- train_data/zzsfp/train.json
...
Eval:
dataset:
# define the evaluation data directory and label file
name: SimpleDataSet
data_dir: train_data/zzsfp/imgs
label_file_list:
- train_data/zzsfp/val.json
...
```
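The walkthrough does not show the training command itself; presumably it is launched through PaddleOCR's standard entry point, e.g. (single-GPU sketch; adjust the config path to your modified file):
```bash
# launch SER training with the modified UDML config (path is an example)
python3 tools/train.py -c configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh_udml.yml
```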
The training results of LayoutXLM and VI-LayoutXLM on this scenario are shown below.
| Model | Epochs | Hmean |
| :---: | :---: | :---: |
| LayoutXLM | 50 | 100% |
| VI-LayoutXLM | 50 | 100% |
Because the current dataset is small and the scenario simple, both models reach an Hmean of 100%.
#### 4.3.3 Model Evaluation
Training uses a knowledge distillation strategy, and only the student model's parameters are kept at the end. For evaluation, the student model's configuration file, [ser_vi_layoutxlm_xfund_zh.yml](../configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml), needs the same modifications as the training configuration: **number of classes, class list file, and data directories**.
After the modifications, run the following command to evaluate:
```bash
# Note: adjust the config file path and the saved model path in this command to your own
python3 tools/eval.py -c ./fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy
```
The output is as follows.
```
[2022/08/18 08:49:58] ppocr INFO: metric eval ***************
[2022/08/18 08:49:58] ppocr INFO: precision:1.0
[2022/08/18 08:49:58] ppocr INFO: recall:1.0
[2022/08/18 08:49:58] ppocr INFO: hmean:1.0
[2022/08/18 08:49:58] ppocr INFO: fps:1.9740402401574881
```
#### 4.3.4 Model Prediction
Run the following command for prediction:
```bash
python3 tools/infer_kie_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/XFUND/zh_val/val.json Global.infer_mode=False
```
The prediction results are saved in the `Global.save_res_path` directory specified in the configuration file.
Some prediction results are shown below.
<div align="center">
<img src="https://user-images.githubusercontent.com/14270174/185310636-6ce02f7c-790d-479f-b163-ea97a5a04808.jpg" width="800">
</div>
* Note: during prediction, the text detection and recognition results used are the annotated ones, read directly from the json file.
If you want to run inference on results produced by the OCR engine instead, use the following command:
```bash
python3 tools/infer_kie_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/imgs/b25.jpg Global.infer_mode=True
```
The result is shown below.
<div align="center">
<img src="https://user-images.githubusercontent.com/14270174/185384321-61153faa-e407-45c4-8e7c-a39540248189.jpg" width="800">
</div>
This uses the PP-OCRv3 text detection and recognition models to obtain the text positions and content.
Because no extra fields were labeled as the other class during training, most of the detected fields are predicted as question or answer.
If you want to plug in the OCR detection and recognition models trained on your own vertical scenario, pass in the paths of the detection and recognition inference models as below to chain OCR text detection and recognition with SER:
```bash
python3 tools/infer_kie_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/imgs/b25.jpg Global.infer_mode=True Global.kie_rec_model_dir="your_rec_model" Global.kie_det_model_dir="your_det_model"
```
### 4.4 Relation Extraction
With the SER model we can obtain the categories of all question and answer fields in an image, but we still need the links between each question and its answer. We therefore additionally train a relation extraction model, again fine-tuning the VI-LayoutXLM multimodal pretrained model for the downstream RE task.
#### 4.4.1 Data Preparation
Taking the invoice scenario as an example, compared with the SER task, RE additionally requires annotating the id of each text line and the linking relations, as shown below.
<div align="center">
<img src="https://user-images.githubusercontent.com/14270174/185387870-dc9125a0-9ceb-4036-abf3-184b6e65dc7d.jpg" width="800">
</div>
Part of the annotation file is shown below.
```py
b33.jpg [{"transcription": "No", "label": "question", "points": [[2882, 472], [3026, 472], [3026, 588], [2882, 588]], "id": 0, "linking": [[0, 1]]}, {"transcription": "12269563", "label": "answer", "points": [[3066, 448], [3598, 448], [3598, 576], [3066, 576]], "id": 1, "linking": [[0, 1]]}]
```
Compared with the SER annotations, the `id` and `linking` fields are added, denoting a text line's unique identifier and its linking relations, respectively.
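For illustration, a small sketch that pairs each question with its answer via `linking`, using the annotation line above (same parsing assumptions as the SER label sketch earlier):
```python
import json

def pair_qas(annos):
    # build (question text, answer text) pairs from one RE annotation line
    by_id = {a['id']: a for a in annos}
    pairs = []
    for a in annos:
        if a['label'] == 'question':
            for qid, aid in a.get('linking', []):
                if qid == a['id'] and aid in by_id:
                    pairs.append((a['transcription'], by_id[aid]['transcription']))
    return pairs

annos = json.loads('[{"transcription": "No", "label": "question", "id": 0, '
                   '"linking": [[0, 1]]}, {"transcription": "12269563", '
                   '"label": "answer", "id": 1, "linking": [[0, 1]]}]')
print(pair_qas(annos))  # [('No', '12269563')]
```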
A pre-processed VAT invoice dataset can be downloaded here: [VAT invoice dataset download link](https://aistudio.baidu.com/aistudio/datasetdetail/165561).
#### 4.4.2 Training
The VI-LayoutXLM-based RE task configuration is [re_vi_layoutxlm_xfund_zh_udml.yml](../configs/kie/vi_layoutxlm/re_vi_layoutxlm_xfund_zh_udml.yml); the **data paths and class list file** need to be modified:
```yml
Train:
dataset:
name: SimpleDataSet
# define the training data directory and label file
data_dir: train_data/zzsfp/imgs
label_file_list:
- train_data/zzsfp/train.json
transforms:
- DecodeImage: # load image
img_mode: RGB
channel_first: False
- VQATokenLabelEncode: # Class handling label
contains_re: True
algorithm: *algorithm
class_path: &class_path train_data/zzsfp/class_list.txt
...
Eval:
dataset:
# define the evaluation data directory and label file
name: SimpleDataSet
data_dir: train_data/zzsfp/imgs
label_file_list:
- train_data/zzsfp/val.json
...
```
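As with SER, training can presumably be launched with the standard entry point (a sketch; adjust the config path as needed):
```bash
# launch RE training with the modified UDML config (path is an example)
python3 tools/train.py -c configs/kie/vi_layoutxlm/re_vi_layoutxlm_xfund_zh_udml.yml
```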
The training results of LayoutXLM and VI-LayoutXLM on this scenario are shown below.
| Model | Epochs | Hmean |
| :---: | :---: | :---: |
| LayoutXLM | 50 | 98.0% |
| VI-LayoutXLM | 50 | 99.3% |
VI-LayoutXLM's Hmean is 1.3% higher than LayoutXLM's.
To obtain the trained models, scan the QR code and fill in the questionnaire, then join the official PaddleOCR group chat to get the download links for all OCR vertical models, the "Dive into OCR" e-book, and the full set of OCR learning materials 🎁
<div align="center">
<img src="https://ai-studio-static-online.cdn.bcebos.com/dd721099bd50478f9d5fb13d8dd00fad69c22d6848244fd3a1d3980d7fefc63e" width = "150" height = "150" />
</div>
#### 4.4.3 Model Evaluation
Training uses a knowledge distillation strategy, and only the student model's parameters are kept at the end. For evaluation, the student model's configuration file, [re_vi_layoutxlm_xfund_zh.yml](../configs/kie/vi_layoutxlm/re_vi_layoutxlm_xfund_zh.yml), needs the same modifications as the training configuration: **class list file and data directories**.
After the modifications, run the following command to evaluate:
```bash
# Note: adjust the config file path and the saved model path in this command to your own
python3 tools/eval.py -c ./fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/re_vi_layoutxlm_fapiao_udml/best_accuracy
```
The output is as follows.
```
[2022/08/18 12:17:14] ppocr INFO: metric eval ***************
[2022/08/18 12:17:14] ppocr INFO: precision:1.0
[2022/08/18 12:17:14] ppocr INFO: recall:0.9873417721518988
[2022/08/18 12:17:14] ppocr INFO: hmean:0.9936305732484078
[2022/08/18 12:17:14] ppocr INFO: fps:2.765963539771157
```
#### 4.4.4 Model Prediction
Run the following command for prediction:
```bash
# -c is the configuration file of the RE task
# the fields after -o are overrides for the RE task
# -c_ser is the configuration file of the SER task
# the fields after -o_ser are overrides for the SER task
python3 tools/infer_kie_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/re_vi_layoutxlm_fapiao_trained/best_accuracy Global.infer_img=./train_data/zzsfp/val.json Global.infer_mode=False -c_ser fapiao/ser_vi_layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_trained/best_accuracy
```
The prediction results are saved in the `Global.save_res_path` directory specified in the configuration file.
Some prediction results are shown below.
<div align="center">
<img src="https://user-images.githubusercontent.com/14270174/185393805-c67ff571-cf7e-4217-a4b0-8b396c4f22bb.jpg" width="800">
</div>
* Note: during prediction, the text detection and recognition results used are the annotated ones, read directly from the json file.
If you want to run inference on results produced by the OCR engine instead, use the following command:
```bash
python3 tools/infer_kie_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/re_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/val.json Global.infer_mode=True -c_ser fapiao/ser_vi_layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy
```
If you want to plug in the OCR detection and recognition models trained on your own vertical scenario, pass them in as below to chain SER and RE:
```bash
python3 tools/infer_kie_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/re_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/val.json Global.infer_mode=True -c_ser fapiao/ser_vi_layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.kie_rec_model_dir="your_rec_model" Global.kie_det_model_dir="your_det_model"
```
......@@ -14,6 +14,9 @@ Global:
use_visualdl: False
infer_img: doc/imgs_en/img_10.jpg
save_res_path: ./output/det_db/predicts_db.txt
use_amp: False
amp_level: O2
amp_custom_black_list: ['exp']
Architecture:
name: DistillationModel
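The `use_amp` flags added to `Global` above can also be set per run through the usual `-o` overrides instead of editing the file; a sketch (the config path is a placeholder):
```bash
# enable automatic mixed precision for one run via -o overrides
python3 tools/train.py -c your_det_config.yml -o Global.use_amp=True Global.amp_level=O2
```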
......@@ -188,7 +191,6 @@ Eval:
channel_first: False
- DetLabelEncode: # Class handling label
- DetResizeForTest:
# image_shape: [736, 1280]
- NormalizeImage:
scale: 1./255.
mean: [0.485, 0.456, 0.406]
......
......@@ -24,6 +24,7 @@ Architecture:
model_type: det
Models:
Student:
pretrained:
model_type: det
algorithm: DB
Transform: null
......@@ -40,6 +41,7 @@ Architecture:
name: DBHead
k: 50
Student2:
pretrained:
model_type: det
algorithm: DB
Transform: null
......@@ -91,14 +93,11 @@ Loss:
- ["Student", "Student2"]
maps_name: "thrink_maps"
weight: 1.0
# act: None
model_name_pairs: ["Student", "Student2"]
key: maps
- DistillationDBLoss:
weight: 1.0
model_name_list: ["Student", "Student2"]
# key: maps
# name: DBLoss
balance_loss: true
main_loss_type: DiceLoss
alpha: 5
......@@ -197,6 +196,7 @@ Train:
drop_last: false
batch_size_per_card: 8
num_workers: 4
Eval:
dataset:
name: SimpleDataSet
......@@ -204,31 +204,21 @@ Eval:
label_file_list:
- ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
transforms:
- DecodeImage:
img_mode: BGR
channel_first: false
- DetLabelEncode: null
- DetResizeForTest: null
- NormalizeImage:
scale: 1./255.
mean:
- 0.485
- 0.456
- 0.406
std:
- 0.229
- 0.224
- 0.225
order: hwc
- ToCHWImage: null
- KeepKeys:
keep_keys:
- image
- shape
- polys
- ignore_tags
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- DetLabelEncode: # Class handling label
- DetResizeForTest:
- NormalizeImage:
scale: 1./255.
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: 'hwc'
- ToCHWImage:
- KeepKeys:
keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
loader:
shuffle: false
drop_last: false
batch_size_per_card: 1
num_workers: 2
shuffle: False
drop_last: False
batch_size_per_card: 1 # must be 1
num_workers: 2
\ No newline at end of file
Global:
use_gpu: true
epoch_num: 600
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/det_ct/
save_epoch_step: 10
# evaluation is run every 1000 iterations
eval_batch_step: [0,1000]
cal_metric_during_train: False
pretrained_model: ./pretrain_models/ResNet18_vd_pretrained.pdparams
checkpoints:
save_inference_dir:
use_visualdl: False
infer_img: doc/imgs_en/img623.jpg
save_res_path: ./output/det_ct/predicts_ct.txt
Architecture:
model_type: det
algorithm: CT
Transform:
Backbone:
name: ResNet_vd
layers: 18
Neck:
name: CTFPN
Head:
name: CT_Head
in_channels: 512
hidden_dim: 128
num_classes: 3
Loss:
name: CTLoss
Optimizer:
name: Adam
lr: #PolynomialDecay
name: Linear
learning_rate: 0.001
end_lr: 0.
epochs: 600
step_each_epoch: 1254
power: 0.9
PostProcess:
name: CTPostProcess
box_type: poly
Metric:
name: CTMetric
main_indicator: f_score
Train:
dataset:
name: SimpleDataSet
data_dir: ./train_data/total_text/train
label_file_list:
- ./train_data/total_text/train/train.txt
ratio_list: [1.0]
transforms:
- DecodeImage:
img_mode: RGB
channel_first: False
- CTLabelEncode: # Class handling label
- RandomScale:
- MakeShrink:
- GroupRandomHorizontalFlip:
- GroupRandomRotate:
- GroupRandomCropPadding:
- MakeCentripetalShift:
- ColorJitter:
brightness: 0.125
saturation: 0.5
- ToCHWImage:
- NormalizeImage:
- KeepKeys:
keep_keys: ['image', 'gt_kernel', 'training_mask', 'gt_instance', 'gt_kernel_instance', 'training_mask_distance', 'gt_distance'] # the order of the dataloader list
loader:
shuffle: True
drop_last: True
batch_size_per_card: 4
num_workers: 8
Eval:
dataset:
name: SimpleDataSet
data_dir: ./train_data/total_text/test
label_file_list:
- ./train_data/total_text/test/test.txt
ratio_list: [1.0]
transforms:
- DecodeImage:
img_mode: RGB
channel_first: False
- CTLabelEncode: # Class handling label
- ScaleAlignedShort:
- NormalizeImage:
order: 'hwc'
- ToCHWImage:
- KeepKeys:
keep_keys: ['image', 'shape', 'polys', 'texts'] # the order of the dataloader list
loader:
shuffle: False
drop_last: False
batch_size_per_card: 1
num_workers: 2
......@@ -13,6 +13,7 @@ Global:
save_inference_dir:
use_visualdl: False
infer_img:
infer_visual_type: EN # two modes: EN for English datasets, CN for Chinese datasets
valid_set: totaltext # two modes: totaltext evaluates curved words, partvgg evaluates non-curved words
save_res_path: ./output/pgnet_r50_vd_totaltext/predicts_pgnet.txt
character_dict_path: ppocr/utils/ic15_dict.txt
......@@ -32,6 +33,7 @@ Architecture:
name: PGFPN
Head:
name: PGHead
character_dict_path: ppocr/utils/ic15_dict.txt # the same as Global:character_dict_path
Loss:
name: PGLoss
......@@ -45,16 +47,18 @@ Optimizer:
beta1: 0.9
beta2: 0.999
lr:
name: Cosine
learning_rate: 0.001
warmup_epoch: 50
regularizer:
name: 'L2'
factor: 0
factor: 0.0001
PostProcess:
name: PGPostProcess
score_thresh: 0.5
mode: fast # two modes: fast or slow
point_gather_mode: align # same as PGProcessTrain: point_gather_mode
Metric:
name: E2EMetric
......@@ -76,9 +80,12 @@ Train:
- E2ELabelEncodeTrain:
- PGProcessTrain:
batch_size: 14 # same as loader: batch_size_per_card
use_resize: True
use_random_crop: False
min_crop_size: 24
min_text_size: 4
max_text_size: 512
point_gather_mode: align # two modes: align or none; align performs better
- KeepKeys:
keep_keys: [ 'images', 'tcl_maps', 'tcl_label_maps', 'border_maps','direction_maps', 'training_masks', 'label_list', 'pos_list', 'pos_mask' ] # dataloader will return list in this order
loader:
......
......@@ -11,11 +11,11 @@ Global:
save_inference_dir:
use_visualdl: False
seed: 2022
infer_img: ppstructure/docs/vqa/input/zh_val_21.jpg
infer_img: ppstructure/docs/kie/input/zh_val_21.jpg
save_res_path: ./output/re_layoutlmv2_xfund_zh/res/
Architecture:
model_type: vqa
model_type: kie
algorithm: &algorithm "LayoutLMv2"
Transform:
Backbone:
......
......@@ -11,11 +11,11 @@ Global:
save_inference_dir:
use_visualdl: False
seed: 2022
infer_img: ppstructure/docs/vqa/input/zh_val_21.jpg
infer_img: ppstructure/docs/kie/input/zh_val_21.jpg
save_res_path: ./output/re_layoutxlm_xfund_zh/res/
Architecture:
model_type: vqa
model_type: kie
algorithm: &algorithm "LayoutXLM"
Transform:
Backbone:
......
......@@ -11,11 +11,11 @@ Global:
save_inference_dir:
use_visualdl: False
seed: 2022
infer_img: ppstructure/docs/vqa/input/zh_val_42.jpg
infer_img: ppstructure/docs/kie/input/zh_val_42.jpg
save_res_path: ./output/re_layoutlm_xfund_zh/res
Architecture:
model_type: vqa
model_type: kie
algorithm: &algorithm "LayoutLM"
Transform:
Backbone:
......
......@@ -11,11 +11,11 @@ Global:
save_inference_dir:
use_visualdl: False
seed: 2022
infer_img: ppstructure/docs/vqa/input/zh_val_42.jpg
infer_img: ppstructure/docs/kie/input/zh_val_42.jpg
save_res_path: ./output/ser_layoutlmv2_xfund_zh/res/
Architecture:
model_type: vqa
model_type: kie
algorithm: &algorithm "LayoutLMv2"
Transform:
Backbone:
......
......@@ -11,11 +11,11 @@ Global:
save_inference_dir:
use_visualdl: False
seed: 2022
infer_img: ppstructure/docs/vqa/input/zh_val_42.jpg
infer_img: ppstructure/docs/kie/input/zh_val_42.jpg
save_res_path: ./output/ser_layoutxlm_xfund_zh/res
Architecture:
model_type: vqa
model_type: kie
algorithm: &algorithm "LayoutXLM"
Transform:
Backbone:
......
......@@ -11,11 +11,13 @@ Global:
save_inference_dir:
use_visualdl: False
seed: 2022
infer_img: ppstructure/docs/vqa/input/zh_val_21.jpg
infer_img: ppstructure/docs/kie/input/zh_val_21.jpg
save_res_path: ./output/re/xfund_zh/with_gt
kie_rec_model_dir:
kie_det_model_dir:
Architecture:
model_type: vqa
model_type: kie
algorithm: &algorithm "LayoutXLM"
Transform:
Backbone:
......
......@@ -11,11 +11,11 @@ Global:
save_inference_dir:
use_visualdl: False
seed: 2022
infer_img: ppstructure/docs/vqa/input/zh_val_21.jpg
infer_img: ppstructure/docs/kie/input/zh_val_21.jpg
save_res_path: ./output/re/xfund_zh/with_gt
Architecture:
model_type: &model_type "vqa"
model_type: &model_type "kie"
name: DistillationModel
algorithm: Distillation
Models:
......
......@@ -11,16 +11,18 @@ Global:
save_inference_dir:
use_visualdl: False
seed: 2022
infer_img: ppstructure/docs/vqa/input/zh_val_42.jpg
infer_img: ppstructure/docs/kie/input/zh_val_42.jpg
# if you want to predict using the groundtruth ocr info,
# you can use the following config
# infer_img: train_data/XFUND/zh_val/val.json
# infer_mode: False
save_res_path: ./output/ser/xfund_zh/res
kie_rec_model_dir:
kie_det_model_dir:
Architecture:
model_type: vqa
model_type: kie
algorithm: &algorithm "LayoutXLM"
Transform:
Backbone:
......
......@@ -11,12 +11,12 @@ Global:
save_inference_dir:
use_visualdl: False
seed: 2022
infer_img: ppstructure/docs/vqa/input/zh_val_42.jpg
infer_img: ppstructure/docs/kie/input/zh_val_42.jpg
save_res_path: ./output/ser_layoutxlm_xfund_zh/res
Architecture:
model_type: &model_type "vqa"
model_type: &model_type "kie"
name: DistillationModel
algorithm: Distillation
Models:
......
......@@ -12,7 +12,7 @@ Global:
checkpoints:
save_inference_dir:
use_visualdl: false
infer_img: doc/imgs_words/ch/word_1.jpg
infer_img: ./doc/imgs_words/arabic/ar_2.jpg
character_dict_path: ppocr/utils/dict/arabic_dict.txt
max_text_length: &max_text_length 25
infer_mode: false
......
Global:
use_gpu: true
epoch_num: 5
log_smooth_window: 20
print_batch_step: 20
save_model_dir: ./output/rec/rec_r31_robustscanner/
save_epoch_step: 1
# evaluation is run every 2000 iterations
eval_batch_step: [0, 2000]
cal_metric_during_train: True
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: False
infer_img: ./inference/rec_inference
# for data or label process
character_dict_path: ppocr/utils/dict90.txt
max_text_length: &max_text_length 40
infer_mode: False
use_space_char: False
rm_symbol: True
save_res_path: ./output/rec/predicts_robustscanner.txt
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
lr:
name: Piecewise
decay_epochs: [3, 4]
values: [0.001, 0.0001, 0.00001]
regularizer:
name: 'L2'
factor: 0
Architecture:
model_type: rec
algorithm: RobustScanner
Transform:
Backbone:
name: ResNet31
init_type: KaimingNormal
Head:
name: RobustScannerHead
enc_outchannles: 128
hybrid_dec_rnn_layers: 2
hybrid_dec_dropout: 0
position_dec_rnn_layers: 2
start_idx: 91
mask: True
padding_idx: 92
encode_value: False
max_text_length: *max_text_length
Loss:
name: SARLoss
PostProcess:
name: SARLabelDecode
Metric:
name: RecMetric
is_filter: True
Train:
dataset:
name: LMDBDataSet
data_dir: ./train_data/data_lmdb_release/training/
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- SARLabelEncode: # Class handling label
- RobustScannerRecResizeImg:
image_shape: [3, 48, 48, 160] # h:48 w:[48,160]
width_downsample_ratio: 0.25
max_text_length: *max_text_length
- KeepKeys:
keep_keys: ['image', 'label', 'valid_ratio', 'word_positons'] # dataloader will return list in this order
loader:
shuffle: True
batch_size_per_card: 64
drop_last: True
num_workers: 8
use_shared_memory: False
Eval:
dataset:
name: LMDBDataSet
data_dir: ./train_data/data_lmdb_release/evaluation/
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- SARLabelEncode: # Class handling label
- RobustScannerRecResizeImg:
image_shape: [3, 48, 48, 160]
max_text_length: *max_text_length
width_downsample_ratio: 0.25
- KeepKeys:
keep_keys: ['image', 'label', 'valid_ratio', 'word_positons'] # dataloader will return list in this order
loader:
shuffle: False
drop_last: False
batch_size_per_card: 64
num_workers: 4
use_shared_memory: False
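The `Piecewise` schedule above holds each value until the matching decay epoch. A minimal sketch of that mapping (illustrative only):

```python
def piecewise_lr(epoch, decay_epochs=(3, 4), values=(0.001, 0.0001, 0.00001)):
    # values[i] applies until decay_epochs[i]; the last value applies after
    for boundary, value in zip(decay_epochs, values):
        if epoch < boundary:
            return value
    return values[-1]
```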
Global:
use_gpu: true
epoch_num: 500
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/sr/sr_tsrn_transformer_strock/
save_epoch_step: 3
# evaluation is run every 2000 iterations
eval_batch_step: [0, 1000]
cal_metric_during_train: False
pretrained_model:
checkpoints:
save_inference_dir: sr_output
use_visualdl: False
infer_img: doc/imgs_words_en/word_52.png
# for data or label process
character_dict_path: ./train_data/srdata/english_decomposition.txt
max_text_length: 100
infer_mode: False
use_space_char: False
save_res_path: ./output/sr/predicts_gestalt.txt
Optimizer:
name: Adam
beta1: 0.5
beta2: 0.999
clip_norm: 0.25
lr:
learning_rate: 0.0001
Architecture:
model_type: sr
algorithm: Gestalt
Transform:
name: TSRN
STN: True
infer_mode: False
Loss:
name: StrokeFocusLoss
character_dict_path: ./train_data/srdata/english_decomposition.txt
PostProcess:
name: None
Metric:
name: SRMetric
main_indicator: all
Train:
dataset:
name: LMDBDataSetSR
data_dir: ./train_data/srdata/train
transforms:
- SRResize:
imgH: 32
imgW: 128
down_sample_scale: 2
- SRLabelEncode: # Class handling label
- KeepKeys:
keep_keys: ['img_lr', 'img_hr', 'length', 'input_tensor', 'label'] # dataloader will return list in this order
loader:
shuffle: False
batch_size_per_card: 16
drop_last: True
num_workers: 4
Eval:
dataset:
name: LMDBDataSetSR
data_dir: ./train_data/srdata/test
transforms:
- SRResize:
imgH: 32
imgW: 128
down_sample_scale: 2
- SRLabelEncode: # Class handling label
- KeepKeys:
keep_keys: ['img_lr', 'img_hr','length', 'input_tensor', 'label'] # dataloader will return list in this order
loader:
shuffle: False
drop_last: False
batch_size_per_card: 16
num_workers: 4
Global:
use_gpu: true
epoch_num: 100
log_smooth_window: 20
print_batch_step: 20
save_model_dir: ./output/SLANet
save_epoch_step: 400
# evaluation is run every 1000 iterations after the 0th iteration
eval_batch_step: [0, 1000]
cal_metric_during_train: True
pretrained_model:
checkpoints:
save_inference_dir: ./output/SLANet/infer
use_visualdl: False
infer_img: ppstructure/docs/table/table.jpg
# for data or label process
character_dict_path: ppocr/utils/dict/table_structure_dict.txt
character_type: en
max_text_length: &max_text_length 500
box_format: &box_format 'xyxy' # 'xywh', 'xyxy', 'xyxyxyxy'
infer_mode: False
use_sync_bn: True
save_res_path: 'output/infer'
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
clip_norm: 5.0
lr:
name: Piecewise
learning_rate: 0.001
decay_epochs : [40, 50]
values : [0.001, 0.0001, 0.00005]
regularizer:
name: 'L2'
factor: 0.00000
Architecture:
model_type: table
algorithm: SLANet
Backbone:
name: PPLCNet
scale: 1.0
pretrained: true
use_ssld: true
Neck:
name: CSPPAN
out_channels: 96
Head:
name: SLAHead
hidden_size: 256
max_text_length: *max_text_length
loc_reg_num: &loc_reg_num 4
Loss:
name: SLALoss
structure_weight: 1.0
loc_weight: 2.0
loc_loss: smooth_l1
PostProcess:
name: TableLabelDecode
merge_no_span_structure: &merge_no_span_structure True
Metric:
name: TableMetric
main_indicator: acc
compute_bbox_metric: False
loc_reg_num: *loc_reg_num
box_format: *box_format
Train:
dataset:
name: PubTabDataSet
data_dir: train_data/table/pubtabnet/train/
label_file_list: [train_data/table/pubtabnet/PubTabNet_2.0.0_train.jsonl]
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- TableLabelEncode:
learn_empty_box: False
merge_no_span_structure: *merge_no_span_structure
replace_empty_cell_token: False
loc_reg_num: *loc_reg_num
max_text_length: *max_text_length
- TableBoxEncode:
in_box_format: *box_format
out_box_format: *box_format
- ResizeTableImage:
max_len: 488
- NormalizeImage:
scale: 1./255.
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: 'hwc'
- PaddingTableImage:
size: [488, 488]
- ToCHWImage:
- KeepKeys:
keep_keys: [ 'image', 'structure', 'bboxes', 'bbox_masks', 'shape' ]
loader:
shuffle: True
batch_size_per_card: 48
drop_last: True
num_workers: 1
Eval:
dataset:
name: PubTabDataSet
data_dir: train_data/table/pubtabnet/val/
label_file_list: [train_data/table/pubtabnet/PubTabNet_2.0.0_val.jsonl]
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- TableLabelEncode:
learn_empty_box: False
merge_no_span_structure: *merge_no_span_structure
replace_empty_cell_token: False
loc_reg_num: *loc_reg_num
max_text_length: *max_text_length
- TableBoxEncode:
in_box_format: *box_format
out_box_format: *box_format
- ResizeTableImage:
max_len: 488
- NormalizeImage:
scale: 1./255.
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: 'hwc'
- PaddingTableImage:
size: [488, 488]
- ToCHWImage:
- KeepKeys:
keep_keys: [ 'image', 'structure', 'bboxes', 'bbox_masks', 'shape' ]
loader:
shuffle: False
drop_last: False
batch_size_per_card: 48
num_workers: 1
Global:
use_gpu: True
epoch_num: 400
log_smooth_window: 20
print_batch_step: 20
save_model_dir: ./output/SLANet_ch
save_epoch_step: 400
# evaluation is run every 331 iterations after the 0th iteration
eval_batch_step: [0, 331]
cal_metric_during_train: True
pretrained_model:
checkpoints:
save_inference_dir: ./output/SLANet_ch/infer
use_visualdl: False
infer_img: ppstructure/docs/table/table.jpg
# for data or label process
character_dict_path: ppocr/utils/dict/table_structure_dict_ch.txt
character_type: en
max_text_length: &max_text_length 500
box_format: &box_format xyxyxyxy # 'xywh', 'xyxy', 'xyxyxyxy'
infer_mode: False
use_sync_bn: True
save_res_path: output/infer
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
clip_norm: 5.0
lr:
learning_rate: 0.001
regularizer:
name: 'L2'
factor: 0.00000
Architecture:
model_type: table
algorithm: SLANet
Backbone:
name: PPLCNet
scale: 1.0
pretrained: True
use_ssld: True
Neck:
name: CSPPAN
out_channels: 96
Head:
name: SLAHead
hidden_size: 256
max_text_length: *max_text_length
loc_reg_num: &loc_reg_num 8
Loss:
name: SLALoss
structure_weight: 1.0
loc_weight: 2.0
loc_loss: smooth_l1
PostProcess:
name: TableLabelDecode
merge_no_span_structure: &merge_no_span_structure True
Metric:
name: TableMetric
main_indicator: acc
compute_bbox_metric: False
loc_reg_num: *loc_reg_num
box_format: *box_format
del_thead_tbody: True
Train:
dataset:
name: PubTabDataSet
data_dir: train_data/table/train/
label_file_list: [train_data/table/train.txt]
transforms:
- DecodeImage:
img_mode: BGR
channel_first: False
- TableLabelEncode:
learn_empty_box: False
merge_no_span_structure: *merge_no_span_structure
replace_empty_cell_token: False
loc_reg_num: *loc_reg_num
max_text_length: *max_text_length
- TableBoxEncode:
in_box_format: *box_format
out_box_format: *box_format
- ResizeTableImage:
max_len: 488
- NormalizeImage:
scale: 1./255.
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: 'hwc'
- PaddingTableImage:
size: [488, 488]
- ToCHWImage:
- KeepKeys:
keep_keys: [ 'image', 'structure', 'bboxes', 'bbox_masks', 'shape' ]
loader:
shuffle: True
batch_size_per_card: 48
drop_last: True
num_workers: 1
Eval:
dataset:
name: PubTabDataSet
data_dir: train_data/table/val/
label_file_list: [train_data/table/val.txt]
transforms:
- DecodeImage:
img_mode: BGR
channel_first: False
- TableLabelEncode:
learn_empty_box: False
merge_no_span_structure: *merge_no_span_structure
replace_empty_cell_token: False
loc_reg_num: *loc_reg_num
max_text_length: *max_text_length
- TableBoxEncode:
in_box_format: *box_format
out_box_format: *box_format
- ResizeTableImage:
max_len: 488
- NormalizeImage:
scale: 1./255.
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: 'hwc'
- PaddingTableImage:
size: [488, 488]
- ToCHWImage:
- KeepKeys:
keep_keys: [ 'image', 'structure', 'bboxes', 'bbox_masks', 'shape' ]
loader:
shuffle: False
drop_last: False
batch_size_per_card: 48
num_workers: 1
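The two SLANet configs differ mainly in `box_format` and `loc_reg_num`: `xyxy` regresses 4 values per cell, while `xyxyxyxy` regresses a 4-point polygon (8 values). Hypothetical helpers showing how the formats relate (names are illustrative; the C++ `Utility::xyxyxyxy2xyxy` in the cpp_infer diff below plays the same role):

```python
def xyxyxyxy2xyxy(poly):
    # collapse an 8-value polygon [x1, y1, ..., x4, y4] to its bounding box
    xs, ys = poly[0::2], poly[1::2]
    return [min(xs), min(ys), max(xs), max(ys)]

def xyxy2xywh(box):
    # axis-aligned box to center/size form
    x1, y1, x2, y2 = box
    return [(x1 + x2) / 2, (y1 + y2) / 2, x2 - x1, y2 - y1]
```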
......@@ -8,16 +8,15 @@ Global:
eval_batch_step: [0, 6259]
cal_metric_during_train: true
pretrained_model: null
checkpoints:
checkpoints:
save_inference_dir: output/table_master/infer
use_visualdl: false
infer_img: ppstructure/docs/table/table.jpg
save_res_path: ./output/table_master
character_dict_path: ppocr/utils/dict/table_master_structure_dict.txt
infer_mode: false
max_text_length: 500
process_total_num: 0
process_cut_num: 0
max_text_length: &max_text_length 500
box_format: &box_format 'xywh' # 'xywh', 'xyxy', 'xyxyxyxy'
Optimizer:
......@@ -52,7 +51,8 @@ Architecture:
headers: 8
dropout: 0
d_ff: 2024
max_text_length: 500
max_text_length: *max_text_length
loc_reg_num: &loc_reg_num 4
Loss:
name: TableMasterLoss
......@@ -61,11 +61,13 @@ Loss:
PostProcess:
name: TableMasterLabelDecode
box_shape: pad
merge_no_span_structure: &merge_no_span_structure True
Metric:
name: TableMetric
main_indicator: acc
compute_bbox_metric: False
box_format: *box_format
Train:
dataset:
......@@ -78,15 +80,18 @@ Train:
channel_first: False
- TableMasterLabelEncode:
learn_empty_box: False
merge_no_span_structure: True
merge_no_span_structure: *merge_no_span_structure
replace_empty_cell_token: True
loc_reg_num: *loc_reg_num
max_text_length: *max_text_length
- ResizeTableImage:
max_len: 480
resize_bboxes: True
- PaddingTableImage:
size: [480, 480]
- TableBoxEncode:
use_xywh: True
in_box_format: *box_format
out_box_format: *box_format
- NormalizeImage:
scale: 1./255.
mean: [0.5, 0.5, 0.5]
......@@ -112,15 +117,18 @@ Eval:
channel_first: False
- TableMasterLabelEncode:
learn_empty_box: False
merge_no_span_structure: True
merge_no_span_structure: *merge_no_span_structure
replace_empty_cell_token: True
loc_reg_num: *loc_reg_num
max_text_length: *max_text_length
- ResizeTableImage:
max_len: 480
resize_bboxes: True
- PaddingTableImage:
size: [480, 480]
- TableBoxEncode:
use_xywh: True
in_box_format: *box_format
out_box_format: *box_format
- NormalizeImage:
scale: 1./255.
mean: [0.5, 0.5, 0.5]
......
......@@ -17,10 +17,9 @@ Global:
# for data or label process
character_dict_path: ppocr/utils/dict/table_structure_dict.txt
character_type: en
max_text_length: 800
max_text_length: &max_text_length 500
box_format: &box_format 'xyxy' # 'xywh', 'xyxy', 'xyxyxyxy'
infer_mode: False
process_total_num: 0
process_cut_num: 0
Optimizer:
name: Adam
......@@ -39,12 +38,14 @@ Architecture:
Backbone:
name: MobileNetV3
scale: 1.0
model_name: large
model_name: small
disable_se: true
Head:
name: TableAttentionHead
hidden_size: 256
loc_type: 2
max_text_length: 800
max_text_length: *max_text_length
loc_reg_num: &loc_reg_num 4
Loss:
name: TableAttentionLoss
......@@ -72,6 +73,8 @@ Train:
learn_empty_box: False
merge_no_span_structure: False
replace_empty_cell_token: False
loc_reg_num: *loc_reg_num
max_text_length: *max_text_length
- TableBoxEncode:
- ResizeTableImage:
max_len: 488
......@@ -87,15 +90,15 @@ Train:
keep_keys: [ 'image', 'structure', 'bboxes', 'bbox_masks', 'shape' ]
loader:
shuffle: True
batch_size_per_card: 32
batch_size_per_card: 48
drop_last: True
num_workers: 1
Eval:
dataset:
name: PubTabDataSet
data_dir: /home/zhoujun20/table/PubTabNe/pubtabnet/val/
label_file_list: [/home/zhoujun20/table/PubTabNe/pubtabnet/val_500.jsonl]
data_dir: train_data/table/pubtabnet/val/
label_file_list: [train_data/table/pubtabnet/PubTabNet_2.0.0_val.jsonl]
transforms:
- DecodeImage: # load image
img_mode: BGR
......@@ -104,6 +107,8 @@ Eval:
learn_empty_box: False
merge_no_span_structure: False
replace_empty_cell_token: False
loc_reg_num: *loc_reg_num
max_text_length: *max_text_length
- TableBoxEncode:
- ResizeTableImage:
max_len: 488
......@@ -120,5 +125,5 @@ Eval:
loader:
shuffle: False
drop_last: False
batch_size_per_card: 16
batch_size_per_card: 48
num_workers: 1
......@@ -47,7 +47,7 @@ str_to_cpu_mode(const std::string &cpu_mode) {
std::string upper_key;
std::transform(cpu_mode.cbegin(), cpu_mode.cend(),
               std::back_inserter(upper_key), // avoid writing into an empty string (needs <iterator>)
               ::toupper);
auto index = cpu_mode_map.find(upper_key);
auto index = cpu_mode_map.find(upper_key.c_str());
if (index == cpu_mode_map.end()) {
LOGE("cpu_mode not found %s", upper_key.c_str());
return paddle::lite_api::LITE_POWER_HIGH;
......@@ -116,4 +116,4 @@ Java_com_baidu_paddle_lite_demo_ocr_OCRPredictorNative_release(
ppredictor::OCR_PPredictor *ppredictor =
(ppredictor::OCR_PPredictor *)java_pointer;
delete ppredictor;
}
\ No newline at end of file
}
......@@ -54,7 +54,7 @@ public class OCRPredictorNative {
}
public void destory() {
if (nativePointer > 0) {
if (nativePointer != 0) {
release(nativePointer);
nativePointer = 0;
}
......
......@@ -109,8 +109,10 @@ CUDA_LIB、CUDNN_LIB、TENSORRT_DIR、WITH_GPU、WITH_TENSORRT
Before running, copy the following files into the `build/Release/` folder:
1. `paddle_inference/paddle/lib/paddle_inference.dll`
2. `paddle_inference/third_party/install/onnxruntime/lib/onnxruntime.dll`
3. `paddle_inference/third_party/install/paddle2onnx/lib/paddle2onnx.dll`
4. `opencv/build/x64/vc15/bin/opencv_world455.dll`
5. If you use the openblas build of the inference library, also copy `paddle_inference/third_party/install/openblas/lib/openblas.dll`
### Step4: Prediction
......
......@@ -30,7 +30,8 @@ DECLARE_string(image_dir);
DECLARE_string(type);
// detection related
DECLARE_string(det_model_dir);
DECLARE_int32(max_side_len);
DECLARE_string(limit_type);
DECLARE_int32(limit_side_len);
DECLARE_double(det_db_thresh);
DECLARE_double(det_db_box_thresh);
DECLARE_double(det_db_unclip_ratio);
......@@ -48,7 +49,14 @@ DECLARE_int32(rec_batch_num);
DECLARE_string(rec_char_dict_path);
DECLARE_int32(rec_img_h);
DECLARE_int32(rec_img_w);
// structure model related
DECLARE_string(table_model_dir);
DECLARE_int32(table_max_len);
DECLARE_int32(table_batch_num);
DECLARE_string(table_char_dict_path);
DECLARE_bool(merge_no_span_structure);
// forward related
DECLARE_bool(det);
DECLARE_bool(rec);
DECLARE_bool(cls);
DECLARE_bool(table);
\ No newline at end of file
......@@ -41,8 +41,8 @@ public:
explicit DBDetector(const std::string &model_dir, const bool &use_gpu,
const int &gpu_id, const int &gpu_mem,
const int &cpu_math_library_num_threads,
const bool &use_mkldnn, const int &max_side_len,
const double &det_db_thresh,
const bool &use_mkldnn, const string &limit_type,
const int &limit_side_len, const double &det_db_thresh,
const double &det_db_box_thresh,
const double &det_db_unclip_ratio,
const std::string &det_db_score_mode,
......@@ -54,7 +54,8 @@ public:
this->cpu_math_library_num_threads_ = cpu_math_library_num_threads;
this->use_mkldnn_ = use_mkldnn;
this->max_side_len_ = max_side_len;
this->limit_type_ = limit_type;
this->limit_side_len_ = limit_side_len;
this->det_db_thresh_ = det_db_thresh;
this->det_db_box_thresh_ = det_db_box_thresh;
......@@ -84,7 +85,8 @@ private:
int cpu_math_library_num_threads_ = 4;
bool use_mkldnn_ = false;
int max_side_len_ = 960;
string limit_type_ = "max";
int limit_side_len_ = 960;
double det_db_thresh_ = 0.3;
double det_db_box_thresh_ = 0.5;
......@@ -106,7 +108,7 @@ private:
Permute permute_op_;
// post-process
PostProcessor post_processor_;
DBPostProcessor post_processor_;
};
} // namespace PaddleOCR
\ No newline at end of file
......@@ -47,11 +47,7 @@ public:
ocr(std::vector<cv::String> cv_all_img_names, bool det = true,
bool rec = true, bool cls = true);
private:
DBDetector *detector_ = nullptr;
Classifier *classifier_ = nullptr;
CRNNRecognizer *recognizer_ = nullptr;
protected:
void det(cv::Mat img, std::vector<OCRPredictResult> &ocr_results,
std::vector<double> &times);
void rec(std::vector<cv::Mat> img_list,
......@@ -62,6 +58,11 @@ private:
std::vector<double> &times);
void log(std::vector<double> &det_times, std::vector<double> &rec_times,
std::vector<double> &cls_times, int img_num);
private:
DBDetector *detector_ = nullptr;
Classifier *classifier_ = nullptr;
CRNNRecognizer *recognizer_ = nullptr;
};
} // namespace PaddleOCR
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h"
#include "paddle_inference_api.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/paddleocr.h>
#include <include/preprocess_op.h>
#include <include/structure_table.h>
#include <include/utility.h>
using namespace paddle_infer;
namespace PaddleOCR {
class PaddleStructure : public PPOCR {
public:
explicit PaddleStructure();
~PaddleStructure();
std::vector<std::vector<StructurePredictResult>>
structure(std::vector<cv::String> cv_all_img_names, bool layout = false,
bool table = true);
private:
StructureTableRecognizer *recognizer_ = nullptr;
void table(cv::Mat img, StructurePredictResult &structure_result,
std::vector<double> &time_info_table,
std::vector<double> &time_info_det,
std::vector<double> &time_info_rec,
std::vector<double> &time_info_cls);
std::string rebuild_table(std::vector<std::string> rec_html_tags,
std::vector<std::vector<int>> rec_boxes,
std::vector<OCRPredictResult> &ocr_result);
float iou(std::vector<int> &box1, std::vector<int> &box2);
float dis(std::vector<int> &box1, std::vector<int> &box2);
static bool comparison_dis(const std::vector<float> &dis1,
const std::vector<float> &dis2) {
if (dis1[1] < dis2[1]) {
return true;
} else if (dis1[1] == dis2[1]) {
return dis1[0] < dis2[0];
} else {
return false;
}
}
};
} // namespace PaddleOCR
\ No newline at end of file
......@@ -34,7 +34,7 @@ using namespace std;
namespace PaddleOCR {
class PostProcessor {
class DBPostProcessor {
public:
void GetContourArea(const std::vector<std::vector<float>> &box,
float unclip_ratio, float &distance);
......@@ -90,4 +90,20 @@ private:
}
};
class TablePostProcessor {
public:
void init(std::string label_path, bool merge_no_span_structure = true);
void Run(std::vector<float> &loc_preds, std::vector<float> &structure_probs,
std::vector<float> &rec_scores, std::vector<int> &loc_preds_shape,
std::vector<int> &structure_probs_shape,
std::vector<std::vector<std::string>> &rec_html_tag_batch,
std::vector<std::vector<std::vector<int>>> &rec_boxes_batch,
std::vector<int> &width_list, std::vector<int> &height_list);
private:
std::vector<std::string> label_list_;
std::string end = "eos";
std::string beg = "sos";
};
} // namespace PaddleOCR
......@@ -48,11 +48,12 @@ class PermuteBatch {
public:
virtual void Run(const std::vector<cv::Mat> imgs, float *data);
};
class ResizeImgType0 {
public:
virtual void Run(const cv::Mat &img, cv::Mat &resize_img, int max_size_len,
float &ratio_h, float &ratio_w, bool use_tensorrt);
virtual void Run(const cv::Mat &img, cv::Mat &resize_img, string limit_type,
int limit_side_len, float &ratio_h, float &ratio_w,
bool use_tensorrt);
};
class CrnnResizeImg {
......@@ -69,4 +70,16 @@ public:
const std::vector<int> &rec_image_shape = {3, 48, 192});
};
class TableResizeImg {
public:
virtual void Run(const cv::Mat &img, cv::Mat &resize_img,
const int max_len = 488);
};
class TablePadImg {
public:
virtual void Run(const cv::Mat &img, cv::Mat &resize_img,
const int max_len = 488);
};
} // namespace PaddleOCR
\ No newline at end of file
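A rough Python sketch of what `TableResizeImg` and `TablePadImg` above are expected to do, by analogy with the `ResizeTableImage`/`PaddingTableImage` transforms in the training configs (assumes OpenCV and a 3-channel image; not the actual C++ implementation):

```python
import cv2
import numpy as np

def table_resize_pad(img, max_len=488):
    # resize the long side to max_len keeping aspect ratio,
    # then pad bottom/right to a (max_len, max_len) canvas
    h, w = img.shape[:2]
    ratio = max_len / max(h, w)
    resized = cv2.resize(img, (int(w * ratio), int(h * ratio)))
    canvas = np.zeros((max_len, max_len, 3), dtype=img.dtype)
    canvas[:resized.shape[0], :resized.shape[1]] = resized
    return canvas
```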
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h"
#include "paddle_inference_api.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/postprocess_op.h>
#include <include/preprocess_op.h>
#include <include/utility.h>
using namespace paddle_infer;
namespace PaddleOCR {
class StructureTableRecognizer {
public:
explicit StructureTableRecognizer(
const std::string &model_dir, const bool &use_gpu, const int &gpu_id,
const int &gpu_mem, const int &cpu_math_library_num_threads,
const bool &use_mkldnn, const string &label_path,
const bool &use_tensorrt, const std::string &precision,
const int &table_batch_num, const int &table_max_len,
const bool &merge_no_span_structure) {
this->use_gpu_ = use_gpu;
this->gpu_id_ = gpu_id;
this->gpu_mem_ = gpu_mem;
this->cpu_math_library_num_threads_ = cpu_math_library_num_threads;
this->use_mkldnn_ = use_mkldnn;
this->use_tensorrt_ = use_tensorrt;
this->precision_ = precision;
this->table_batch_num_ = table_batch_num;
this->table_max_len_ = table_max_len;
this->post_processor_.init(label_path, merge_no_span_structure);
LoadModel(model_dir);
}
// Load Paddle inference model
void LoadModel(const std::string &model_dir);
void Run(std::vector<cv::Mat> img_list,
std::vector<std::vector<std::string>> &rec_html_tags,
std::vector<float> &rec_scores,
std::vector<std::vector<std::vector<int>>> &rec_boxes,
std::vector<double> &times);
private:
std::shared_ptr<Predictor> predictor_;
bool use_gpu_ = false;
int gpu_id_ = 0;
int gpu_mem_ = 4000;
int cpu_math_library_num_threads_ = 4;
bool use_mkldnn_ = false;
int table_max_len_ = 488;
std::vector<float> mean_ = {0.485f, 0.456f, 0.406f};
std::vector<float> scale_ = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};
bool is_scale_ = true;
bool use_tensorrt_ = false;
std::string precision_ = "fp32";
int table_batch_num_ = 1;
// pre-process
TableResizeImg resize_op_;
Normalize normalize_op_;
PermuteBatch permute_op_;
TablePadImg pad_op_;
// post-process
TablePostProcessor post_processor_;
}; // class StructureTableRecognizer
} // namespace PaddleOCR
\ No newline at end of file
......@@ -40,6 +40,15 @@ struct OCRPredictResult {
int cls_label = -1;
};
struct StructurePredictResult {
std::vector<int> box;
std::vector<std::vector<int>> cell_box;
std::string type;
std::vector<OCRPredictResult> text_res;
std::string html;
float html_score = -1;
};
class Utility {
public:
static std::vector<std::string> ReadDict(const std::string &path);
......@@ -48,6 +57,10 @@ public:
const std::vector<OCRPredictResult> &ocr_result,
const std::string &save_path);
static void VisualizeBboxes(const cv::Mat &srcimg,
const StructurePredictResult &structure_result,
const std::string &save_path);
template <class ForwardIterator>
inline static size_t argmax(ForwardIterator first, ForwardIterator last) {
return std::distance(first, std::max_element(first, last));
......@@ -68,6 +81,25 @@ public:
static void CreateDir(const std::string &path);
static void print_result(const std::vector<OCRPredictResult> &ocr_result);
static cv::Mat crop_image(cv::Mat &img, std::vector<int> &area);
static void sorted_boxes(std::vector<OCRPredictResult> &ocr_result);
static std::vector<int> xyxyxyxy2xyxy(std::vector<std::vector<int>> &box);
static std::vector<int> xyxyxyxy2xyxy(std::vector<int> &box);
private:
static bool comparison_box(const OCRPredictResult &result1,
const OCRPredictResult &result2) {
if (result1.box[0][1] < result2.box[0][1]) {
return true;
} else if (result1.box[0][1] == result2.box[0][1]) {
return result1.box[0][0] < result2.box[0][0];
} else {
return false;
}
}
};
} // namespace PaddleOCR
\ No newline at end of file
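The `comparison_box` predicate above orders results by the top-left corner: y first, then x. In Python terms (a sketch; the real `sorted_boxes` may additionally regroup boxes that sit on the same text line):

```python
def sorted_boxes(ocr_results):
    # box[0] is the top-left point of the quad: sort top-to-bottom,
    # then left-to-right
    ocr_results.sort(key=lambda r: (r["box"][0][1], r["box"][0][0]))
    return ocr_results
```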
......@@ -171,6 +171,9 @@ inference/
|-- cls
| |--inference.pdiparams
| |--inference.pdmodel
|-- table
| |--inference.pdiparams
| |--inference.pdmodel
```
......@@ -275,6 +278,17 @@ Specifically,
--cls=true \
```
##### 7. table
```shell
./build/ppocr --det_model_dir=inference/det_db \
--rec_model_dir=inference/rec_rcnn \
--table_model_dir=inference/table \
--image_dir=../../ppstructure/docs/table/table.jpg \
--type=structure \
--table=true
```
More parameters are as follows:
- Common parameters
......@@ -293,9 +307,9 @@ More parameters are as follows,
|parameter|data type|default|meaning|
| :---: | :---: | :---: | :---: |
|det|bool|true|Whether to perform text detection in the forward pass|
|rec|bool|true|Whether to perform text recognition in the forward pass|
|cls|bool|false|Whether to perform text direction classification in the forward pass|
- Detection related parameters
......@@ -329,6 +343,16 @@ More parameters are as follows,
|rec_img_h|int|48|image height of recognition|
|rec_img_w|int|320|image width of recognition|
- Table recognition related parameters
|parameter|data type|default|meaning|
| :---: | :---: | :---: | :---: |
|table_model_dir|string|-|Path of the table recognition inference model|
|table_char_dict_path|string|../../ppocr/utils/dict/table_structure_dict.txt|Dictionary file|
|table_max_len|int|488|Long-side size of the input image for the table recognition model; the final network input is (table_max_len, table_max_len)|
|merge_no_span_structure|bool|true|Whether to merge <td> and </td> into <td></td>|
* Multi-language inference is also supported in PaddleOCR; refer to the [recognition tutorial](../../doc/doc_en/recognition_en.md) for the supported languages and models. To infer with a multi-language model, simply modify the values of `rec_char_dict_path` and `rec_model_dir`.
......@@ -344,6 +368,12 @@ predict img: ../../doc/imgs/12.jpg
The detection visualized image saved in ./output//12.jpg
```
- table
```bash
predict img: ../../ppstructure/docs/table/table.jpg
0 type: table, region: [0,0,371,293], res: <html><body><table><thead><tr><td>Methods</td><td>R</td><td>P</td><td>F</td><td>FPS</td></tr></thead><tbody><tr><td>SegLink [26]</td><td>70.0</td><td>86.0</td><td>77.0</td><td>8.9</td></tr><tr><td>PixelLink [4]</td><td>73.2</td><td>83.0</td><td>77.8</td><td>-</td></tr><tr><td>TextSnake [18]</td><td>73.9</td><td>83.2</td><td>78.3</td><td>1.1</td></tr><tr><td>TextField [37]</td><td>75.9</td><td>87.4</td><td>81.3</td><td>5.2 </td></tr><tr><td>MSR[38]</td><td>76.7</td><td>87.4</td><td>81.7</td><td>-</td></tr><tr><td>FTSN [3]</td><td>77.1</td><td>87.6</td><td>82.0</td><td>-</td></tr><tr><td>LSE[30]</td><td>81.7</td><td>84.2</td><td>82.9</td><td>-</td></tr><tr><td>CRAFT [2]</td><td>78.2</td><td>88.2</td><td>82.9</td><td>8.6</td></tr><tr><td>MCN [16]</td><td>79</td><td>88</td><td>83</td><td>-</td></tr><tr><td>ATRR[35]</td><td>82.1</td><td>85.2</td><td>83.6</td><td>-</td></tr><tr><td>PAN [34]</td><td>83.8</td><td>84.4</td><td>84.1</td><td>30.2</td></tr><tr><td>DB[12]</td><td>79.2</td><td>91.5</td><td>84.9</td><td>32.0</td></tr><tr><td>DRRG [41]</td><td>82.30</td><td>88.05</td><td>85.08</td><td>-</td></tr><tr><td>Ours (SynText)</td><td>80.68</td><td>85.40</td><td>82.97</td><td>12.68</td></tr><tr><td>Ours (MLT-17)</td><td>84.54</td><td>86.62</td><td>85.57</td><td>12.31</td></tr></tbody></table></body></html>
```
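The printed `res:` HTML can be post-processed directly. A hypothetical example (assumes `pandas` with its `lxml` and `openpyxl` dependencies installed; `html_string` stands for the output above):

```python
import pandas as pd

def html_table_to_excel(html_string, out_path="table.xlsx"):
    tables = pd.read_html(html_string)   # parses <table> elements
    tables[0].to_excel(out_path, index=False)
```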
<a name="3"></a>
## 3. FAQ
......
......@@ -181,6 +181,9 @@ inference/
|-- cls
| |--inference.pdiparams
| |--inference.pdmodel
|-- table
| |--inference.pdiparams
| |--inference.pdmodel
```
<a name="22"></a>
......@@ -285,6 +288,16 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
--cls=true \
```
##### 7. Table Recognition
```shell
./build/ppocr --det_model_dir=inference/det_db \
--rec_model_dir=inference/rec_rcnn \
--table_model_dir=inference/table \
--image_dir=../../ppstructure/docs/table/table.jpg \
--type=structure \
--table=true
```
The supported tunable parameters are explained below:
- Common parameters
......@@ -328,21 +341,33 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
|cls_thresh|float|0.9|Score threshold of the direction classifier|
|cls_batch_num|int|1|Batch size of the direction classifier|
- Recognition model related
- Text recognition model related
|parameter|data type|default|meaning|
| :---: | :---: | :---: | :---: |
|rec_model_dir|string|-|Path of the recognition inference model|
|rec_model_dir|string|-|Path of the text recognition inference model|
|rec_char_dict_path|string|../../ppocr/utils/ppocr_keys_v1.txt|Dictionary file|
|rec_batch_num|int|6|Batch size of the recognition model|
|rec_img_h|int|48|Input image height of the recognition model|
|rec_img_w|int|320|Input image width of the recognition model|
|rec_batch_num|int|6|Batch size of the text recognition model|
|rec_img_h|int|48|Input image height of the text recognition model|
|rec_img_w|int|320|Input image width of the text recognition model|
- Table recognition model related
|parameter|data type|default|meaning|
| :---: | :---: | :---: | :---: |
|table_model_dir|string|-|Path of the table recognition inference model|
|table_char_dict_path|string|../../ppocr/utils/dict/table_structure_dict.txt|Dictionary file|
|table_max_len|int|488|Long-side size of the input image for the table recognition model; the final network input is (table_max_len, table_max_len)|
|merge_no_span_structure|bool|true|Whether to merge <td> and </td> into <td></td>|
* PaddleOCR also supports multi-language prediction; see the multi-language dictionaries and models section of the [recognition documentation](../../doc/doc_ch/recognition.md) for the supported languages and models. To run multi-language prediction, simply modify the `rec_char_dict_path` (dictionary file path) and `rec_model_dir` (inference model path) fields.
The detection results are finally printed to the screen, as shown below.
- ocr
```bash
predict img: ../../doc/imgs/12.jpg
../../doc/imgs/12.jpg
......@@ -353,6 +378,13 @@ predict img: ../../doc/imgs/12.jpg
The detection visualized image saved in ./output//12.jpg
```
- table
```bash
predict img: ../../ppstructure/docs/table/table.jpg
0 type: table, region: [0,0,371,293], res: <html><body><table><thead><tr><td>Methods</td><td>R</td><td>P</td><td>F</td><td>FPS</td></tr></thead><tbody><tr><td>SegLink [26]</td><td>70.0</td><td>86.0</td><td>77.0</td><td>8.9</td></tr><tr><td>PixelLink [4]</td><td>73.2</td><td>83.0</td><td>77.8</td><td>-</td></tr><tr><td>TextSnake [18]</td><td>73.9</td><td>83.2</td><td>78.3</td><td>1.1</td></tr><tr><td>TextField [37]</td><td>75.9</td><td>87.4</td><td>81.3</td><td>5.2 </td></tr><tr><td>MSR[38]</td><td>76.7</td><td>87.4</td><td>81.7</td><td>-</td></tr><tr><td>FTSN [3]</td><td>77.1</td><td>87.6</td><td>82.0</td><td>-</td></tr><tr><td>LSE[30]</td><td>81.7</td><td>84.2</td><td>82.9</td><td>-</td></tr><tr><td>CRAFT [2]</td><td>78.2</td><td>88.2</td><td>82.9</td><td>8.6</td></tr><tr><td>MCN [16]</td><td>79</td><td>88</td><td>83</td><td>-</td></tr><tr><td>ATRR[35]</td><td>82.1</td><td>85.2</td><td>83.6</td><td>-</td></tr><tr><td>PAN [34]</td><td>83.8</td><td>84.4</td><td>84.1</td><td>30.2</td></tr><tr><td>DB[12]</td><td>79.2</td><td>91.5</td><td>84.9</td><td>32.0</td></tr><tr><td>DRRG [41]</td><td>82.30</td><td>88.05</td><td>85.08</td><td>-</td></tr><tr><td>Ours (SynText)</td><td>80.68</td><td>85.40</td><td>82.97</td><td>12.68</td></tr><tr><td>Ours (MLT-17)</td><td>84.54</td><td>86.62</td><td>85.57</td><td>12.31</td></tr></tbody></table></body></html>
```
<a name="3"></a>
## 3. FAQ
......
......@@ -30,7 +30,8 @@ DEFINE_string(
"Perform ocr or structure, the value is selected in ['ocr','structure'].");
// detection related
DEFINE_string(det_model_dir, "", "Path of det inference model.");
DEFINE_int32(max_side_len, 960, "max_side_len of input image.");
DEFINE_string(limit_type, "max", "limit_type of input image.");
DEFINE_int32(limit_side_len, 960, "limit_side_len of input image.");
DEFINE_double(det_db_thresh, 0.3, "Threshold of det_db_thresh.");
DEFINE_double(det_db_box_thresh, 0.6, "Threshold of det_db_box_thresh.");
DEFINE_double(det_db_unclip_ratio, 1.5, "Threshold of det_db_unclip_ratio.");
......@@ -50,7 +51,18 @@ DEFINE_string(rec_char_dict_path, "../../ppocr/utils/ppocr_keys_v1.txt",
DEFINE_int32(rec_img_h, 48, "rec image height");
DEFINE_int32(rec_img_w, 320, "rec image width");
// structure model related
DEFINE_string(table_model_dir, "", "Path of table structure inference model.");
DEFINE_int32(table_max_len, 488, "Max side length of the input table image.");
DEFINE_int32(table_batch_num, 1, "table_batch_num.");
DEFINE_bool(merge_no_span_structure, true,
"Whether merge <td> and </td> to <td></td>");
DEFINE_string(table_char_dict_path,
"../../ppocr/utils/dict/table_structure_dict_ch.txt",
"Path of dictionary.");
// ocr forward related
DEFINE_bool(det, true, "Whether use det in forward.");
DEFINE_bool(rec, true, "Whether use rec in forward.");
DEFINE_bool(cls, false, "Whether use cls in forward.");
\ No newline at end of file
DEFINE_bool(cls, false, "Whether use cls in forward.");
DEFINE_bool(table, false, "Whether use table structure in forward.");
\ No newline at end of file
......@@ -19,6 +19,7 @@
#include <include/args.h>
#include <include/paddleocr.h>
#include <include/paddlestructure.h>
using namespace PaddleOCR;
......@@ -32,6 +33,12 @@ void check_params() {
}
}
if (FLAGS_rec) {
std::cout
<< "In PP-OCRv3, rec_image_shape parameter defaults to '3, 48, 320',"
"if you are using recognition model with PP-OCRv2 or an older "
"version, "
"please set --rec_image_shape='3,32,320"
<< std::endl;
if (FLAGS_rec_model_dir.empty() || FLAGS_image_dir.empty()) {
std::cout << "Usage[rec]: ./ppocr "
"--rec_model_dir=/PATH/TO/REC_INFERENCE_MODEL/ "
......@@ -47,6 +54,17 @@ void check_params() {
exit(1);
}
}
if (FLAGS_table) {
if (FLAGS_table_model_dir.empty() || FLAGS_det_model_dir.empty() ||
FLAGS_rec_model_dir.empty() || FLAGS_image_dir.empty()) {
std::cout << "Usage[table]: ./ppocr "
<< "--det_model_dir=/PATH/TO/DET_INFERENCE_MODEL/ "
<< "--rec_model_dir=/PATH/TO/REC_INFERENCE_MODEL/ "
<< "--table_model_dir=/PATH/TO/TABLE_INFERENCE_MODEL/ "
<< "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl;
exit(1);
}
}
if (FLAGS_precision != "fp32" && FLAGS_precision != "fp16" &&
FLAGS_precision != "int8") {
cout << "precison should be 'fp32'(default), 'fp16' or 'int8'. " << endl;
......@@ -54,21 +72,7 @@ void check_params() {
}
}
int main(int argc, char **argv) {
// Parsing command-line
google::ParseCommandLineFlags(&argc, &argv, true);
check_params();
if (!Utility::PathExists(FLAGS_image_dir)) {
std::cerr << "[ERROR] image path does not exist! image_dir: " << FLAGS_image_dir
<< endl;
exit(1);
}
std::vector<cv::String> cv_all_img_names;
cv::glob(FLAGS_image_dir, cv_all_img_names);
std::cout << "total images num: " << cv_all_img_names.size() << endl;
void ocr(std::vector<cv::String> &cv_all_img_names) {
PPOCR ocr = PPOCR();
std::vector<std::vector<OCRPredictResult>> ocr_results =
......@@ -109,3 +113,55 @@ int main(int argc, char **argv) {
}
}
}
void structure(std::vector<cv::String> &cv_all_img_names) {
PaddleOCR::PaddleStructure engine = PaddleOCR::PaddleStructure();
std::vector<std::vector<StructurePredictResult>> structure_results =
engine.structure(cv_all_img_names, false, FLAGS_table);
for (int i = 0; i < cv_all_img_names.size(); i++) {
cout << "predict img: " << cv_all_img_names[i] << endl;
cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
for (int j = 0; j < structure_results[i].size(); j++) {
std::cout << j << "\ttype: " << structure_results[i][j].type
<< ", region: [";
std::cout << structure_results[i][j].box[0] << ","
<< structure_results[i][j].box[1] << ","
<< structure_results[i][j].box[2] << ","
<< structure_results[i][j].box[3] << "], res: ";
if (structure_results[i][j].type == "table") {
std::cout << structure_results[i][j].html << std::endl;
std::string file_name = Utility::basename(cv_all_img_names[i]);
Utility::VisualizeBboxes(srcimg, structure_results[i][j],
FLAGS_output + "/" + std::to_string(j) + "_" +
file_name);
} else {
Utility::print_result(structure_results[i][j].text_res);
}
}
}
}
int main(int argc, char **argv) {
// Parsing command-line
google::ParseCommandLineFlags(&argc, &argv, true);
check_params();
if (!Utility::PathExists(FLAGS_image_dir)) {
std::cerr << "[ERROR] image path does not exist! image_dir: " << FLAGS_image_dir
<< endl;
exit(1);
}
std::vector<cv::String> cv_all_img_names;
cv::glob(FLAGS_image_dir, cv_all_img_names);
std::cout << "total images num: " << cv_all_img_names.size() << endl;
if (FLAGS_type == "ocr") {
ocr(cv_all_img_names);
} else if (FLAGS_type == "structure") {
structure(cv_all_img_names);
} else {
std::cout << "only value in ['ocr','structure'] is supported" << endl;
}
}
......@@ -112,6 +112,11 @@ void Classifier::LoadModel(const std::string &model_dir) {
precision = paddle_infer::Config::Precision::kInt8;
}
config.EnableTensorRtEngine(1 << 20, 10, 3, precision, false, false);
if (!Utility::PathExists("./trt_cls_shape.txt")){
config.CollectShapeRangeInfo("./trt_cls_shape.txt");
} else {
config.EnableTunedTensorRtDynamicShape("./trt_cls_shape.txt", true);
}
}
} else {
config.DisableGpu();
......
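The TensorRT change above (repeated for the det and rec predictors below) replaces hard-coded dynamic-shape maps with a collect-then-reuse flow: the first run records real tensor shape ranges into a file, and later runs load the tuned file. Sketched with the Paddle Inference Python API (method names assumed from `paddle.inference.Config`; paths are illustrative):

```python
import os
from paddle.inference import Config

config = Config("inference/det_db/inference.pdmodel",
                "inference/det_db/inference.pdiparams")
shape_file = "./trt_det_shape.txt"
if not os.path.exists(shape_file):
    config.collect_shape_range_info(shape_file)      # first run: record shapes
else:
    config.enable_tuned_tensorrt_dynamic_shape(shape_file, True)  # reuse them
```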
......@@ -32,49 +32,13 @@ void DBDetector::LoadModel(const std::string &model_dir) {
if (this->precision_ == "int8") {
precision = paddle_infer::Config::Precision::kInt8;
}
config.EnableTensorRtEngine(1 << 20, 1, 20, precision, false, false);
std::map<std::string, std::vector<int>> min_input_shape = {
{"x", {1, 3, 50, 50}},
{"conv2d_92.tmp_0", {1, 120, 20, 20}},
{"conv2d_91.tmp_0", {1, 24, 10, 10}},
{"conv2d_59.tmp_0", {1, 96, 20, 20}},
{"nearest_interp_v2_1.tmp_0", {1, 256, 10, 10}},
{"nearest_interp_v2_2.tmp_0", {1, 256, 20, 20}},
{"conv2d_124.tmp_0", {1, 256, 20, 20}},
{"nearest_interp_v2_3.tmp_0", {1, 64, 20, 20}},
{"nearest_interp_v2_4.tmp_0", {1, 64, 20, 20}},
{"nearest_interp_v2_5.tmp_0", {1, 64, 20, 20}},
{"elementwise_add_7", {1, 56, 2, 2}},
{"nearest_interp_v2_0.tmp_0", {1, 256, 2, 2}}};
std::map<std::string, std::vector<int>> max_input_shape = {
{"x", {1, 3, this->max_side_len_, this->max_side_len_}},
{"conv2d_92.tmp_0", {1, 120, 400, 400}},
{"conv2d_91.tmp_0", {1, 24, 200, 200}},
{"conv2d_59.tmp_0", {1, 96, 400, 400}},
{"nearest_interp_v2_1.tmp_0", {1, 256, 200, 200}},
{"nearest_interp_v2_2.tmp_0", {1, 256, 400, 400}},
{"conv2d_124.tmp_0", {1, 256, 400, 400}},
{"nearest_interp_v2_3.tmp_0", {1, 64, 400, 400}},
{"nearest_interp_v2_4.tmp_0", {1, 64, 400, 400}},
{"nearest_interp_v2_5.tmp_0", {1, 64, 400, 400}},
{"elementwise_add_7", {1, 56, 400, 400}},
{"nearest_interp_v2_0.tmp_0", {1, 256, 400, 400}}};
std::map<std::string, std::vector<int>> opt_input_shape = {
{"x", {1, 3, 640, 640}},
{"conv2d_92.tmp_0", {1, 120, 160, 160}},
{"conv2d_91.tmp_0", {1, 24, 80, 80}},
{"conv2d_59.tmp_0", {1, 96, 160, 160}},
{"nearest_interp_v2_1.tmp_0", {1, 256, 80, 80}},
{"nearest_interp_v2_2.tmp_0", {1, 256, 160, 160}},
{"conv2d_124.tmp_0", {1, 256, 160, 160}},
{"nearest_interp_v2_3.tmp_0", {1, 64, 160, 160}},
{"nearest_interp_v2_4.tmp_0", {1, 64, 160, 160}},
{"nearest_interp_v2_5.tmp_0", {1, 64, 160, 160}},
{"elementwise_add_7", {1, 56, 40, 40}},
{"nearest_interp_v2_0.tmp_0", {1, 256, 40, 40}}};
config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
opt_input_shape);
config.EnableTensorRtEngine(1 << 30, 1, 20, precision, false, false);
if (!Utility::PathExists("./trt_det_shape.txt")){
config.CollectShapeRangeInfo("./trt_det_shape.txt");
} else {
config.EnableTunedTensorRtDynamicShape("./trt_det_shape.txt", true);
}
}
} else {
config.DisableGpu();
......@@ -109,7 +73,8 @@ void DBDetector::Run(cv::Mat &img,
img.copyTo(srcimg);
auto preprocess_start = std::chrono::steady_clock::now();
this->resize_op_.Run(img, resize_img, this->max_side_len_, ratio_h, ratio_w,
this->resize_op_.Run(img, resize_img, this->limit_type_,
this->limit_side_len_, ratio_h, ratio_w,
this->use_tensorrt_);
this->normalize_op_.Run(&resize_img, this->mean_, this->scale_,
......
......@@ -147,20 +147,12 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
if (this->precision_ == "int8") {
precision = paddle_infer::Config::Precision::kInt8;
}
config.EnableTensorRtEngine(1 << 20, 10, 15, precision, false, false);
int imgH = this->rec_image_shape_[1];
int imgW = this->rec_image_shape_[2];
std::map<std::string, std::vector<int>> min_input_shape = {
{"x", {1, 3, imgH, 10}}, {"lstm_0.tmp_0", {10, 1, 96}}};
std::map<std::string, std::vector<int>> max_input_shape = {
{"x", {this->rec_batch_num_, 3, imgH, 2500}},
{"lstm_0.tmp_0", {1000, 1, 96}}};
std::map<std::string, std::vector<int>> opt_input_shape = {
{"x", {this->rec_batch_num_, 3, imgH, imgW}},
{"lstm_0.tmp_0", {25, 1, 96}}};
config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
opt_input_shape);
if (!Utility::PathExists("./trt_rec_shape.txt")){
config.CollectShapeRangeInfo("./trt_rec_shape.txt");
} else {
config.EnableTunedTensorRtDynamicShape("./trt_rec_shape.txt", true);
}
}
} else {
config.DisableGpu();
......
......@@ -23,10 +23,10 @@ PPOCR::PPOCR() {
if (FLAGS_det) {
this->detector_ = new DBDetector(
FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem,
FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_max_side_len,
FLAGS_det_db_thresh, FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio,
FLAGS_det_db_score_mode, FLAGS_use_dilation, FLAGS_use_tensorrt,
FLAGS_precision);
FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_limit_type,
FLAGS_limit_side_len, FLAGS_det_db_thresh, FLAGS_det_db_box_thresh,
FLAGS_det_db_unclip_ratio, FLAGS_det_db_score_mode, FLAGS_use_dilation,
FLAGS_use_tensorrt, FLAGS_precision);
}
if (FLAGS_cls && FLAGS_use_angle_cls) {
......@@ -56,7 +56,8 @@ void PPOCR::det(cv::Mat img, std::vector<OCRPredictResult> &ocr_results,
res.box = boxes[i];
ocr_results.push_back(res);
}
// sort boxes from top to bottom, then from left to right
Utility::sorted_boxes(ocr_results);
times[0] += det_times[0];
times[1] += det_times[1];
times[2] += det_times[2];
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <include/args.h>
#include <include/paddlestructure.h>
#include "auto_log/autolog.h"
#include <numeric>
#include <sys/stat.h>
namespace PaddleOCR {
PaddleStructure::PaddleStructure() {
if (FLAGS_table) {
this->recognizer_ = new StructureTableRecognizer(
FLAGS_table_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem,
FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_table_char_dict_path,
FLAGS_use_tensorrt, FLAGS_precision, FLAGS_table_batch_num,
FLAGS_table_max_len, FLAGS_merge_no_span_structure);
}
};
std::vector<std::vector<StructurePredictResult>>
PaddleStructure::structure(std::vector<cv::String> cv_all_img_names,
bool layout, bool table) {
std::vector<double> time_info_det = {0, 0, 0};
std::vector<double> time_info_rec = {0, 0, 0};
std::vector<double> time_info_cls = {0, 0, 0};
std::vector<double> time_info_table = {0, 0, 0};
std::vector<std::vector<StructurePredictResult>> structure_results;
if (!Utility::PathExists(FLAGS_output) && FLAGS_det) {
Utility::CreateDir(FLAGS_output);
}
for (int i = 0; i < cv_all_img_names.size(); ++i) {
std::vector<StructurePredictResult> structure_result;
cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
if (!srcimg.data) {
std::cerr << "[ERROR] image read failed! image path: "
<< cv_all_img_names[i] << endl;
exit(1);
}
if (layout) {
} else {
StructurePredictResult res;
res.type = "table";
res.box = std::vector<int>(4, 0);
res.box[2] = srcimg.cols;
res.box[3] = srcimg.rows;
structure_result.push_back(res);
}
cv::Mat roi_img;
for (int i = 0; i < structure_result.size(); i++) {
// crop image
roi_img = Utility::crop_image(srcimg, structure_result[i].box);
if (structure_result[i].type == "table") {
this->table(roi_img, structure_result[i], time_info_table,
time_info_det, time_info_rec, time_info_cls);
}
}
structure_results.push_back(structure_result);
}
return structure_results;
};
void PaddleStructure::table(cv::Mat img,
StructurePredictResult &structure_result,
std::vector<double> &time_info_table,
std::vector<double> &time_info_det,
std::vector<double> &time_info_rec,
std::vector<double> &time_info_cls) {
// predict structure
std::vector<std::vector<std::string>> structure_html_tags;
std::vector<float> structure_scores(1, 0);
std::vector<std::vector<std::vector<int>>> structure_boxes;
std::vector<double> structure_imes;
std::vector<cv::Mat> img_list;
img_list.push_back(img);
this->recognizer_->Run(img_list, structure_html_tags, structure_scores,
structure_boxes, structure_imes);
time_info_table[0] += structure_imes[0];
time_info_table[1] += structure_imes[1];
time_info_table[2] += structure_imes[2];
std::vector<OCRPredictResult> ocr_result;
std::string html;
int expand_pixel = 3;
for (int i = 0; i < img_list.size(); i++) {
// det
this->det(img_list[i], ocr_result, time_info_det);
// crop image
std::vector<cv::Mat> rec_img_list;
std::vector<int> ocr_box;
for (int j = 0; j < ocr_result.size(); j++) {
ocr_box = Utility::xyxyxyxy2xyxy(ocr_result[j].box);
ocr_box[0] = max(0, ocr_box[0] - expand_pixel);
ocr_box[1] = max(0, ocr_box[1] - expand_pixel);
ocr_box[2] = min(img_list[i].cols, ocr_box[2] + expand_pixel);
ocr_box[3] = min(img_list[i].rows, ocr_box[3] + expand_pixel);
cv::Mat crop_img = Utility::crop_image(img_list[i], ocr_box);
rec_img_list.push_back(crop_img);
}
// rec
this->rec(rec_img_list, ocr_result, time_info_rec);
// rebuild table
html = this->rebuild_table(structure_html_tags[i], structure_boxes[i],
ocr_result);
structure_result.html = html;
structure_result.cell_box = structure_boxes[i];
structure_result.html_score = structure_scores[i];
}
};
std::string
PaddleStructure::rebuild_table(std::vector<std::string> structure_html_tags,
std::vector<std::vector<int>> structure_boxes,
std::vector<OCRPredictResult> &ocr_result) {
// match text in same cell
std::vector<std::vector<string>> matched(structure_boxes.size(),
std::vector<std::string>());
std::vector<int> ocr_box;
std::vector<int> structure_box;
for (int i = 0; i < ocr_result.size(); i++) {
ocr_box = Utility::xyxyxyxy2xyxy(ocr_result[i].box);
ocr_box[0] -= 1;
ocr_box[1] -= 1;
ocr_box[2] += 1;
ocr_box[3] += 1;
std::vector<std::vector<float>> dis_list(structure_boxes.size(),
std::vector<float>(3, 100000.0));
for (int j = 0; j < structure_boxes.size(); j++) {
if (structure_boxes[j].size() == 8) {
structure_box = Utility::xyxyxyxy2xyxy(structure_boxes[j]);
} else {
structure_box = structure_boxes[j];
}
dis_list[j][0] = this->dis(ocr_box, structure_box);
dis_list[j][1] = 1 - this->iou(ocr_box, structure_box);
dis_list[j][2] = j;
}
// find min dis idx
std::sort(dis_list.begin(), dis_list.end(),
PaddleStructure::comparison_dis);
matched[dis_list[0][2]].push_back(ocr_result[i].text);
}
// get pred html
std::string html_str = "";
int td_tag_idx = 0;
for (int i = 0; i < structure_html_tags.size(); i++) {
if (structure_html_tags[i].find("</td>") != std::string::npos) {
if (structure_html_tags[i].find("<td></td>") != std::string::npos) {
html_str += "<td>";
}
if (matched[td_tag_idx].size() > 0) {
bool b_with = false;
if (matched[td_tag_idx][0].find("<b>") != std::string::npos &&
matched[td_tag_idx].size() > 1) {
b_with = true;
html_str += "<b>";
}
for (int j = 0; j < matched[td_tag_idx].size(); j++) {
std::string content = matched[td_tag_idx][j];
if (matched[td_tag_idx].size() > 1) {
// remove blank, <b> and </b>
if (content.length() > 0 && content.at(0) == ' ') {
content = content.substr(1); // drop the leading blank
}
if (content.length() > 2 && content.substr(0, 3) == "<b>") {
content = content.substr(3);
}
if (content.length() > 4 &&
content.substr(content.length() - 4) == "</b>") {
content = content.substr(0, content.length() - 4);
}
if (content.empty()) {
continue;
}
// add blank
if (j != matched[td_tag_idx].size() - 1 &&
content.at(content.length() - 1) != ' ') {
content += ' ';
}
}
html_str += content;
}
if (b_with) {
html_str += "</b>";
}
}
if (structure_html_tags[i].find("<td></td>") != std::string::npos) {
html_str += "</td>";
} else {
html_str += structure_html_tags[i];
}
td_tag_idx += 1;
} else {
html_str += structure_html_tags[i];
}
}
return html_str;
}
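The matching loop in `rebuild_table` builds `(distance, 1 - IoU, j)` triples and sorts them with `comparison_dis`, which compares `1 - IoU` first and breaks ties by distance. The same selection rule in Python (illustrative; `dis` and `iou` stand for the two member functions below):

```python
def match_cell(ocr_box, cell_boxes, dis, iou):
    # key order mirrors comparison_dis: overlap first, then distance
    keys = [(1 - iou(ocr_box, cell), dis(ocr_box, cell), j)
            for j, cell in enumerate(cell_boxes)]
    return min(keys)[2]  # index of the best-matching cell
```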
float PaddleStructure::iou(std::vector<int> &box1, std::vector<int> &box2) {
int area1 = max(0, box1[2] - box1[0]) * max(0, box1[3] - box1[1]);
int area2 = max(0, box2[2] - box2[0]) * max(0, box2[3] - box2[1]);
// total area of the two boxes
int sum_area = area1 + area2;
// find the corners of the intersection rectangle
int x1 = max(box1[0], box2[0]);
int y1 = max(box1[1], box2[1]);
int x2 = min(box1[2], box2[2]);
int y2 = min(box1[3], box2[3]);
// check whether the boxes actually intersect
if (y1 >= y2 || x1 >= x2) {
return 0.0;
} else {
int intersect = (x2 - x1) * (y2 - y1);
return intersect / (sum_area - intersect + 0.00000001);
}
}
float PaddleStructure::dis(std::vector<int> &box1, std::vector<int> &box2) {
int x1_1 = box1[0];
int y1_1 = box1[1];
int x2_1 = box1[2];
int y2_1 = box1[3];
int x1_2 = box2[0];
int y1_2 = box2[1];
int x2_2 = box2[2];
int y2_2 = box2[3];
float dis =
abs(x1_2 - x1_1) + abs(y1_2 - y1_1) + abs(x2_2 - x2_1) + abs(y2_2 - y2_1);
float dis_2 = abs(x1_2 - x1_1) + abs(y1_2 - y1_1);
float dis_3 = abs(x2_2 - x2_1) + abs(y2_2 - y2_1);
return dis + min(dis_2, dis_3);
}
PaddleStructure::~PaddleStructure() {
if (this->recognizer_ != nullptr) {
delete this->recognizer_;
}
};
} // namespace PaddleOCR
\ No newline at end of file
......@@ -17,8 +17,8 @@
namespace PaddleOCR {
void PostProcessor::GetContourArea(const std::vector<std::vector<float>> &box,
float unclip_ratio, float &distance) {
void DBPostProcessor::GetContourArea(const std::vector<std::vector<float>> &box,
float unclip_ratio, float &distance) {
int pts_num = 4;
float area = 0.0f;
float dist = 0.0f;
......@@ -35,8 +35,8 @@ void PostProcessor::GetContourArea(const std::vector<std::vector<float>> &box,
distance = area * unclip_ratio / dist;
}
cv::RotatedRect PostProcessor::UnClip(std::vector<std::vector<float>> box,
const float &unclip_ratio) {
cv::RotatedRect DBPostProcessor::UnClip(std::vector<std::vector<float>> box,
const float &unclip_ratio) {
float distance = 1.0;
GetContourArea(box, unclip_ratio, distance);
......@@ -67,7 +67,7 @@ cv::RotatedRect PostProcessor::UnClip(std::vector<std::vector<float>> box,
return res;
}
float **PostProcessor::Mat2Vec(cv::Mat mat) {
float **DBPostProcessor::Mat2Vec(cv::Mat mat) {
auto **array = new float *[mat.rows];
for (int i = 0; i < mat.rows; ++i)
array[i] = new float[mat.cols];
......@@ -81,7 +81,7 @@ float **PostProcessor::Mat2Vec(cv::Mat mat) {
}
std::vector<std::vector<int>>
PostProcessor::OrderPointsClockwise(std::vector<std::vector<int>> pts) {
DBPostProcessor::OrderPointsClockwise(std::vector<std::vector<int>> pts) {
std::vector<std::vector<int>> box = pts;
std::sort(box.begin(), box.end(), XsortInt);
......@@ -99,7 +99,7 @@ PostProcessor::OrderPointsClockwise(std::vector<std::vector<int>> pts) {
return rect;
}
std::vector<std::vector<float>> PostProcessor::Mat2Vector(cv::Mat mat) {
std::vector<std::vector<float>> DBPostProcessor::Mat2Vector(cv::Mat mat) {
std::vector<std::vector<float>> img_vec;
std::vector<float> tmp;
......@@ -113,20 +113,20 @@ std::vector<std::vector<float>> PostProcessor::Mat2Vector(cv::Mat mat) {
return img_vec;
}
bool PostProcessor::XsortFp32(std::vector<float> a, std::vector<float> b) {
bool DBPostProcessor::XsortFp32(std::vector<float> a, std::vector<float> b) {
if (a[0] != b[0])
return a[0] < b[0];
return false;
}
bool PostProcessor::XsortInt(std::vector<int> a, std::vector<int> b) {
bool DBPostProcessor::XsortInt(std::vector<int> a, std::vector<int> b) {
if (a[0] != b[0])
return a[0] < b[0];
return false;
}
std::vector<std::vector<float>> PostProcessor::GetMiniBoxes(cv::RotatedRect box,
float &ssid) {
std::vector<std::vector<float>>
DBPostProcessor::GetMiniBoxes(cv::RotatedRect box, float &ssid) {
ssid = std::max(box.size.width, box.size.height);
cv::Mat points;
......@@ -160,8 +160,8 @@ std::vector<std::vector<float>> PostProcessor::GetMiniBoxes(cv::RotatedRect box,
return array;
}
float PostProcessor::PolygonScoreAcc(std::vector<cv::Point> contour,
cv::Mat pred) {
float DBPostProcessor::PolygonScoreAcc(std::vector<cv::Point> contour,
cv::Mat pred) {
int width = pred.cols;
int height = pred.rows;
std::vector<float> box_x;
......@@ -206,8 +206,8 @@ float PostProcessor::PolygonScoreAcc(std::vector<cv::Point> contour,
return score;
}
float PostProcessor::BoxScoreFast(std::vector<std::vector<float>> box_array,
cv::Mat pred) {
float DBPostProcessor::BoxScoreFast(std::vector<std::vector<float>> box_array,
cv::Mat pred) {
auto array = box_array;
int width = pred.cols;
int height = pred.rows;
......@@ -244,7 +244,7 @@ float PostProcessor::BoxScoreFast(std::vector<std::vector<float>> box_array,
return score;
}
std::vector<std::vector<std::vector<int>>> PostProcessor::BoxesFromBitmap(
std::vector<std::vector<std::vector<int>>> DBPostProcessor::BoxesFromBitmap(
const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh,
const float &det_db_unclip_ratio, const std::string &det_db_score_mode) {
const int min_size = 3;
......@@ -321,9 +321,9 @@ std::vector<std::vector<std::vector<int>>> PostProcessor::BoxesFromBitmap(
return boxes;
}
std::vector<std::vector<std::vector<int>>>
PostProcessor::FilterTagDetRes(std::vector<std::vector<std::vector<int>>> boxes,
float ratio_h, float ratio_w, cv::Mat srcimg) {
std::vector<std::vector<std::vector<int>>> DBPostProcessor::FilterTagDetRes(
std::vector<std::vector<std::vector<int>>> boxes, float ratio_h,
float ratio_w, cv::Mat srcimg) {
int oriimg_h = srcimg.rows;
int oriimg_w = srcimg.cols;
......@@ -352,4 +352,92 @@ PostProcessor::FilterTagDetRes(std::vector<std::vector<std::vector<int>>> boxes,
return root_points;
}
void TablePostProcessor::init(std::string label_path,
bool merge_no_span_structure) {
this->label_list_ = Utility::ReadDict(label_path);
if (merge_no_span_structure) {
this->label_list_.push_back("<td></td>");
std::vector<std::string>::iterator it;
for (it = this->label_list_.begin(); it != this->label_list_.end();) {
if (*it == "<td>") {
it = this->label_list_.erase(it);
} else {
++it;
}
}
}
// add_special_char
this->label_list_.insert(this->label_list_.begin(), this->beg);
this->label_list_.push_back(this->end);
}
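// Run() greedily decodes the structure head: at each step it takes the argmax
// over the tag distribution, stops at the end token, and reads a cell box from
// the location head whenever the tag is a <td> variant. The sample score is the
// mean tag probability, or -1 when no valid box was decoded.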
void TablePostProcessor::Run(
std::vector<float> &loc_preds, std::vector<float> &structure_probs,
std::vector<float> &rec_scores, std::vector<int> &loc_preds_shape,
std::vector<int> &structure_probs_shape,
std::vector<std::vector<std::string>> &rec_html_tag_batch,
std::vector<std::vector<std::vector<int>>> &rec_boxes_batch,
std::vector<int> &width_list, std::vector<int> &height_list) {
for (int batch_idx = 0; batch_idx < structure_probs_shape[0]; batch_idx++) {
// image tags and boxes
std::vector<std::string> rec_html_tags;
std::vector<std::vector<int>> rec_boxes;
float score = 0.f;
int count = 0;
float char_score = 0.f;
int char_idx = 0;
// step
for (int step_idx = 0; step_idx < structure_probs_shape[1]; step_idx++) {
std::string html_tag;
std::vector<int> rec_box;
// html tag
int step_start_idx = (batch_idx * structure_probs_shape[1] + step_idx) *
structure_probs_shape[2];
char_idx = int(Utility::argmax(
&structure_probs[step_start_idx],
&structure_probs[step_start_idx + structure_probs_shape[2]]));
char_score = float(*std::max_element(
&structure_probs[step_start_idx],
&structure_probs[step_start_idx + structure_probs_shape[2]]));
html_tag = this->label_list_[char_idx];
if (step_idx > 0 && html_tag == this->end) {
break;
}
if (html_tag == this->beg) {
continue;
}
count += 1;
score += char_score;
rec_html_tags.push_back(html_tag);
// box
if (html_tag == "<td>" || html_tag == "<td" || html_tag == "<td></td>") {
for (int point_idx = 0; point_idx < loc_preds_shape[2]; point_idx++) {
step_start_idx = (batch_idx * structure_probs_shape[1] + step_idx) *
loc_preds_shape[2] +
point_idx;
float point = loc_preds[step_start_idx];
if (point_idx % 2 == 0) {
point = int(point * width_list[batch_idx]);
} else {
point = int(point * height_list[batch_idx]);
}
rec_box.push_back(point);
}
rec_boxes.push_back(rec_box);
}
}
score /= count;
if (isnan(score) || rec_boxes.size() == 0) {
score = -1;
}
rec_scores.push_back(score);
rec_boxes_batch.push_back(rec_boxes);
rec_html_tag_batch.push_back(rec_html_tags);
}
}
} // namespace PaddleOCR
......@@ -69,18 +69,28 @@ void Normalize::Run(cv::Mat *im, const std::vector<float> &mean,
}
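// ResizeImgType0 now supports two limit modes: "min" upscales the image so the
// short side reaches limit_side_len, while any other value ("max") downscales
// it so the long side does not exceed limit_side_len.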
void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img,
int max_size_len, float &ratio_h, float &ratio_w,
bool use_tensorrt) {
string limit_type, int limit_side_len, float &ratio_h,
float &ratio_w, bool use_tensorrt) {
int w = img.cols;
int h = img.rows;
float ratio = 1.f;
int max_wh = w >= h ? w : h;
if (max_wh > max_size_len) {
if (h > w) {
ratio = float(max_size_len) / float(h);
} else {
ratio = float(max_size_len) / float(w);
if (limit_type == "min") {
int min_wh = min(h, w);
if (min_wh < limit_side_len) {
if (h < w) {
ratio = float(limit_side_len) / float(h);
} else {
ratio = float(limit_side_len) / float(w);
}
}
} else {
int max_wh = max(h, w);
if (max_wh > limit_side_len) {
if (h > w) {
ratio = float(limit_side_len) / float(h);
} else {
ratio = float(limit_side_len) / float(w);
}
}
}
......@@ -143,4 +153,26 @@ void ClsResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img,
}
}
void TableResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img,
const int max_len) {
int w = img.cols;
int h = img.rows;
float ratio = w >= h ? float(max_len) / float(w) : float(max_len) / float(h);
int resize_h = int(float(h) * ratio);
int resize_w = int(float(w) * ratio);
cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
}
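// TablePadImg pads the resized image with black borders on the right and
// bottom so the network always receives a square max_len x max_len input.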
void TablePadImg::Run(const cv::Mat &img, cv::Mat &resize_img,
const int max_len) {
int w = img.cols;
int h = img.rows;
cv::copyMakeBorder(img, resize_img, 0, max_len - h, 0, max_len - w,
cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
}
} // namespace PaddleOCR
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <include/structure_table.h>
namespace PaddleOCR {
void StructureTableRecognizer::Run(
std::vector<cv::Mat> img_list,
std::vector<std::vector<std::string>> &structure_html_tags,
std::vector<float> &structure_scores,
std::vector<std::vector<std::vector<int>>> &structure_boxes,
std::vector<double> &times) {
std::chrono::duration<float> preprocess_diff =
std::chrono::steady_clock::now() - std::chrono::steady_clock::now();
std::chrono::duration<float> inference_diff =
std::chrono::steady_clock::now() - std::chrono::steady_clock::now();
std::chrono::duration<float> postprocess_diff =
std::chrono::steady_clock::now() - std::chrono::steady_clock::now();
int img_num = img_list.size();
for (int beg_img_no = 0; beg_img_no < img_num;
beg_img_no += this->table_batch_num_) {
// preprocess
auto preprocess_start = std::chrono::steady_clock::now();
int end_img_no = min(img_num, beg_img_no + this->table_batch_num_);
int batch_num = end_img_no - beg_img_no;
std::vector<cv::Mat> norm_img_batch;
std::vector<int> width_list;
std::vector<int> height_list;
for (int ino = beg_img_no; ino < end_img_no; ino++) {
cv::Mat srcimg;
img_list[ino].copyTo(srcimg);
cv::Mat resize_img;
cv::Mat pad_img;
this->resize_op_.Run(srcimg, resize_img, this->table_max_len_);
this->normalize_op_.Run(&resize_img, this->mean_, this->scale_,
this->is_scale_);
this->pad_op_.Run(resize_img, pad_img, this->table_max_len_);
norm_img_batch.push_back(pad_img);
width_list.push_back(srcimg.cols);
height_list.push_back(srcimg.rows);
}
std::vector<float> input(
batch_num * 3 * this->table_max_len_ * this->table_max_len_, 0.0f);
this->permute_op_.Run(norm_img_batch, input.data());
auto preprocess_end = std::chrono::steady_clock::now();
preprocess_diff += preprocess_end - preprocess_start;
// inference.
auto input_names = this->predictor_->GetInputNames();
auto input_t = this->predictor_->GetInputHandle(input_names[0]);
input_t->Reshape(
{batch_num, 3, this->table_max_len_, this->table_max_len_});
auto inference_start = std::chrono::steady_clock::now();
input_t->CopyFromCpu(input.data());
this->predictor_->Run();
auto output_names = this->predictor_->GetOutputNames();
auto output_tensor0 = this->predictor_->GetOutputHandle(output_names[0]);
auto output_tensor1 = this->predictor_->GetOutputHandle(output_names[1]);
std::vector<int> predict_shape0 = output_tensor0->shape();
std::vector<int> predict_shape1 = output_tensor1->shape();
int out_num0 = std::accumulate(predict_shape0.begin(), predict_shape0.end(),
1, std::multiplies<int>());
int out_num1 = std::accumulate(predict_shape1.begin(), predict_shape1.end(),
1, std::multiplies<int>());
std::vector<float> loc_preds;
std::vector<float> structure_probs;
loc_preds.resize(out_num0);
structure_probs.resize(out_num1);
output_tensor0->CopyToCpu(loc_preds.data());
output_tensor1->CopyToCpu(structure_probs.data());
auto inference_end = std::chrono::steady_clock::now();
inference_diff += inference_end - inference_start;
// postprocess
auto postprocess_start = std::chrono::steady_clock::now();
std::vector<std::vector<std::string>> structure_html_tag_batch;
std::vector<float> structure_score_batch;
std::vector<std::vector<std::vector<int>>> structure_boxes_batch;
this->post_processor_.Run(loc_preds, structure_probs, structure_score_batch,
predict_shape0, predict_shape1,
structure_html_tag_batch, structure_boxes_batch,
width_list, height_list);
for (int m = 0; m < predict_shape0[0]; m++) {
structure_html_tag_batch[m].insert(structure_html_tag_batch[m].begin(),
"<table>");
structure_html_tag_batch[m].insert(structure_html_tag_batch[m].begin(),
"<body>");
structure_html_tag_batch[m].insert(structure_html_tag_batch[m].begin(),
"<html>");
structure_html_tag_batch[m].push_back("</table>");
structure_html_tag_batch[m].push_back("</body>");
structure_html_tag_batch[m].push_back("</html>");
structure_html_tags.push_back(structure_html_tag_batch[m]);
structure_scores.push_back(structure_score_batch[m]);
structure_boxes.push_back(structure_boxes_batch[m]);
}
auto postprocess_end = std::chrono::steady_clock::now();
postprocess_diff += postprocess_end - postprocess_start;
times.push_back(double(preprocess_diff.count() * 1000));
times.push_back(double(inference_diff.count() * 1000));
times.push_back(double(postprocess_diff.count() * 1000));
}
}
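// LoadModel builds the Paddle Inference config: GPU with optional TensorRT
// (fp32/fp16/int8) or CPU with optional MKLDNN, zero-copy tensors, IR and
// memory optimizations, then creates the predictor.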
void StructureTableRecognizer::LoadModel(const std::string &model_dir) {
AnalysisConfig config;
config.SetModel(model_dir + "/inference.pdmodel",
model_dir + "/inference.pdiparams");
if (this->use_gpu_) {
config.EnableUseGpu(this->gpu_mem_, this->gpu_id_);
if (this->use_tensorrt_) {
auto precision = paddle_infer::Config::Precision::kFloat32;
if (this->precision_ == "fp16") {
precision = paddle_infer::Config::Precision::kHalf;
}
if (this->precision_ == "int8") {
precision = paddle_infer::Config::Precision::kInt8;
}
config.EnableTensorRtEngine(1 << 20, 10, 3, precision, false, false);
}
} else {
config.DisableGpu();
if (this->use_mkldnn_) {
config.EnableMKLDNN();
}
config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_);
}
// false for zero copy tensor
config.SwitchUseFeedFetchOps(false);
// true for multiple input
config.SwitchSpecifyInputNames(true);
config.SwitchIrOptim(true);
config.EnableMemoryOptim();
config.DisableGlogInfo();
this->predictor_ = CreatePredictor(config);
}
} // namespace PaddleOCR
......@@ -65,6 +65,37 @@ void Utility::VisualizeBboxes(const cv::Mat &srcimg,
<< std::endl;
}
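// This overload visualizes table-structure results: 8-value cell boxes are
// drawn as green quadrilaterals and 4-value boxes as green rectangles before
// the image is written to save_path.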
void Utility::VisualizeBboxes(const cv::Mat &srcimg,
const StructurePredictResult &structure_result,
const std::string &save_path) {
cv::Mat img_vis;
srcimg.copyTo(img_vis);
for (int n = 0; n < structure_result.cell_box.size(); n++) {
if (structure_result.cell_box[n].size() == 8) {
cv::Point rook_points[4];
for (int m = 0; m < structure_result.cell_box[n].size(); m += 2) {
rook_points[m / 2] =
cv::Point(int(structure_result.cell_box[n][m]),
int(structure_result.cell_box[n][m + 1]));
}
const cv::Point *ppt[1] = {rook_points};
int npt[] = {4};
cv::polylines(img_vis, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0);
} else if (structure_result.cell_box[n].size() == 4) {
cv::Point rook_points[2];
rook_points[0] = cv::Point(int(structure_result.cell_box[n][0]),
int(structure_result.cell_box[n][1]));
rook_points[1] = cv::Point(int(structure_result.cell_box[n][2]),
int(structure_result.cell_box[n][3]));
cv::rectangle(img_vis, rook_points[0], rook_points[1], CV_RGB(0, 255, 0),
2, 8, 0);
}
}
cv::imwrite(save_path, img_vis);
  std::cout << "The table visualized image is saved in " + save_path << std::endl;
}
// list all files under a directory
void Utility::GetAllFiles(const char *dir_name,
std::vector<std::string> &all_inputs) {
......@@ -248,4 +279,66 @@ void Utility::print_result(const std::vector<OCRPredictResult> &ocr_result) {
std::cout << std::endl;
}
}
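// crop_image() crops `area` ([x1, y1, x2, y2]) out of `img`; parts of the area
// that fall outside the image stay zero-filled, so the returned crop always has
// the requested size.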
cv::Mat Utility::crop_image(cv::Mat &img, std::vector<int> &area) {
cv::Mat crop_im;
int crop_x1 = std::max(0, area[0]);
int crop_y1 = std::max(0, area[1]);
int crop_x2 = std::min(img.cols - 1, area[2] - 1);
int crop_y2 = std::min(img.rows - 1, area[3] - 1);
crop_im = cv::Mat::zeros(area[3] - area[1], area[2] - area[0], 16);
cv::Mat crop_im_window =
crop_im(cv::Range(crop_y1 - area[1], crop_y2 + 1 - area[1]),
cv::Range(crop_x1 - area[0], crop_x2 + 1 - area[0]));
cv::Mat roi_img =
img(cv::Range(crop_y1, crop_y2 + 1), cv::Range(crop_x1, crop_x2 + 1));
crop_im_window += roi_img;
return crop_im;
}
void Utility::sorted_boxes(std::vector<OCRPredictResult> &ocr_result) {
  std::sort(ocr_result.begin(), ocr_result.end(), Utility::comparison_box);
  if (ocr_result.size() > 0) {
    // keep boxes within the same ~10 px row band ordered left to right
    for (int i = 0; i < ocr_result.size() - 1; i++) {
      for (int j = i; j >= 0; j--) {
        if (abs(ocr_result[j + 1].box[0][1] - ocr_result[j].box[0][1]) < 10 &&
            (ocr_result[j + 1].box[0][0] < ocr_result[j].box[0][0])) {
          std::swap(ocr_result[j], ocr_result[j + 1]);
        }
      }
    }
  }
}
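// xyxyxyxy2xyxy converts a 4-point polygon into its axis-aligned bounding box
// [left, top, right, bottom]; the two overloads take the points as a 4x2 matrix
// or as a flat 8-value vector.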
std::vector<int> Utility::xyxyxyxy2xyxy(std::vector<std::vector<int>> &box) {
int x_collect[4] = {box[0][0], box[1][0], box[2][0], box[3][0]};
int y_collect[4] = {box[0][1], box[1][1], box[2][1], box[3][1]};
int left = int(*std::min_element(x_collect, x_collect + 4));
int right = int(*std::max_element(x_collect, x_collect + 4));
int top = int(*std::min_element(y_collect, y_collect + 4));
int bottom = int(*std::max_element(y_collect, y_collect + 4));
std::vector<int> box1(4, 0);
box1[0] = left;
box1[1] = top;
box1[2] = right;
box1[3] = bottom;
return box1;
}
std::vector<int> Utility::xyxyxyxy2xyxy(std::vector<int> &box) {
int x_collect[4] = {box[0], box[2], box[4], box[6]};
int y_collect[4] = {box[1], box[3], box[5], box[7]};
int left = int(*std::min_element(x_collect, x_collect + 4));
int right = int(*std::max_element(x_collect, x_collect + 4));
int top = int(*std::min_element(y_collect, y_collect + 4));
int bottom = int(*std::max_element(y_collect, y_collect + 4));
std::vector<int> box1(4, 0);
box1[0] = left;
box1[1] = top;
box1[2] = right;
box1[3] = bottom;
return box1;
}
} // namespace PaddleOCR
\ No newline at end of file
......@@ -118,7 +118,7 @@ class OCRSystem(hub.Module):
all_results.append([])
continue
starttime = time.time()
dt_boxes, rec_res = self.text_sys(img)
dt_boxes, rec_res, _ = self.text_sys(img)
elapse = time.time() - starttime
logger.info("Predict time: {}".format(elapse))
......
......@@ -20,13 +20,14 @@ PaddleOCR provides 2 service deployment methods:
# Service deployment based on PaddleHub Serving
The hubserving service deployment directory contains six service packages: text detection, text angle classification, text recognition, the three-stage text detection + text angle classification + text recognition pipeline, table recognition, and PP-Structure. Please select, install and start the service package you need. The directory is as follows:
The hubserving service deployment directory contains seven service packages: text detection, text angle classification, text recognition, the three-stage text detection + text angle classification + text recognition pipeline, layout analysis, table recognition, and PP-Structure. Please select, install and start the service package you need. The directory is as follows:
```
deploy/hubserving/
  └─  ocr_cls   text angle classification module service package
  └─  ocr_det   text detection module service package
  └─  ocr_rec   text recognition module service package
  └─  ocr_system   text detection + text angle classification + text recognition pipeline service package
  └─  structure_layout   layout analysis service package
  └─  structure_table   table recognition service package
  └─  structure_system   PP-Structure service package
```
......@@ -41,6 +42,7 @@ deploy/hubserving/ocr_system/
```
## 1. Recent updates
* 2022.08.23 Added the layout analysis service.
* 2022.05.05 Added the PP-OCRv3 detection and recognition models.
* 2022.03.30 Added the PP-Structure and table recognition services.
......@@ -59,8 +61,9 @@ pip3 install paddlehub==2.1.0 --upgrade -i https://mirror.baidu.com/pypi/simple
detection model: ./inference/ch_PP-OCRv3_det_infer/
recognition model: ./inference/ch_PP-OCRv3_rec_infer/
angle classifier: ./inference/ch_ppocr_mobile_v2.0_cls_infer/
table structure recognition model: ./inference/en_ppocr_mobile_v2.0_table_structure_infer/
```
layout analysis model: ./inference/picodet_lcnet_x1_0_fgd_layout_infer/
table structure recognition model: ./inference/ch_ppstructure_mobile_v2.0_SLANet_infer/
```
**The model paths can be viewed and modified in `params.py`.** More models can be downloaded from the PaddleOCR model lists [PP-OCR](../../doc/doc_ch/models_list.md) and [PP-Structure](../../ppstructure/docs/models_list.md), or replaced with your own trained and converted models.
......@@ -86,6 +89,9 @@ hub install deploy/hubserving/structure_table/
# Or, install the PP-Structure service module:
hub install deploy/hubserving/structure_system/
# Or, install the layout analysis service module:
hub install deploy/hubserving/structure_layout/
```
* On Windows (the folder separator is `\`), the installation examples are as follows:
......@@ -107,6 +113,9 @@ hub install deploy\hubserving\structure_table\
# Or, install the PP-Structure service module:
hub install deploy\hubserving\structure_system\
# Or, install the layout analysis service module:
hub install deploy\hubserving\structure_layout\
```
### 2.4 Start the service
......@@ -117,7 +126,7 @@ $ hub serving start --modules [Module1==Version1, Module2==Version2, ...] \
--port XXXX \
--use_multiprocess \
--workers \
```
**Parameters:**
......@@ -167,12 +176,12 @@ $ hub serving start --modules [Module1==Version1, Module2==Version2, ...] \
```shell
export CUDA_VISIBLE_DEVICES=3
hub serving start -c deploy/hubserving/ocr_system/config.json
```
## 3. Send prediction requests
After configuring the server, you can use the following command to send a prediction request and obtain the prediction result:
```python tools/test_hubserving.py server_url image_path```
```python tools/test_hubserving.py --server_url=server_url --image_dir=image_path```
Two parameters need to be passed to the script:
- **server_url**: the service address, in the format
......@@ -184,6 +193,7 @@ hub serving start -c deploy/hubserving/ocr_system/config.json
`http://127.0.0.1:8868/predict/ocr_system`
`http://127.0.0.1:8869/predict/structure_table`
`http://127.0.0.1:8870/predict/structure_system`
`http://127.0.0.1:8870/predict/structure_layout`
- **image_dir**: test image path; can be a single image path or an image directory path
- **visualize**: whether to visualize the results; the default value is False
- **output**: the folder for saving visualized results; the default value is `./hubserving_result`
......@@ -202,17 +212,19 @@ hub serving start -c deploy/hubserving/ocr_system/config.json
|text_region|list|text position coordinates|
|html|str|HTML string of the table|
|regions|list|results of layout analysis + table recognition + OCR; each item is a list containing `bbox` (region coordinates), `type` (region type) and `res` (region result)|
|layout|list|results of layout analysis; each item is a dict containing `bbox` (region coordinates) and `label` (region type)|
Different modules return different fields; for example, the result returned by the text recognition service module does not contain the `text_region` field. The details are as follows:
| field name/module name | ocr_det | ocr_cls | ocr_rec | ocr_system | structure_table | structure_system |
| --- | --- | --- | --- | --- | --- |--- |
|angle| | ✔ | | ✔ | ||
|text| | |✔|✔| | ✔ |
|confidence| |✔ |✔| | | ✔|
|text_region| ✔| | |✔ | | ✔|
|html| | | | |✔ |✔|
|regions| | | | |✔ |✔ |
| field name/module name | ocr_det | ocr_cls | ocr_rec | ocr_system | structure_table | structure_system | structure_layout |
| --- | --- | --- | --- | --- | --- | --- | --- |
|angle| | ✔ | | ✔ | |||
|text| | |✔|✔| | ✔ | |
|confidence| |✔ |✔| | | ✔| |
|text_region| ✔| | |✔ | | ✔| |
|html| | | | |✔ |✔||
|regions| | | | |✔ |✔ | |
|layout| | | | | | | ✔ |
**Note:** To add, delete or modify the returned fields, edit the `module.py` file of the corresponding module; for the complete workflow, refer to the next section on customizing a service module.
......
......@@ -20,13 +20,14 @@ PaddleOCR provides 2 service deployment methods:
# Service deployment based on PaddleHub Serving
The hubserving service deployment directory includes six service packages: text detection, text angle class, text recognition, text detection+text angle class+text recognition three-stage series connection, table recognition and PP-Structure. Please select the corresponding service package to install and start service according to your needs. The directory is as follows:
The hubserving service deployment directory includes seven service packages: text detection, text angle class, text recognition, text detection+text angle class+text recognition three-stage series connection, layout analysis, table recognition and PP-Structure. Please select the corresponding service package to install and start service according to your needs. The directory is as follows:
```
deploy/hubserving/
└─ ocr_det text detection module service package
└─ ocr_cls text angle class module service package
└─ ocr_rec text recognition module service package
└─ ocr_system text detection+text angle class+text recognition three-stage series connection service package
└─ structure_layout layout analysis service package
└─ structure_table table recognition service package
└─ structure_system PP-Structure service package
```
......@@ -43,6 +44,7 @@ deploy/hubserving/ocr_system/
* 2022.05.05 add PP-OCRv3 text detection and recognition models.
* 2022.03.30 add PP-Structure and table recognition services.
* 2022.08.23 add layout analysis service.
## 2. Quick start service
......@@ -61,7 +63,8 @@ Before installing the service module, you need to prepare the inference model an
text detection model: ./inference/ch_PP-OCRv3_det_infer/
text recognition model: ./inference/ch_PP-OCRv3_rec_infer/
text angle classifier: ./inference/ch_ppocr_mobile_v2.0_cls_infer/
table recognition: ./inference/en_ppocr_mobile_v2.0_table_structure_infer/
layout analysis model: ./inference/picodet_lcnet_x1_0_fgd_layout_infer/
table recognition: ./inference/ch_ppstructure_mobile_v2.0_SLANet_infer/
```
**The model path can be found and modified in `params.py`.** More models provided by PaddleOCR can be obtained from the [model library](../../doc/doc_en/models_list_en.md). You can also use models trained by yourself.
......@@ -88,6 +91,9 @@ hub install deploy/hubserving/structure_table/
# Or install PP-Structure service module
hub install deploy/hubserving/structure_system/
# Or install layout analysis service module
hub install deploy/hubserving/structure_layout/
```
* On Windows platform, the examples are as follows.
......@@ -109,6 +115,9 @@ hub install deploy/hubserving/structure_table/
# Or install PP-Structure service module
hub install deploy\hubserving\structure_system\
# Or install layout analysis service module
hub install deploy\hubserving\structure_layout\
```
### 2.4 Start service
......@@ -177,7 +186,7 @@ hub serving start -c deploy/hubserving/ocr_system/config.json
## 3. Send prediction requests
After the service starts, you can use the following command to send a prediction request to obtain the prediction result:
```shell
python tools/test_hubserving.py server_url image_path
python tools/test_hubserving.py --server_url=server_url --image_dir=image_path
```
Two parameters need to be passed to the script:
......@@ -189,8 +198,9 @@ For example, if using the configuration file to start the text angle classificat
`http://127.0.0.1:8866/predict/ocr_cls`
`http://127.0.0.1:8867/predict/ocr_rec`
`http://127.0.0.1:8868/predict/ocr_system`
`http://127.0.0.1:8869/predict/structure_table`
`http://127.0.0.1:8870/predict/structure_system`
`http://127.0.0.1:8870/predict/structure_layout`
- **image_dir**: test image path, can be a single image path or an image directory path
- **visualize**: whether to visualize the results, the default value is False
- **output**: the folder to save visualization results, the default value is `./hubserving_result`
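Besides `tools/test_hubserving.py`, a request can also be sent directly with a few lines of Python. The sketch below is illustrative: it assumes an `ocr_system` service listening on port 8868 and follows the PaddleHub Serving convention of posting base64-encoded images under an `images` key.

```python
# Minimal hubserving client sketch (assumes ocr_system runs on port 8868).
import base64
import json

import requests


def predict(server_url, image_path):
    with open(image_path, "rb") as f:
        image = base64.b64encode(f.read()).decode("utf8")
    resp = requests.post(
        server_url,
        headers={"Content-type": "application/json"},
        data=json.dumps({"images": [image]}))
    resp.raise_for_status()
    # PaddleHub Serving wraps the module output in a "results" field
    return resp.json()["results"]


if __name__ == "__main__":
    print(predict("http://127.0.0.1:8868/predict/ocr_system", "./doc/imgs/11.jpg"))
```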
......@@ -211,17 +221,19 @@ The returned result is a list. Each item in the list is a dict. The dict may con
|text_region|list|text location coordinates|
|html|str|HTML string of the table|
|regions|list|The result of layout analysis + table recognition + OCR, each item is a list, including `bbox` indicating area coordinates, `type` of area type and `res` of area results|
|layout|list|The result of layout analysis, each item is a dict, including `bbox` indicating area coordinates, `label` of area type|
The fields returned by different modules are different. For example, the results returned by the text recognition service module do not contain `text_region`. The details are as follows:
| field name/module name | ocr_det | ocr_cls | ocr_rec | ocr_system | structure_table | structure_system |
| --- | --- | --- | --- | --- | --- |--- |
|angle| | ✔ | | ✔ | ||
|text| | |✔|✔| | ✔ |
|confidence| |✔ |✔| | | ✔|
|text_region| ✔| | |✔ | | ✔|
|html| | | | |✔ |✔|
|regions| | | | |✔ |✔ |
| field name/module name | ocr_det | ocr_cls | ocr_rec | ocr_system | structure_table | structure_system | structure_layout |
| --- | --- | --- | --- | --- | --- |--- |--- |
|angle| | ✔ | | ✔ | || |
|text| | |✔|✔| | ✔ | |
|confidence| |✔ |✔| | | ✔| |
|text_region| ✔| | |✔ | | ✔| |
|html| | | | |✔ |✔| |
|regions| | | | |✔ |✔ | |
|layout| | | | | | |✔ |
**Note:** If you need to add, delete or modify the returned fields, you can modify the file `module.py` of the corresponding module. For the complete process, refer to the user-defined modification service module in the next section.
......
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
\ No newline at end of file
{
"modules_info": {
"structure_layout": {
"init_args": {
"version": "1.0.0",
"use_gpu": true
},
"predict_args": {
}
}
},
"port": 8871,
"use_multiprocess": false,
"workers": 2
}
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
sys.path.insert(0, ".")
import copy
import time
from paddlehub.common.logger import logger
from paddlehub.module.module import moduleinfo, runnable, serving
import cv2
import paddlehub as hub
from tools.infer.utility import base64_to_cv2
from ppstructure.layout.predict_layout import LayoutPredictor as _LayoutPredictor
from ppstructure.utility import parse_args
from deploy.hubserving.structure_layout.params import read_params
@moduleinfo(
name="structure_layout",
version="1.0.0",
summary="PP-Structure layout service",
author="paddle-dev",
author_email="paddle-dev@baidu.com",
type="cv/structure_layout")
class LayoutPredictor(hub.Module):
def _initialize(self, use_gpu=False, enable_mkldnn=False):
"""
initialize with the necessary elements
"""
cfg = self.merge_configs()
cfg.use_gpu = use_gpu
if use_gpu:
try:
_places = os.environ["CUDA_VISIBLE_DEVICES"]
int(_places[0])
print("use gpu: ", use_gpu)
print("CUDA_VISIBLE_DEVICES: ", _places)
cfg.gpu_mem = 8000
            except Exception:
                raise RuntimeError(
                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you want to use GPU, please set CUDA_VISIBLE_DEVICES via export CUDA_VISIBLE_DEVICES=cuda_device_id."
                )
cfg.ir_optim = True
cfg.enable_mkldnn = enable_mkldnn
self.layout_predictor = _LayoutPredictor(cfg)
def merge_configs(self):
        # default cfg: temporarily strip CLI args so parse_args() returns defaults
backup_argv = copy.deepcopy(sys.argv)
sys.argv = sys.argv[:1]
cfg = parse_args()
update_cfg_map = vars(read_params())
for key in update_cfg_map:
cfg.__setattr__(key, update_cfg_map[key])
sys.argv = copy.deepcopy(backup_argv)
return cfg
def read_images(self, paths=[]):
images = []
for img_path in paths:
assert os.path.isfile(
img_path), "The {} isn't a valid file.".format(img_path)
img = cv2.imread(img_path)
if img is None:
logger.info("error in loading image:{}".format(img_path))
continue
images.append(img)
return images
def predict(self, images=[], paths=[]):
"""
        Get the layout analysis results of the input images.
        Args:
            images (list(numpy.ndarray)): image data, each with shape [H, W, C]; pass either images or paths
            paths (list[str]): the paths of the images; pass either paths or images
Returns:
res (list): The layout results of images.
"""
if images != [] and isinstance(images, list) and paths == []:
predicted_data = images
elif images == [] and isinstance(paths, list) and paths != []:
predicted_data = self.read_images(paths)
else:
raise TypeError("The input data is inconsistent with expectations.")
assert predicted_data != [], "There is not any image to be predicted. Please check the input data."
all_results = []
for img in predicted_data:
if img is None:
logger.info("error in loading image")
all_results.append([])
continue
starttime = time.time()
res, _ = self.layout_predictor(img)
elapse = time.time() - starttime
logger.info("Predict time: {}".format(elapse))
for item in res:
item['bbox'] = item['bbox'].tolist()
all_results.append({'layout': res})
return all_results
@serving
def serving_method(self, images, **kwargs):
"""
Run as a service.
"""
images_decode = [base64_to_cv2(image) for image in images]
results = self.predict(images_decode, **kwargs)
return results
if __name__ == '__main__':
layout = LayoutPredictor()
layout._initialize()
image_path = ['./ppstructure/docs/table/1.png']
res = layout.predict(paths=image_path)
print(res)
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
class Config(object):
pass
def read_params():
cfg = Config()
# params for layout analysis
cfg.layout_model_dir = './inference/picodet_lcnet_x1_0_fgd_layout_infer/'
cfg.layout_dict_path = './ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt'
cfg.layout_score_threshold = 0.5
cfg.layout_nms_threshold = 0.5
return cfg
......@@ -119,7 +119,7 @@ class StructureSystem(hub.Module):
all_results.append([])
continue
starttime = time.time()
res = self.table_sys(img)
res, _ = self.table_sys(img)
elapse = time.time() - starttime
logger.info("Predict time: {}".format(elapse))
......@@ -144,6 +144,6 @@ class StructureSystem(hub.Module):
if __name__ == '__main__':
structure_system = StructureSystem()
structure_system._initialize()
image_path = ['./doc/table/1.png']
image_path = ['./ppstructure/docs/table/1.png']
res = structure_system.predict(paths=image_path)
print(res)
......@@ -23,8 +23,10 @@ def read_params():
cfg = table_read_params()
# params for layout parser model
cfg.layout_path_model = 'lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config'
cfg.layout_label_map = None
cfg.layout_model_dir = ''
cfg.layout_dict_path = './ppocr/utils/dict/layout_publaynet_dict.txt'
cfg.layout_score_threshold = 0.5
cfg.layout_nms_threshold = 0.5
cfg.mode = 'structure'
cfg.output = './output'
......
......@@ -118,11 +118,11 @@ class TableSystem(hub.Module):
all_results.append([])
continue
starttime = time.time()
pred_html = self.table_sys(img)
res, _ = self.table_sys(img)
elapse = time.time() - starttime
logger.info("Predict time: {}".format(elapse))
all_results.append({'html': pred_html})
all_results.append({'html': res['html']})
return all_results
@serving
......@@ -138,6 +138,6 @@ class TableSystem(hub.Module):
if __name__ == '__main__':
table_system = TableSystem()
table_system._initialize()
image_path = ['./doc/table/table.jpg']
image_path = ['./ppstructure/docs/table/table.jpg']
res = table_system.predict(paths=image_path)
print(res)
......@@ -5,4 +5,4 @@ det_db_unclip_ratio 1.6
det_db_use_dilate 0
det_use_polygon_score 1
use_direction_classify 1
rec_image_height 32
\ No newline at end of file
rec_image_height 48
\ No newline at end of file
......@@ -99,6 +99,8 @@ The following table also provides a series of models that can be deployed on mob
|Version|Introduction|Model size|Detection model|Text Direction model|Recognition model|Paddle-Lite branch|
|---|---|---|---|---|---|---|
|PP-OCRv3|extra-lightweight chinese OCR optimized model|16.2M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.nb)|v2.10|
|PP-OCRv3(slim)|extra-lightweight chinese OCR optimized model|5.9M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.nb)|v2.10|
|PP-OCRv2|extra-lightweight chinese OCR optimized model|11M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_infer_opt.nb)|v2.10|
|PP-OCRv2(slim)|extra-lightweight chinese OCR optimized model|4.6M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_slim_opt.nb)|v2.10|
......@@ -134,17 +136,16 @@ Introduction to paddle_lite_opt parameters:
The following takes PaddleOCR's ultra-lightweight Chinese model as an example to show how to use the compiled opt tool to convert an inference model into a Paddle-Lite optimized model.
```
# [Recommendation] Download the Chinese and English inference models of PP-OCRv2
wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_quant_infer.tar && tar xf ch_PP-OCRv2_det_slim_quant_infer.tar
wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_quant_infer.tar && tar xf ch_PP-OCRv2_rec_slim_quant_infer.tar
# [Recommendation] Download the Chinese and English inference models of PP-OCRv3
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar && tar xf ch_PP-OCRv3_det_slim_infer.tar
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar && tar xf ch_PP-OCRv3_rec_slim_infer.tar
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_cls_slim_infer.tar && tar xf ch_ppocr_mobile_v2.0_cls_slim_infer.tar
# Convert detection model
./opt --model_file=./ch_PP-OCRv2_det_slim_quant_infer/inference.pdmodel --param_file=./ch_PP-OCRv2_det_slim_quant_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv2_det_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
paddle_lite_opt --model_file=./ch_PP-OCRv3_det_slim_infer/inference.pdmodel --param_file=./ch_PP-OCRv3_det_slim_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv3_det_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
# Convert recognition model
./opt --model_file=./ch_PP-OCRv2_rec_slim_quant_infer/inference.pdmodel --param_file=./ch_PP-OCRv2_rec_slim_quant_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv2_rec_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
paddle_lite_opt --model_file=./ch_PP-OCRv3_rec_slim_infer/inference.pdmodel --param_file=./ch_PP-OCRv3_rec_slim_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv3_rec_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
# Convert angle classifier model
./opt --model_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdmodel --param_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdiparams --optimize_out=./ch_ppocr_mobile_v2.0_cls_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
paddle_lite_opt --model_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdmodel --param_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdiparams --optimize_out=./ch_ppocr_mobile_v2.0_cls_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
```
After the conversion succeeds, additional files ending with `.nb` appear in the inference model directory; these are the successfully converted model files.
......@@ -197,15 +198,15 @@ Some preparatory work is required first.
cp ../../../cxx/lib/libpaddle_light_api_shared.so ./debug/
```
Prepare the test image, taking PaddleOCR/doc/imgs/11.jpg as an example, copy the image file to the demo/cxx/ocr/debug/ folder. Prepare the model files optimized by the lite opt tool, ch_det_mv3_db_opt.nb, ch_rec_mv3_crnn_opt.nb, and place them under the demo/cxx/ocr/debug/ folder.
Prepare the test image, taking PaddleOCR/doc/imgs/11.jpg as an example, and copy it to the demo/cxx/ocr/debug/ folder. Prepare the model files optimized by the lite opt tool, ch_PP-OCRv3_det_slim_opt.nb and ch_PP-OCRv3_rec_slim_opt.nb, and place them under the demo/cxx/ocr/debug/ folder.
The structure of the OCR demo is as follows after the above command is executed:
```
demo/cxx/ocr/
|-- debug/
| |--ch_PP-OCRv2_det_slim_opt.nb Detection model
| |--ch_PP-OCRv2_rec_slim_opt.nb Recognition model
| |--ch_PP-OCRv3_det_slim_opt.nb Detection model
| |--ch_PP-OCRv3_rec_slim_opt.nb Recognition model
| |--ch_ppocr_mobile_v2.0_cls_slim_opt.nb Text direction classification model
| |--11.jpg Image for OCR
| |--ppocr_keys_v1.txt Dictionary file
......@@ -240,7 +241,7 @@ det_db_thresh 0.3 # Used to filter the binarized image of DB prediction,
det_db_box_thresh 0.5 # DB post-processing box filtering threshold; if boxes are missed, it can be reduced as appropriate
det_db_unclip_ratio 1.6 # Indicates the compactness of the text box, the smaller the value, the closer the text box to the text
use_direction_classify 0 # Whether to use the direction classifier, 0 means not to use, 1 means to use
rec_image_height 32 # The height of the input image of the recognition model, the PP-OCRv3 model needs to be set to 48, and the PP-OCRv2 model needs to be set to 32
rec_image_height 48 # The height of the input image of the recognition model, the PP-OCRv3 model needs to be set to 48, and the PP-OCRv2 model needs to be set to 32
```
5. Run Model on phone
......@@ -260,14 +261,14 @@ After the above steps are completed, you can use adb to push the file to the pho
export LD_LIBRARY_PATH=${PWD}:$LD_LIBRARY_PATH
# The use of ocr_db_crnn is:
# ./ocr_db_crnn Mode Detection model file Orientation classifier model file Recognition model file Hardware Precision Threads Batchsize Test image path Dictionary file path
./ocr_db_crnn system ch_PP-OCRv2_det_slim_opt.nb ch_PP-OCRv2_rec_slim_opt.nb ch_ppocr_mobile_v2.0_cls_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt ppocr_keys_v1.txt True
./ocr_db_crnn system ch_PP-OCRv3_det_slim_opt.nb ch_PP-OCRv3_rec_slim_opt.nb ch_ppocr_mobile_v2.0_cls_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt ppocr_keys_v1.txt True
# precision can be INT8 for a quantized model or FP32 for a normal model.
# Only using detection model
./ocr_db_crnn det ch_PP-OCRv2_det_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt
./ocr_db_crnn det ch_PP-OCRv3_det_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt
# Only using recognition model
./ocr_db_crnn rec ch_PP-OCRv2_rec_slim_opt.nb arm8 INT8 10 1 word_1.jpg ppocr_keys_v1.txt config.txt
./ocr_db_crnn rec ch_PP-OCRv3_rec_slim_opt.nb arm8 INT8 10 1 word_1.jpg ppocr_keys_v1.txt config.txt
```
If you modify the code, you need to recompile and push to the phone.
......
......@@ -97,6 +97,8 @@ Paddle-Lite provides multiple strategies to automatically optimize the original model, including
|Version|Introduction|Model size|Detection model|Text Direction model|Recognition model|Paddle-Lite branch|
|---|---|---|---|---|---|---|
|PP-OCRv3|distilled ultra-lightweight Chinese OCR mobile model|16.2M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.nb)|v2.10|
|PP-OCRv3(slim)|distilled ultra-lightweight Chinese OCR mobile model|5.9M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.nb)|v2.10|
|PP-OCRv2|distilled ultra-lightweight Chinese OCR mobile model|11M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_infer_opt.nb)|v2.10|
|PP-OCRv2(slim)|distilled ultra-lightweight Chinese OCR mobile model|4.6M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_slim_opt.nb)|v2.10|
......@@ -131,16 +133,16 @@ paddle_lite_opt parameter description:
The following takes PaddleOCR's ultra-lightweight Chinese model as an example to show how to use the compiled opt tool to convert an inference model into a Paddle-Lite optimized model.
```
# [Recommendation] Download the Chinese and English inference models of PP-OCRv2
wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_quant_infer.tar && tar xf ch_PP-OCRv2_det_slim_quant_infer.tar
wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_quant_infer.tar && tar xf ch_PP-OCRv2_rec_slim_quant_infer.tar
# [Recommendation] Download the Chinese and English inference models of PP-OCRv3
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar && tar xf ch_PP-OCRv3_det_slim_infer.tar
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar && tar xf ch_PP-OCRv3_rec_slim_infer.tar
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_cls_slim_infer.tar && tar xf ch_ppocr_mobile_v2.0_cls_slim_infer.tar
# Convert the detection model
./opt --model_file=./ch_PP-OCRv2_det_slim_quant_infer/inference.pdmodel --param_file=./ch_PP-OCRv2_det_slim_quant_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv2_det_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
paddle_lite_opt --model_file=./ch_PP-OCRv3_det_slim_infer/inference.pdmodel --param_file=./ch_PP-OCRv3_det_slim_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv3_det_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
# Convert the recognition model
./opt --model_file=./ch_PP-OCRv2_rec_slim_quant_infer/inference.pdmodel --param_file=./ch_PP-OCRv2_rec_slim_quant_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv2_rec_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
paddle_lite_opt --model_file=./ch_PP-OCRv3_rec_slim_infer/inference.pdmodel --param_file=./ch_PP-OCRv3_rec_slim_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv3_rec_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
# Convert the direction classifier model
./opt --model_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdmodel --param_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdiparams --optimize_out=./ch_ppocr_mobile_v2.0_cls_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
paddle_lite_opt --model_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdmodel --param_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdiparams --optimize_out=./ch_ppocr_mobile_v2.0_cls_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
```
......@@ -194,15 +196,15 @@ wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_cls
```
Prepare the test image, e.g. `PaddleOCR/doc/imgs/11.jpg`, and copy it to the `demo/cxx/ocr/debug/` folder.
Prepare the model files optimized by the lite opt tool, e.g. `ch_PP-OCRv2_det_slim_opt.nb`, `ch_PP-OCRv2_rec_slim_opt.nb` and `ch_ppocr_mobile_v2.0_cls_slim_opt.nb`, and place them under the `demo/cxx/ocr/debug/` folder.
Prepare the model files optimized by the lite opt tool, e.g. `ch_PP-OCRv3_det_slim_opt.nb`, `ch_PP-OCRv3_rec_slim_opt.nb` and `ch_ppocr_mobile_v2.0_cls_slim_opt.nb`, and place them under the `demo/cxx/ocr/debug/` folder.
After the above steps are executed, the ocr folder contains the following files:
```
demo/cxx/ocr/
|-- debug/
|   |--ch_PP-OCRv2_det_slim_opt.nb   optimized detection model file
|   |--ch_PP-OCRv2_rec_slim_opt.nb   optimized recognition model file
|   |--ch_PP-OCRv3_det_slim_opt.nb   optimized detection model file
|   |--ch_PP-OCRv3_rec_slim_opt.nb   optimized recognition model file
|   |--ch_ppocr_mobile_v2.0_cls_slim_opt.nb   optimized text direction classifier model file
|   |--11.jpg   image to be tested
|   |--ppocr_keys_v1.txt   Chinese dictionary file
......@@ -239,7 +241,7 @@ det_db_thresh 0.3 # used to filter the binarized map of DB prediction; setting it to 0.
det_db_box_thresh 0.5 # threshold for filtering boxes in detector post-processing; reduce it if boxes are missed
det_db_unclip_ratio 1.6 # compactness of the text box; the smaller the value, the tighter the box around the text
use_direction_classify 0 # whether to use the direction classifier: 0 means no, 1 means yes
rec_image_height 32 # input image height of the recognition model; set it to 48 for PP-OCRv3 and to 32 for PP-OCRv2
rec_image_height 48 # input image height of the recognition model; set it to 48 for PP-OCRv3 and to 32 for PP-OCRv2
```
5. Run the model on the phone
......@@ -259,13 +261,13 @@ rec_image_height 32 # input image height of the recognition model; set it to 48 for PP-OCRv3
export LD_LIBRARY_PATH=${PWD}:$LD_LIBRARY_PATH
# Usage of the ocr_db_crnn executable:
# ./ocr_db_crnn mode det_model_file cls_model_file rec_model_file hardware precision threads batchsize image_path config_path dict_path use_benchmark
./ocr_db_crnn system ch_PP-OCRv2_det_slim_opt.nb ch_PP-OCRv2_rec_slim_opt.nb ch_ppocr_mobile_v2.0_cls_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt ppocr_keys_v1.txt True
./ocr_db_crnn system ch_PP-OCRv3_det_slim_opt.nb ch_PP-OCRv3_rec_slim_opt.nb ch_ppocr_mobile_v2.0_cls_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt ppocr_keys_v1.txt True
# To use only the text detection model:
./ocr_db_crnn det ch_PP-OCRv2_det_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt
./ocr_db_crnn det ch_PP-OCRv3_det_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt
# To use only the text recognition model:
./ocr_db_crnn rec ch_PP-OCRv2_rec_slim_opt.nb arm8 INT8 10 1 word_1.jpg ppocr_keys_v1.txt config.txt
./ocr_db_crnn rec ch_PP-OCRv3_rec_slim_opt.nb arm8 INT8 10 1 word_1.jpg ppocr_keys_v1.txt config.txt
```
If you modify the code, you need to recompile and push it to the phone again.
......
......@@ -22,7 +22,7 @@
### 1. Install PaddleSlim
```bash
pip3 install paddleslim==2.2.2
pip3 install paddleslim==2.3.2
```
### 2. Prepare the trained model
......@@ -33,17 +33,7 @@ PaddleOCR provides a series of trained [models](../../../doc/doc_ch/models_list.
Quantization training includes offline and online quantization training; online quantization works better. It requires loading a pre-trained model, and the model can be quantized once the quantization strategy is defined.
The quantization training code is located in slim/quantization/quant.py. For example, to train a detection model, the training command is as follows:
```bash
python deploy/slim/quantization/quant.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model='your trained model' Global.save_model_dir=./output/quant_model
# e.g. download the provided trained model
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar
tar -xf ch_ppocr_mobile_v2.0_det_train.tar
python deploy/slim/quantization/quant.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./ch_ppocr_mobile_v2.0_det_train/best_accuracy Global.save_model_dir=./output/quant_model
```
Model distillation and model quantization can be used at the same time; take the PPOCRv3 detection model as an example:
The quantization training code is located in slim/quantization/quant.py. For example, taking the PPOCRv3 detection model as an example, the training command is as follows:
```
# Download the detection pre-trained model:
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar
......@@ -58,7 +48,7 @@ python deploy/slim/quantization/quant.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_
After obtaining the model saved by quantization training, we can export it as an inference model for deployment:
```bash
python deploy/slim/quantization/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_inference_dir=./output/quant_inference_model
python deploy/slim/quantization/export_model.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_inference_dir=./output/quant_inference_model
```
### 5. Deploy the quantized model
......
......@@ -25,7 +25,7 @@ After training, if you want to further compress the model size and accelerate th
### 1. Install PaddleSlim
```bash
pip3 install paddleslim==2.2.2
pip3 install paddleslim==2.3.2
```
......@@ -39,18 +39,7 @@ Quantization training includes offline quantization training and online quantiza
Online quantization training is more effective. It is necessary to load the pre-trained model.
After the quantization strategy is defined, the model can be quantized.
The code for quantization training is located in `slim/quantization/quant.py`. For example, to train a detection model, the training instructions are as follows:
```bash
python deploy/slim/quantization/quant.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model='your trained model' Global.save_model_dir=./output/quant_model
# download provided model
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar
tar -xf ch_ppocr_mobile_v2.0_det_train.tar
python deploy/slim/quantization/quant.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./ch_ppocr_mobile_v2.0_det_train/best_accuracy Global.save_model_dir=./output/quant_model
```
Model distillation and model quantization can be used at the same time, taking the PPOCRv3 detection model as an example:
The code for quantization training is located in `slim/quantization/quant.py`. For example, the training command for quantizing the PPOCRv3 detection model is as follows:
```
# download provided model
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar
......@@ -66,11 +55,11 @@ If you want to quantify the text recognition model, you can modify the configura
Once we have the model saved by quantization training, we can export it as an inference model for deployment:
```bash
python deploy/slim/quantization/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_inference_dir=./output/quant_inference_model
python deploy/slim/quantization/export_model.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_inference_dir=./output/quant_inference_model
```
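For orientation, the snippet below sketches what `quant.py` and `export_model.py` do in combination, using the PaddleSlim dygraph `QAT` API. The config keys and the tiny stand-in model are illustrative; the real config (including the learnable PACT clipping layer) lives in `deploy/slim/quantization/quant.py`.

```python
# A hedged sketch of quantization-aware training + export (illustrative only).
import paddle
from paddleslim.dygraph.quant import QAT

quant_config = {
    "weight_quantize_type": "channel_wise_abs_max",         # per-channel weights
    "activation_quantize_type": "moving_average_abs_max",   # running activation stats
    "weight_bits": 8,
    "activation_bits": 8,
    "quantizable_layer_type": ["Conv2D", "Linear"],
}

# stand-in for the PP-OCRv3 detection network that quant.py builds
model = paddle.nn.Sequential(
    paddle.nn.Conv2D(3, 8, 3, padding=1), paddle.nn.ReLU())

quanter = QAT(config=quant_config)  # quant.py also passes a PACT activation layer
quanter.quantize(model)             # wrap layers with fake-quant ops, then train
# ... run the normal training loop on the wrapped model here ...
quanter.save_quantized_model(       # what export_model.py ultimately calls
    model,
    "./output/quant_inference_model/inference",
    input_spec=[paddle.static.InputSpec(shape=[None, 3, 64, 64], dtype="float32")])
```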
### 5. Deploy
The parameters of the quantized model exported by the above steps are still stored as FP32, but their values are constrained to the int8 range.
The derived model can be converted through the `opt tool` of PaddleLite.
For quantized model deployment, please refer to [Mobile terminal model deployment](../../lite/readme_en.md)
For quantized model deployment, please refer to [Mobile terminal model deployment](../../lite/readme.md)
......@@ -151,17 +151,24 @@ def main():
arch_config = config["Architecture"]
if arch_config["algorithm"] == "SVTR" and arch_config["Head"][
"name"] != 'MultiHead':
input_shape = config["Eval"]["dataset"]["transforms"][-2][
'SVTRRecResizeImg']['image_shape']
else:
input_shape = None
if arch_config["algorithm"] in ["Distillation", ]: # distillation model
archs = list(arch_config["Models"].values())
for idx, name in enumerate(model.model_name_list):
sub_model_save_path = os.path.join(save_path, name, "inference")
export_single_model(model.model_list[idx], archs[idx],
sub_model_save_path, logger, quanter)
sub_model_save_path, logger, input_shape,
quanter)
else:
save_path = os.path.join(save_path, "inference")
export_single_model(model, arch_config, save_path, logger, quanter)
export_single_model(model, arch_config, save_path, logger, input_shape,
quanter)
if __name__ == "__main__":
......
......@@ -158,8 +158,7 @@ def main(config, device, logger, vdl_writer):
pre_best_model_dict = dict()
# load fp32 model to begin quantization
if config["Global"]["pretrained_model"] is not None:
pre_best_model_dict = load_model(config, model)
pre_best_model_dict = load_model(config, model, None, config['Architecture']["model_type"])
freeze_params = False
if config['Architecture']["algorithm"] in ["Distillation"]:
......@@ -184,8 +183,7 @@ def main(config, device, logger, vdl_writer):
model=model)
# resume PACT training process
if config["Global"]["checkpoints"] is not None:
pre_best_model_dict = load_model(config, model, optimizer)
pre_best_model_dict = load_model(config, model, optimizer, config['Architecture']["model_type"])
# build metric
eval_class = build_metric(config['Metric'])
......
......@@ -97,6 +97,17 @@ def sample_generator(loader):
return __reader__
def sample_generator_layoutxlm_ser(loader):
def __reader__():
for indx, data in enumerate(loader):
input_ids = np.array(data[0])
bbox = np.array(data[1])
attention_mask = np.array(data[2])
token_type_ids = np.array(data[3])
images = np.array(data[4])
yield [input_ids, bbox, attention_mask, token_type_ids, images]
return __reader__
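# Note: LayoutXLM-SER models take five named inputs (input_ids, bbox,
# attention_mask, token_type_ids, images) instead of a single image tensor,
# so post-training quantization feeds calibration data through this dedicated
# generator rather than the plain sample_generator() above.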
def main(config, device, logger, vdl_writer):
# init dist environment
......@@ -107,16 +118,18 @@ def main(config, device, logger, vdl_writer):
# build dataloader
config['Train']['loader']['num_workers'] = 0
is_layoutxlm_ser = config['Architecture']['model_type'] == 'kie' and config['Architecture']['Backbone']['name'] == 'LayoutXLMForSer'
train_dataloader = build_dataloader(config, 'Train', device, logger)
if config['Eval']:
config['Eval']['loader']['num_workers'] = 0
valid_dataloader = build_dataloader(config, 'Eval', device, logger)
if is_layoutxlm_ser:
train_dataloader = valid_dataloader
else:
valid_dataloader = None
paddle.enable_static()
place = paddle.CPUPlace()
exe = paddle.static.Executor(place)
exe = paddle.static.Executor(device)
if 'inference_model' in global_config.keys():
inference_model_dir = global_config['inference_model']
......@@ -127,6 +140,11 @@ def main(config, device, logger, vdl_writer):
raise ValueError(
"Please set inference model dir in Global.inference_model or Global.pretrained_model for post-quantazition"
)
if is_layoutxlm_ser:
generator = sample_generator_layoutxlm_ser(train_dataloader)
else:
generator = sample_generator(train_dataloader)
paddleslim.quant.quant_post_static(
executor=exe,
......@@ -134,7 +152,7 @@ def main(config, device, logger, vdl_writer):
model_filename='inference.pdmodel',
params_filename='inference.pdiparams',
quantize_model_path=global_config['save_inference_dir'],
sample_generator=sample_generator(train_dataloader),
sample_generator=generator,
save_model_filename='inference.pdmodel',
save_params_filename='inference.pdiparams',
batch_size=1,
......
# Cutting-edge Algorithms and Models
PaddleOCR will **continuously add** support for cutting-edge algorithms and models in the OCR field. The supported models and their tutorials can be found in the list below:
- [Text detection algorithms](./algorithm_overview.md#11-%E6%96%87%E6%9C%AC%E6%A3%80%E6%B5%8B%E7%AE%97%E6%B3%95)
- [Text recognition algorithms](./algorithm_overview.md#12-%E6%96%87%E6%9C%AC%E8%AF%86%E5%88%AB%E7%AE%97%E6%B3%95)
- [End-to-end algorithms](./algorithm_overview.md#2-%E6%96%87%E6%9C%AC%E8%AF%86%E5%88%AB%E7%AE%97%E6%B3%95)
- [Table recognition](./algorithm_overview.md#3-%E8%A1%A8%E6%A0%BC%E8%AF%86%E5%88%AB%E7%AE%97%E6%B3%95)
**Developers are welcome to contribute more algorithms; merged contributions are rewarded 🎁! For details, see the [community competition](https://github.com/PaddlePaddle/PaddleOCR/issues/4982).**
To add a new algorithm, refer to the tutorial below:
- [Add a new algorithm to PaddleOCR](./add_new_algorithm.md)
# CT
- [1. Introduction](#1)
- [2. Environment](#2)
- [3. Model Training / Evaluation / Prediction](#3)
- [3.1 Training](#3-1)
- [3.2 Evaluation](#3-2)
- [3.3 Prediction](#3-3)
- [4. Inference and Deployment](#4)
- [4.1 Python Inference](#4-1)
- [4.2 C++ Inference](#4-2)
- [4.3 Serving Deployment](#4-3)
- [4.4 More Deployment Options](#4-4)
- [5. FAQ](#5)
<a name="1"></a>
## 1. Introduction
Paper:
> [CentripetalText: An Efficient Text Instance Representation for Scene Text Detection](https://arxiv.org/abs/2107.05945)
> Tao Sheng, Jie Chen, Zhouhui Lian
> NeurIPS, 2021
On the Total-Text public text detection dataset, the reproduced results are as follows:
|Model|Backbone|Config|Precision|Recall|Hmean|Download|
| --- | --- | --- | --- | --- | --- | --- |
|CT|ResNet18_vd|[configs/det/det_r18_vd_ct.yml](../../configs/det/det_r18_vd_ct.yml)|88.68%|81.70%|85.05%|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r18_ct_train.tar)|
<a name="2"></a>
## 2. Environment
Please refer to [Environment Preparation](./environment.md) to configure the PaddleOCR environment, and refer to [Project Clone](./clone.md) to clone the project code.
<a name="3"></a>
## 3. Model Training / Evaluation / Prediction
The CT model is trained on the Total-Text public text detection dataset; see [Total-Text-Dataset](https://github.com/cs-chan/Total-Text-Dataset/tree/master/Dataset) for the download. We converted the labels to the PaddleOCR format; the converted label files can be downloaded from [train.txt](https://paddleocr.bj.bcebos.com/dataset/ct_tipc/train.txt) and [test.txt](https://paddleocr.bj.bcebos.com/dataset/ct_tipc/test.txt).
Please refer to the [text detection training tutorial](./detection.md). PaddleOCR has modularized the code; training a different detection model only requires **switching the configuration file**.
<a name="4"></a>
## 4. Inference and Deployment
<a name="4-1"></a>
### 4.1 Python Inference
First, convert the model saved during CT training into an inference model. Taking the model trained on the Total-Text English dataset with the ResNet18_vd backbone as an example ([download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r18_ct_train.tar)), the conversion command is as follows:
```shell
python3 tools/export_model.py -c configs/det/det_r18_vd_ct.yml -o Global.pretrained_model=./det_r18_ct_train/best_accuracy Global.save_inference_dir=./inference/det_ct
```
For CT text detection model inference, run the following command:
```shell
python3 tools/infer/predict_det.py --image_dir="./doc/imgs_en/img623.jpg" --det_model_dir="./inference/det_ct/" --det_algorithm="CT"
```
可视化文本检测结果默认保存到`./inference_results`文件夹里面,结果文件的名称前缀为'det_res'。结果示例如下:
![](../imgs_results/det_res_img623_ct.jpg)
<a name="4-2"></a>
### 4.2 C++推理
暂不支持
<a name="4-3"></a>
### 4.3 Serving服务化部署
暂不支持
<a name="4-4"></a>
### 4.4 更多推理部署
暂不支持
<a name="5"></a>
## 5. FAQ
## 引用
```bibtex
@inproceedings{sheng2021centripetaltext,
title={CentripetalText: An Efficient Text Instance Representation for Scene Text Detection},
author={Tao Sheng and Jie Chen and Zhouhui Lian},
booktitle={Thirty-Fifth Conference on Neural Information Processing Systems},
year={2021}
}
```
# Key Information Extraction Algorithm: LayoutXLM

- [1. Introduction](#1-算法简介)
- [2. Environment Setup](#2-环境配置)
- [3. Training, Evaluation and Prediction](#3-模型训练评估预测)
- [4. Inference and Deployment](#4-推理部署)
  - [4.1 Python Inference](#41-python推理)
  - [4.2 C++ Inference](#42-c推理部署)
  - [4.3 Serving Deployment](#43-serving服务化部署)
  - [4.4 More Deployment Options](#44-更多推理部署)
- [5. FAQ](#5-faq)
- [Citation](#引用)

<a name="1"></a>
## 1. Introduction

Paper:
> [LayoutXLM: Multimodal Pre-training for Multilingual Visually-rich Document Understanding](https://arxiv.org/abs/2104.08836)
>
> Yiheng Xu, Tengchao Lv, Lei Cui, Guoxin Wang, Yijuan Lu, Dinei Florencio, Cha Zhang, Furu Wei
>
> 2021

On the XFUND_zh dataset, the reproduced results are as follows:

|Model|Backbone|Task|Config|Hmean|Download|
| --- | --- | --- | --- | --- | --- |
|LayoutXLM|LayoutXLM-base|SER |[ser_layoutxlm_xfund_zh.yml](../../configs/kie/layoutlm_series/ser_layoutxlm_xfund_zh.yml)|90.38%|[trained model](https://paddleocr.bj.bcebos.com/pplayout/ser_LayoutXLM_xfun_zh.tar)/[inference model](https://paddleocr.bj.bcebos.com/pplayout/ser_LayoutXLM_xfun_zh_infer.tar)|
|LayoutXLM|LayoutXLM-base|RE |[re_layoutxlm_xfund_zh.yml](../../configs/kie/layoutlm_series/re_layoutxlm_xfund_zh.yml)|74.83%|[trained model](https://paddleocr.bj.bcebos.com/pplayout/re_LayoutXLM_xfun_zh.tar)/[inference model (coming soon)]()|

<a name="2"></a>
## 2. Environment Setup

Please refer to [Environment Preparation](./environment.md) to set up the PaddleOCR environment, and to [Project Clone](./clone.md) to clone the project code.

<a name="3"></a>
## 3. Training, Evaluation and Prediction

Please refer to the [key information extraction tutorial](./kie.md). PaddleOCR is modularized, so training a different key information extraction model only requires **switching the configuration file**.

<a name="4"></a>
## 4. Inference and Deployment

<a name="4-1"></a>
### 4.1 Python Inference

**Note:** Inference for the RE task is still being adapted; the SER task is used below to illustrate key information extraction with the LayoutXLM model.

First, convert the trained model into an inference model. Taking the LayoutXLM model trained on the XFUND_zh dataset as an example ([download link](https://paddleocr.bj.bcebos.com/pplayout/ser_LayoutXLM_xfun_zh.tar)), run:
```bash
wget https://paddleocr.bj.bcebos.com/pplayout/ser_LayoutXLM_xfun_zh.tar
tar -xf ser_LayoutXLM_xfun_zh.tar
python3 tools/export_model.py -c configs/kie/layoutlm_series/ser_layoutxlm_xfund_zh.yml -o Architecture.Backbone.checkpoints=./ser_LayoutXLM_xfun_zh/best_accuracy Global.save_inference_dir=./inference/ser_layoutxlm
```
For SER inference with the LayoutXLM model, run:
```bash
cd ppstructure
python3 kie/predict_kie_token_ser.py \
  --kie_algorithm=LayoutXLM \
  --ser_model_dir=../inference/ser_layoutxlm_infer \
  --image_dir=./docs/kie/input/zh_val_42.jpg \
  --ser_dict_path=../train_data/XFUND/class_list_xfun.txt \
  --vis_font_path=../doc/fonts/simfang.ttf
```
The SER visualizations are saved to the `./output` folder by default. Example:

<div align="center">
    <img src="../../ppstructure/docs/kie/result_ser/zh_val_42_ser.jpg" width="800">
</div>

<a name="4-2"></a>
### 4.2 C++ Inference

Not supported yet.

<a name="4-3"></a>
### 4.3 Serving Deployment

Not supported yet.

<a name="4-4"></a>
### 4.4 More Deployment Options

Not supported yet.

<a name="5"></a>
## 5. FAQ

## Citation
```bibtex
@article{DBLP:journals/corr/abs-2104-08836,
author = {Yiheng Xu and
Tengchao Lv and
Lei Cui and
Guoxin Wang and
Yijuan Lu and
Dinei Flor{\^{e}}ncio and
Cha Zhang and
Furu Wei},
title = {LayoutXLM: Multimodal Pre-training for Multilingual Visually-rich
Document Understanding},
journal = {CoRR},
volume = {abs/2104.08836},
year = {2021},
url = {https://arxiv.org/abs/2104.08836},
eprinttype = {arXiv},
eprint = {2104.08836},
timestamp = {Thu, 14 Oct 2021 09:17:23 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-2104-08836.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1912-13318,
author = {Yiheng Xu and
Minghao Li and
Lei Cui and
Shaohan Huang and
Furu Wei and
Ming Zhou},
title = {LayoutLM: Pre-training of Text and Layout for Document Image Understanding},
journal = {CoRR},
volume = {abs/1912.13318},
year = {2019},
url = {http://arxiv.org/abs/1912.13318},
eprinttype = {arXiv},
eprint = {1912.13318},
timestamp = {Mon, 01 Jun 2020 16:20:46 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1912-13318.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2012-14740,
author = {Yang Xu and
Yiheng Xu and
Tengchao Lv and
Lei Cui and
Furu Wei and
Guoxin Wang and
Yijuan Lu and
Dinei A. F. Flor{\^{e}}ncio and
Cha Zhang and
Wanxiang Che and
Min Zhang and
Lidong Zhou},
title = {LayoutLMv2: Multi-modal Pre-training for Visually-Rich Document Understanding},
journal = {CoRR},
volume = {abs/2012.14740},
year = {2020},
url = {https://arxiv.org/abs/2012.14740},
eprinttype = {arXiv},
eprint = {2012.14740},
timestamp = {Tue, 27 Jul 2021 09:53:52 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-2012-14740.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
```
# Key Information Extraction Algorithm: SDMGR

- [1. Introduction](#1-算法简介)
- [2. Environment Setup](#2-环境配置)
- [3. Training, Evaluation and Prediction](#3-模型训练评估预测)
  - [3.1 Model Training](#31-模型训练)
  - [3.2 Model Evaluation](#32-模型评估)
  - [3.3 Model Prediction](#33-模型预测)
- [4. Inference and Deployment](#4-推理部署)
  - [4.1 Python Inference](#41-python推理)
  - [4.2 C++ Inference](#42-c推理部署)
  - [4.3 Serving Deployment](#43-serving服务化部署)
  - [4.4 More Deployment Options](#44-更多推理部署)
- [5. FAQ](#5-faq)
- [Citation](#引用)

<a name="1"></a>
## 1. Introduction

Paper:
> [Spatial Dual-Modality Graph Reasoning for Key Information Extraction](https://arxiv.org/abs/2103.14470)
>
> Hongbin Sun and Zhanghui Kuang and Xiaoyu Yue and Chenhao Lin and Wayne Zhang
>
> 2021

On the public wildreceipt receipt dataset, the reproduced results are as follows:

|Model|Backbone|Config|Hmean|Download|
| --- | --- | --- | --- | --- |
|SDMGR|VGG16|[configs/kie/sdmgr/kie_unet_sdmgr.yml](../../configs/kie/sdmgr/kie_unet_sdmgr.yml)|86.7%|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/kie/kie_vgg16.tar)/[inference model (coming soon)]()|

<a name="2"></a>
## 2. Environment Setup

Please refer to [Environment Preparation](./environment.md) to set up the PaddleOCR environment, and to [Project Clone](./clone.md) to clone the project code.
<a name="3"></a>
## 3. Training, Evaluation and Prediction

SDMGR is a key information extraction algorithm that classifies each detected text region into a predefined category, such as order ID, invoice number, or amount.

Training and evaluation use the wildreceipt dataset, which can be downloaded with:
```bash
wget https://paddleocr.bj.bcebos.com/ppstructure/dataset/wildreceipt.tar && tar xf wildreceipt.tar
```
Create a symlink to the dataset under the PaddleOCR/train_data directory:
```bash
cd PaddleOCR/ && mkdir train_data && cd train_data
ln -s ../../wildreceipt ./
```
### 3.1 Model Training

Training uses the config file `configs/kie/sdmgr/kie_unet_sdmgr.yml`, whose default training data path is `train_data/wildreceipt`. Once the data is ready, start training with:
```bash
python3 tools/train.py -c configs/kie/sdmgr/kie_unet_sdmgr.yml -o Global.save_model_dir=./output/kie/
```
### 3.2 Model Evaluation

Run the following command to evaluate the model:
```bash
python3 tools/eval.py -c configs/kie/sdmgr/kie_unet_sdmgr.yml -o Global.checkpoints=./output/kie/best_accuracy
```
An example of the output:
```
[2022/08/10 05:22:23] ppocr INFO: metric eval ***************
[2022/08/10 05:22:23] ppocr INFO: hmean:0.8670120239257812
[2022/08/10 05:22:23] ppocr INFO: fps:10.18816520530961
```
### 3.3 Model Prediction

Run the following command for model prediction. Prediction requires a pre-built text file storing the image paths together with their OCR results, specified via `Global.infer_img` (a sketch of its layout follows the command):
```bash
python3 tools/infer_kie.py -c configs/kie/sdmgr/kie_unet_sdmgr.yml -o Global.checkpoints=kie_vgg16/best_accuracy Global.infer_img=./train_data/wildreceipt/1.txt
```
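As a minimal sketch of that file's layout (the field names and the sample path follow the common PaddleOCR KIE annotation convention; they are assumptions, not taken from this document):

```python
import json

# Each line pairs an image path with its OCR results as a JSON list
# (assumed format: tab-separated, one image per line).
ocr_results = [{
    "transcription": "CHOEUN",  # recognized text of one detected box
    "points": [[60, 31], [182, 31], [182, 61], [60, 61]],  # 4-point box
    "label": 1,  # hypothetical pre-defined category id
}]
line = "wildreceipt/image_files/sample.jpeg\t" + json.dumps(ocr_results)
with open("./train_data/wildreceipt/1.txt", "w", encoding="utf-8") as f:
    f.write(line + "\n")
```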
The prediction results are saved in the `./output/sdmgr_kie/predicts_kie.txt` file, and the visualizations are saved in the `./output/sdmgr_kie/kie_results/` directory.

An example visualization:
<div align="center">
<img src="../../ppstructure/docs/imgs/sdmgr_result.png" width="800">
</div>
<a name="4"></a>
## 4. 推理部署
<a name="4-1"></a>
### 4.1 Python推理
暂不支持
<a name="4-2"></a>
### 4.2 C++推理部署
暂不支持
<a name="4-3"></a>
### 4.3 Serving服务化部署
暂不支持
<a name="4-4"></a>
### 4.4 更多推理部署
暂不支持
<a name="5"></a>
## 5. FAQ
## 引用
```bibtex
@misc{sun2021spatial,
title={Spatial Dual-Modality Graph Reasoning for Key Information Extraction},
author={Hongbin Sun and Zhanghui Kuang and Xiaoyu Yue and Chenhao Lin and Wayne Zhang},
year={2021},
eprint={2103.14470},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
......@@ -79,7 +79,7 @@ python3 tools/export_model.py -c configs/rec/rec_r31_sar.yml -o Global.pretraine
For SAR text recognition model inference, run:
```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/en/word_1.png" --rec_model_dir="./inference/rec_sar/" --rec_image_shape="3, 48, 48, 160" --rec_algorithm="SAR" --rec_char_dict_path="ppocr/utils/dict90.txt" --max_text_length=30 --use_space_char=False
```
<a name="4-2"></a>
......
......@@ -78,7 +78,7 @@ python3 tools/export_model.py -c configs/rec/rec_r50_fpn_srn.yml -o Global.pretr
For SRN text recognition model inference, run:
```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/en/word_1.png" --rec_model_dir="./inference/rec_srn/" --rec_image_shape="1,64,256" --rec_algorithm="SRN" --rec_char_dict_path=./ppocr/utils/ic15_dict.txt --use_space_char=False
```
<a name="4-2"></a>
......
......@@ -101,7 +101,7 @@ python3 tools/export_model.py -c configs/rec/rec_r45_visionlan.yml -o Global.pre
Run the following command for model inference:
```shell
python3 tools/infer/predict_rec.py --image_dir='./doc/imgs_words/en/word_2.png' --rec_model_dir='./inference/rec_r45_visionlan/' --rec_algorithm='VisionLAN' --rec_image_shape='3,64,256' --rec_char_dict_path='./ppocr/utils/ic15_dict.txt' --use_space_char=False
# To predict all images in a folder, set image_dir to the folder path, e.g. --image_dir='./doc/imgs_words_en/'.
```
......@@ -110,7 +110,7 @@ python3 tools/infer/predict_rec.py --image_dir='./doc/imgs_words/en/word_2.png'
After running the command, the prediction result (the recognized text and its score) for the image above is printed to the screen:
```shell
Predicts of ./doc/imgs_words/en/word_2.png:('yourself', 0.9999493)
```
**Note**
......
......@@ -3,6 +3,7 @@
- [Dataset Summary](#数据集汇总)
- [1. PubTabNet](#1-pubtabnet数据集)
- [2. TAL Table Recognition Competition Dataset](#2-好未来表格识别竞赛数据集)
- [3. WTW Chinese Scene Table Dataset](#3-wtw中文场景表格数据集)

This page collects commonly used table recognition datasets. It is continuously updated, and dataset contributions are welcome.
......@@ -12,6 +13,7 @@
|---|---|---|
| PubTabNet |https://github.com/ibm-aur-nlp/PubTabNet| jsonl format; can be loaded directly with [pubtab_dataset.py](../../../ppocr/data/pubtab_dataset.py) |
| TAL Table Recognition Competition Dataset |https://ai.100tal.com/dataset| jsonl format; can be loaded directly with [pubtab_dataset.py](../../../ppocr/data/pubtab_dataset.py) |
| WTW Chinese Scene Table Dataset |https://github.com/wangwen-whu/WTW-Dataset| needs to be converted before it can be loaded with [pubtab_dataset.py](../../../ppocr/data/pubtab_dataset.py) |
## 1. PubTabNet

- **Overview**: The PubTabNet training set contains 500k images and the validation set contains 9k images. Some samples are visualized below.
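Because the annotations are jsonl, each line is one self-contained JSON record describing a table image, which is why `pubtab_dataset.py` can load them directly. A minimal parsing sketch (the record below is a hypothetical sample in the PubTabNet style of structure tokens plus cells; it is illustrative, not copied from the dataset):

```python
import json

# A hypothetical PubTabNet-style record: HTML structure tokens plus per-cell
# text tokens and bounding boxes.
line = json.dumps({
    "filename": "sample_table.png",
    "html": {
        "structure": {"tokens": ["<tr>", "<td>", "</td>", "</tr>"]},
        "cells": [{"tokens": ["1"], "bbox": [1, 4, 29, 13]}],
    },
})

record = json.loads(line)
print(record["filename"])                     # source image of the table
print(record["html"]["structure"]["tokens"])  # table structure as HTML tokens
for cell in record["html"]["cells"]:
    print(cell["tokens"], cell["bbox"])       # cell text tokens and position
```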
......@@ -31,3 +33,12 @@
<img src="../../datasets/table_tal_demo/1.jpg" width="500">
<img src="../../datasets/table_tal_demo/2.jpg" width="500">
</div>
## 3. WTW Chinese Scene Table Dataset

- **Overview**: The WTW Chinese scene table dataset contains both table detection and table recognition data, with images from both scanned and photographed scenes.

<div align="center">
    <img src="https://github.com/wangwen-whu/WTW-Dataset/blob/main/demo/20210816_210413.gif" width="500">
</div>
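Before `pubtab_dataset.py` can read WTW, each annotation has to be rewritten as one PubTabNet-style jsonl line, as noted in the table above. A minimal conversion sketch, assuming the WTW annotations have already been parsed into structure tokens plus per-cell text and axis-aligned boxes (all input names here are placeholders for whatever your WTW parser produces):

```python
import json

def wtw_to_pubtabnet_line(image_name, structure_tokens, cells):
    """Pack one parsed WTW table into a PubTabNet-style jsonl record.

    structure_tokens: HTML structure tokens, e.g. ["<tr>", "<td>", "</td>", "</tr>"]
    cells: list of (text, [x1, y1, x2, y2]) tuples from a WTW parser
    """
    record = {
        "filename": image_name,
        "html": {
            "structure": {"tokens": structure_tokens},
            # Character-level tokens, mirroring the PubTabNet cell format.
            "cells": [{"tokens": list(text), "bbox": bbox} for text, bbox in cells],
        },
    }
    return json.dumps(record, ensure_ascii=False)

# Hypothetical usage: append one converted table to a training jsonl file.
with open("train_data/wtw/train.jsonl", "a", encoding="utf-8") as f:
    f.write(wtw_to_pubtabnet_line(
        "20210816_210413.jpg",
        ["<tr>", "<td>", "</td>", "</tr>"],
        [("单元格", [10, 12, 86, 40])],
    ) + "\n")
```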
......@@ -158,3 +158,5 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --de
After running the command, the recognition result image is as follows:

![](../imgs_results/system_res_00018069_v3.jpg)

For more configuration options and explanations of the inference hyperparameters, see the [inference arguments tutorial](./inference_args.md).