Merge remote-tracking branch 'origin/dygraph' into dy1

df783f5a · qq_25193841 · 050e2a68 · 38bef47b · df783f5a · df783f5a
157 changed file
--- a/PPOCRLabel/PPOCRLabel.py
+++ b/PPOCRLabel/PPOCRLabel.py
@@ -2449,13 +2449,6 @@ class MainWindow(QMainWindow):
            export PPLabel and CSV to JSON (PubTabNet)
        '''
        import pandas as pd
-        from libs.dataPartitionDialog import DataPartitionDialog
-
-        # data partition user input
-        partitionDialog = DataPartitionDialog(parent=self)
-        partitionDialog.exec()
-        if partitionDialog.getStatus() == False:
-            return

        # automatically save annotations
        self.saveFilestate()
@@ -2478,28 +2471,19 @@ class MainWindow(QMainWindow):
                        labeldict[file] = eval(label)
                    else:
                        labeldict[file] = []
+        
+        # read table recognition output
+        TableRec_excel_dir = os.path.join(
+            self.lastOpenDir, 'tableRec_excel_output')

-        train_split, val_split, test_split = partitionDialog.getDataPartition()
-        # check validate
-        if train_split + val_split + test_split > 100:
-            msg = "The sum of training, validation and testing data should be less than 100%"
-            QMessageBox.information(self, "Information", msg)
-            return
-        print(train_split, val_split, test_split)
-        train_split, val_split, test_split = float(train_split) / 100., float(val_split) / 100., float(test_split) / 100.
-        train_id = int(len(labeldict) * train_split)
-        val_id = int(len(labeldict) * (train_split + val_split))
-        print('Data partition: train:', train_id, 
-              'validation:',  val_id - train_id,
-              'test:', len(labeldict) - val_id)
-
-        TableRec_excel_dir = os.path.join(self.lastOpenDir, 'tableRec_excel_output')
-        json_results = []
-        imgid = 0
+        # save txt
+        fid = open(
+            "{}/gt.txt".format(self.lastOpenDir), "w", encoding='utf-8')
        for image_path in labeldict.keys():
            # load csv annotations
            filename, _ = os.path.splitext(os.path.basename(image_path))
-            csv_path = os.path.join(TableRec_excel_dir, filename + '.xlsx')
+            csv_path = os.path.join(
+                TableRec_excel_dir, filename + '.xlsx')
            if not os.path.exists(csv_path):
                continue

@@ -2518,28 +2502,31 @@ class MainWindow(QMainWindow):
            cells = []
            for anno in labeldict[image_path]:
                tokens = list(anno['transcription'])
-                obb = anno['points']
-                hbb = OBB2HBB(np.array(obb)).tolist()
-                cells.append({'tokens': tokens, 'bbox': hbb})
-            
-            # data split
-            if imgid < train_id:
-                split = 'train'
-            elif imgid < val_id:
-                split = 'val'
-            else:
-                split = 'test'
-
-            #  save dict
-            html = {'structure': {'tokens': token_list}, 'cell': cells}
-            json_results.append({'filename': os.path.basename(image_path), 'split': split, 'imgid': imgid, 'html': html})
-            imgid += 1
-
-        # save json
-        with open("{}/annotation.json".format(self.lastOpenDir), "w", encoding='utf-8') as fid:
-            fid.write(json.dumps(json_results, ensure_ascii=False))
-        
-        msg = 'JSON sucessfully saved in {}/annotation.json'.format(self.lastOpenDir)
+                cells.append({
+                    'tokens': tokens, 
+                    'bbox': anno['points']
+                    })
+
+            # 构造标注信息
+            html = {
+                'structure': {
+                    'tokens': token_list
+                    }, 
+                'cells': cells
+                }
+            d = {
+                'filename': os.path.basename(image_path), 
+                'html': html
+                }
+            # 重构HTML
+            d['gt'] = rebuild_html_from_ppstructure_label(d)
+            fid.write('{}\n'.format(
+                json.dumps(
+                    d, ensure_ascii=False)))
+                    
+        # convert to PP-Structure label format
+        fid.close()
+        msg = 'JSON sucessfully saved in {}/gt.txt'.format(self.lastOpenDir)
        QMessageBox.information(self, "Information", msg)

    def autolcm(self):
@@ -2728,6 +2715,9 @@ class MainWindow(QMainWindow):

            self._update_shape_color(shape)
            self.keyDialog.addLabelHistory(key_text)
+            
+        # save changed shape
+        self.setDirty()

    def undoShapeEdit(self):
        self.canvas.restoreShape()

--- a/PPOCRLabel/libs/canvas.py
+++ b/PPOCRLabel/libs/canvas.py
@@ -611,8 +611,8 @@ class Canvas(QWidget):

        if self.drawing() and not self.prevPoint.isNull() and not self.outOfPixmap(self.prevPoint):
            p.setPen(QColor(0, 0, 0))
-            p.drawLine(self.prevPoint.x(), 0, self.prevPoint.x(), self.pixmap.height())
-            p.drawLine(0, self.prevPoint.y(), self.pixmap.width(), self.prevPoint.y())
+            p.drawLine(int(self.prevPoint.x()), 0, int(self.prevPoint.x()), self.pixmap.height())
+            p.drawLine(0, int(self.prevPoint.y()), self.pixmap.width(), int(self.prevPoint.y()))

        self.setAutoFillBackground(True)
        if self.verified:
@@ -909,4 +909,4 @@ class Canvas(QWidget):
    def updateShapeIndex(self):
        for i in range(len(self.shapes)):
            self.shapes[i].idx = i
-        self.update()
\ No newline at end of file
+        self.update()
--- a/PPOCRLabel/libs/dataPartitionDialog.py
+++ b/PPOCRLabel/libs/dataPartitionDialog.py
-try:
-    from PyQt5.QtGui import *
-    from PyQt5.QtCore import *
-    from PyQt5.QtWidgets import *
-except ImportError:
-    from PyQt4.QtGui import *
-    from PyQt4.QtCore import *
-
-from libs.utils import newIcon
-
-import time
-import datetime
-import json
-import cv2
-import numpy as np
-
-
-BB = QDialogButtonBox
-
-class DataPartitionDialog(QDialog):
-    def __init__(self, parent=None):
-        super().__init__()
-        self.parnet = parent
-        self.title = 'DATA PARTITION'
-
-        self.train_ratio = 70
-        self.val_ratio = 15
-        self.test_ratio = 15
-        
-        self.initUI()
-
-    def initUI(self):
-        self.setWindowTitle(self.title)
-        self.setWindowModality(Qt.ApplicationModal)
-
-        self.flag_accept = True
-
-        if self.parnet.lang == 'ch':
-            msg = "导出JSON前请保存所有图像的标注且关闭EXCEL!"
-        else:
-            msg = "Please save all the annotations and close the EXCEL before exporting JSON!"
-
-        info_msg = QLabel(msg, self)
-        info_msg.setWordWrap(True)
-        info_msg.setStyleSheet("color: red")
-        info_msg.setFont(QFont('Arial', 12))
-
-        train_lbl = QLabel('Train split: ', self)
-        train_lbl.setFont(QFont('Arial', 15))
-        val_lbl = QLabel('Valid split: ', self)
-        val_lbl.setFont(QFont('Arial', 15))
-        test_lbl = QLabel('Test split: ', self)
-        test_lbl.setFont(QFont('Arial', 15))
-
-        self.train_input = QLineEdit(self)
-        self.train_input.setFont(QFont('Arial', 15))
-        self.val_input = QLineEdit(self)
-        self.val_input.setFont(QFont('Arial', 15))
-        self.test_input = QLineEdit(self)
-        self.test_input.setFont(QFont('Arial', 15))
-
-        self.train_input.setText(str(self.train_ratio))
-        self.val_input.setText(str(self.val_ratio))
-        self.test_input.setText(str(self.test_ratio))
-
-        validator = QIntValidator(0, 100)
-        self.train_input.setValidator(validator)
-        self.val_input.setValidator(validator)
-        self.test_input.setValidator(validator)
-
-        gridlayout = QGridLayout()
-        gridlayout.addWidget(info_msg, 0, 0, 1, 2)
-        gridlayout.addWidget(train_lbl, 1, 0)
-        gridlayout.addWidget(val_lbl, 2, 0)
-        gridlayout.addWidget(test_lbl, 3, 0)
-        gridlayout.addWidget(self.train_input, 1, 1)
-        gridlayout.addWidget(self.val_input, 2, 1)
-        gridlayout.addWidget(self.test_input, 3, 1)
-
-        bb = BB(BB.Ok | BB.Cancel, Qt.Horizontal, self)
-        bb.button(BB.Ok).setIcon(newIcon('done'))
-        bb.button(BB.Cancel).setIcon(newIcon('undo'))
-        bb.accepted.connect(self.validate)
-        bb.rejected.connect(self.cancel)
-        gridlayout.addWidget(bb, 4, 0, 1, 2)
-
-        self.setLayout(gridlayout)
-        
-        self.show()
-
-    def validate(self):
-        self.flag_accept = True
-        self.accept()
-
-    def cancel(self):
-        self.flag_accept = False
-        self.reject()
-    
-    def getStatus(self):
-        return self.flag_accept
-
-    def getDataPartition(self):
-        self.train_ratio = int(self.train_input.text())
-        self.val_ratio = int(self.val_input.text())
-        self.test_ratio = int(self.test_input.text())
-
-        return self.train_ratio, self.val_ratio, self.test_ratio
-
-    def closeEvent(self, event):
-        self.flag_accept = False
-        self.reject()
-
-
--- a/PPOCRLabel/libs/utils.py
+++ b/PPOCRLabel/libs/utils.py
@@ -176,18 +176,6 @@ def boxPad(box, imgShape, pad : int) -> np.array:
    return box


-def OBB2HBB(obb) -> np.array:
-    """
-    Convert Oriented Bounding Box to Horizontal Bounding Box.
-    """
-    hbb = np.zeros(4, dtype=np.int32)
-    hbb[0] = min(obb[:, 0])
-    hbb[1] = min(obb[:, 1])
-    hbb[2] = max(obb[:, 0])
-    hbb[3] = max(obb[:, 1])
-    return hbb
-
-
 def expand_list(merged, html_list):
    '''
    Fill blanks according to merged cells
@@ -232,6 +220,26 @@ def convert_token(html_list):
    return token_list


+def rebuild_html_from_ppstructure_label(label_info):
+        from html import escape
+        html_code = label_info['html']['structure']['tokens'].copy()
+        to_insert = [
+            i for i, tag in enumerate(html_code) if tag in ('<td>', '>')
+        ]
+        for i, cell in zip(to_insert[::-1], label_info['html']['cells'][::-1]):
+            if cell['tokens']:
+                cell = [
+                    escape(token) if len(token) == 1 else token
+                    for token in cell['tokens']
+                ]
+                cell = ''.join(cell)
+                html_code.insert(i + 1, cell)
+        html_code = ''.join(html_code)
+        html_code = '<html><body><table>{}</table></body></html>'.format(
+            html_code)
+        return html_code
+
+
 def stepsInfo(lang='en'):
    if lang == 'ch':
        msg = "1. 安装与运行：使用上述命令安装与运行程序。\n" \

--- a/applications/中文表格识别.md
+++ b/applications/中文表格识别.md
+# 智能运营：通用中文表格识别
+
+- [1. 背景介绍](#1-背景介绍)
+- [2. 中文表格识别](#2-中文表格识别)
+- [2.1 环境准备](#21-环境准备)
+- [2.2 准备数据集](#22-准备数据集)
+    - [2.2.1 划分训练测试集](#221-划分训练测试集)
+    - [2.2.2 查看数据集](#222-查看数据集)
+- [2.3 训练](#23-训练)
+- [2.4 验证](#24-验证)
+- [2.5 训练引擎推理](#25-训练引擎推理)
+- [2.6 模型导出](#26-模型导出)
+- [2.7 预测引擎推理](#27-预测引擎推理)
+- [2.8 表格识别](#28-表格识别)
+- [3. 表格属性识别](#3-表格属性识别)
+- [3.1 代码、环境、数据准备](#31-代码环境数据准备)
+    - [3.1.1 代码准备](#311-代码准备)
+    - [3.1.2 环境准备](#312-环境准备)
+    - [3.1.3 数据准备](#313-数据准备)
+- [3.2 表格属性识别训练](#32-表格属性识别训练)
+- [3.3 表格属性识别推理和部署](#33-表格属性识别推理和部署)
+    - [3.3.1 模型转换](#331-模型转换)
+    - [3.3.2 模型推理](#332-模型推理)
+
+## 1. 背景介绍
+
+中文表格识别在金融行业有着广泛的应用，如保险理赔、财报分析和信息录入等领域。当前，金融行业的表格识别主要以手动录入为主，开发一种自动表格识别方法成为亟待解决的问题。
+![](https://ai-studio-static-online.cdn.bcebos.com/d1e7780f0c7745ada4be540decefd6288e4d59257d8141f6842682a4c05d28b6)
+
+
+在金融行业中，表格图像主要有清单类的单元格密集型表格，申请表类的大单元格表格，拍照表格和倾斜表格四种主要形式。
+
+![](https://ai-studio-static-online.cdn.bcebos.com/da82ae8ef8fd479aaa38e1049eb3a681cf020dc108fa458eb3ec79da53b45fd1)
+![](https://ai-studio-static-online.cdn.bcebos.com/5ffff2093a144a6993a75eef71634a52276015ee43a04566b9c89d353198c746)
+
+
+当前的表格识别算法不能很好地处理这些场景下的表格图像。在本例中，我们使用PP-Structurev2最新发布的表格识别模型SLANet来演示如何进行中文表格识别。同时，为了方便作业流程，我们使用表格属性识别模型对表格图像的属性进行识别，对表格的难易程度进行判断，加快人工校对的速度。
+
+本项目AI Studio链接：https://aistudio.baidu.com/aistudio/projectdetail/4588067
+
+## 2. 中文表格识别
+### 2.1 环境准备
+
+
+```python
+# 下载PaddleOCR代码
+! git clone -b dygraph https://gitee.com/paddlepaddle/PaddleOCR
+```
+
+
+```python
+# 安装PaddleOCR环境
+! pip install -r PaddleOCR/requirements.txt --force-reinstall
+! pip install protobuf==3.19
+```
+
+### 2.2 准备数据集
+
+本例中使用的数据集采用表格[生成工具](https://github.com/WenmuZhou/TableGeneration)制作。
+
+使用如下命令对数据集进行解压，并查看数据集大小
+
+
+```python
+! cd data/data165849 && tar -xf table_gen_dataset.tar && cd -
+! wc -l data/data165849/table_gen_dataset/gt.txt
+```
+
+#### 2.2.1 划分训练测试集
+
+使用下述命令将数据集划分为训练集和测试集, 这里将90%划分为训练集，10%划分为测试集
+
+
+```python
+import random
+with open('/home/aistudio/data/data165849/table_gen_dataset/gt.txt') as f:
+    lines = f.readlines()
+random.shuffle(lines)
+train_len = int(len(lines)*0.9)
+train_list = lines[:train_len]
+val_list = lines[train_len:]
+
+# 保存结果
+with open('/home/aistudio/train.txt','w',encoding='utf-8') as f:
+    f.writelines(train_list)
+with open('/home/aistudio/val.txt','w',encoding='utf-8') as f:
+    f.writelines(val_list)
+```
+
+划分完成后，数据集信息如下
+
+|类型|数量|图片地址|标注文件路径|
+|---|---|---|---|
+|训练集|18000|/home/aistudio/data/data165849/table_gen_dataset|/home/aistudio/train.txt|
+|测试集|2000|/home/aistudio/data/data165849/table_gen_dataset|/home/aistudio/val.txt|
+
+#### 2.2.2 查看数据集
+
+
+```python
+import cv2
+import os, json
+import numpy as np
+from matplotlib import pyplot as plt
+%matplotlib inline
+
+def parse_line(data_dir, line):
+    data_line = line.strip("\n")
+    info = json.loads(data_line)
+    file_name = info['filename']
+    cells = info['html']['cells'].copy()
+    structure = info['html']['structure']['tokens'].copy()
+
+    img_path = os.path.join(data_dir, file_name)
+    if not os.path.exists(img_path):
+        print(img_path)
+        return None
+    data = {
+        'img_path': img_path,
+        'cells': cells,
+        'structure': structure,
+        'file_name': file_name
+    }
+    return data
+
+def draw_bbox(img_path, points, color=(255, 0, 0), thickness=2):
+    if isinstance(img_path, str):
+        img_path = cv2.imread(img_path)
+    img_path = img_path.copy()
+    for point in points:
+        cv2.polylines(img_path, [point.astype(int)], True, color, thickness)
+    return img_path
+
+
+def rebuild_html(data):
+    html_code = data['structure']
+    cells = data['cells']
+    to_insert = [i for i, tag in enumerate(html_code) if tag in ('<td>', '>')]
+
+    for i, cell in zip(to_insert[::-1], cells[::-1]):
+        if cell['tokens']:
+            text = ''.join(cell['tokens'])
+            # skip empty text
+            sp_char_list = ['<b>', '</b>', '\u2028', ' ', '<i>', '</i>']
+            text_remove_style = skip_char(text, sp_char_list)
+            if len(text_remove_style) == 0:
+                continue
+            html_code.insert(i + 1, text)
+
+    html_code = ''.join(html_code)
+    return html_code
+
+
+def skip_char(text, sp_char_list):
+    """
+    skip empty cell
+    @param text: text in cell
+    @param sp_char_list: style char and special code
+    @return:
+    """
+    for sp_char in sp_char_list:
+        text = text.replace(sp_char, '')
+    return text
+
+save_dir = '/home/aistudio/vis'
+os.makedirs(save_dir, exist_ok=True)
+image_dir = '/home/aistudio/data/data165849/'
+html_str = '<table border="1">'
+
+# 解析标注信息并还原html表格
+data = parse_line(image_dir, val_list[0])
+
+img = cv2.imread(data['img_path'])
+img_name = ''.join(os.path.basename(data['file_name']).split('.')[:-1])
+img_save_name = os.path.join(save_dir, img_name)
+boxes = [np.array(x['bbox']) for x in data['cells']]
+show_img = draw_bbox(data['img_path'], boxes)
+cv2.imwrite(img_save_name + '_show.jpg', show_img)
+
+html = rebuild_html(data)
+html_str += html
+html_str += '</table>'
+
+# 显示标注的html字符串
+from IPython.core.display import display, HTML
+display(HTML(html_str))
+# 显示单元格坐标
+plt.figure(figsize=(15,15))
+plt.imshow(show_img)
+plt.show()
+```
+
+### 2.3 训练
+
+这里选用PP-Structurev2中的表格识别模型[SLANet](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/configs/table/SLANet.yml)
+
+SLANet是PP-Structurev2全新推出的表格识别模型，相比PP-Structurev1中的TableRec-RARE，在速度不变的情况下精度提升4.7%，TEDS提升2%。
+
+
+|算法|Acc|[TEDS(Tree-Edit-Distance-based Similarity)](https://github.com/ibm-aur-nlp/PubTabNet/tree/master/src)|Speed|
+| --- | --- | --- | ---|
+| EDD<sup>[2]</sup> |x| 88.3% |x|
+| TableRec-RARE(ours) | 71.73%| 93.88% |779ms|
+| SLANet(ours) | 76.31%|    95.89%|766ms|
+
+进行训练之前先使用如下命令下载预训练模型
+
+
+```python
+# 进入PaddleOCR工作目录
+os.chdir('/home/aistudio/PaddleOCR')
+# 下载英文预训练模型
+! wget  -nc -P  ./pretrain_models/  https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_train.tar --no-check-certificate
+! cd ./pretrain_models/ && tar xf en_ppstructure_mobile_v2.0_SLANet_train.tar  && cd ../
+```
+
+使用如下命令即可启动训练，需要修改的配置有
+
+|字段|修改值|含义|
+|---|---|---|
+|Global.pretrained_model|./pretrain_models/en_ppstructure_mobile_v2.0_SLANet_train/best_accuracy.pdparams|指向英文表格预训练模型地址|
+|Global.eval_batch_step|562|模型多少step评估一次，一般设置为一个epoch总的step数|
+|Optimizer.lr.name|Const|学习率衰减器 |
+|Optimizer.lr.learning_rate|0.0005|学习率设为之前的0.05倍 |
+|Train.dataset.data_dir|/home/aistudio/data/data165849|指向训练集图片存放目录 |
+|Train.dataset.label_file_list|/home/aistudio/data/data165849/table_gen_dataset/train.txt|指向训练集标注文件 |
+|Train.loader.batch_size_per_card|32|训练时每张卡的batch_size |
+|Train.loader.num_workers|1|训练集多进程数据读取的进程数，在aistudio中需要设为1 |
+|Eval.dataset.data_dir|/home/aistudio/data/data165849|指向测试集图片存放目录 |
+|Eval.dataset.label_file_list|/home/aistudio/data/data165849/table_gen_dataset/val.txt|指向测试集标注文件 |
+|Eval.loader.batch_size_per_card|32|测试时每张卡的batch_size |
+|Eval.loader.num_workers|1|测试集多进程数据读取的进程数，在aistudio中需要设为1 |
+
+
+已经修改好的配置存储在 `/home/aistudio/SLANet_ch.yml`
+
+
+```python
+import os
+os.chdir('/home/aistudio/PaddleOCR')
+! python3 tools/train.py -c /home/aistudio/SLANet_ch.yml
+```
+
+大约在7个epoch后达到最高精度 97.49%
+
+### 2.4 验证
+
+训练完成后，可使用如下命令在测试集上评估最优模型的精度
+
+
+```python
+! python3 tools/eval.py -c /home/aistudio/SLANet_ch.yml -o Global.checkpoints=/home/aistudio/PaddleOCR/output/SLANet_ch/best_accuracy.pdparams
+```
+
+### 2.5 训练引擎推理
+使用如下命令可使用训练引擎对单张图片进行推理
+
+
+```python
+import os;os.chdir('/home/aistudio/PaddleOCR')
+! python3 tools/infer_table.py -c /home/aistudio/SLANet_ch.yml -o Global.checkpoints=/home/aistudio/PaddleOCR/output/SLANet_ch/best_accuracy.pdparams Global.infer_img=/home/aistudio/data/data165849/table_gen_dataset/img/no_border_18298_G7XZH93DDCMATGJQ8RW2.jpg
+```
+
+
+```python
+import cv2
+from matplotlib import pyplot as plt
+%matplotlib inline
+
+# 显示原图
+show_img = cv2.imread('/home/aistudio/data/data165849/table_gen_dataset/img/no_border_18298_G7XZH93DDCMATGJQ8RW2.jpg')
+plt.figure(figsize=(15,15))
+plt.imshow(show_img)
+plt.show()
+
+# 显示预测的单元格
+show_img = cv2.imread('/home/aistudio/PaddleOCR/output/infer/no_border_18298_G7XZH93DDCMATGJQ8RW2.jpg')
+plt.figure(figsize=(15,15))
+plt.imshow(show_img)
+plt.show()
+```
+
+### 2.6 模型导出
+
+使用如下命令可将模型导出为inference模型
+
+
+```python
+! python3 tools/export_model.py -c /home/aistudio/SLANet_ch.yml -o Global.checkpoints=/home/aistudio/PaddleOCR/output/SLANet_ch/best_accuracy.pdparams Global.save_inference_dir=/home/aistudio/SLANet_ch/infer
+```
+
+### 2.7 预测引擎推理
+使用如下命令可使用预测引擎对单张图片进行推理
+
+
+
+```python
+os.chdir('/home/aistudio/PaddleOCR/ppstructure')
+! python3 table/predict_structure.py \
+    --table_model_dir=/home/aistudio/SLANet_ch/infer \
+    --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt \
+    --image_dir=/home/aistudio/data/data165849/table_gen_dataset/img/no_border_18298_G7XZH93DDCMATGJQ8RW2.jpg \
+    --output=../output/inference
+```
+
+
+```python
+# 显示原图
+show_img = cv2.imread('/home/aistudio/data/data165849/table_gen_dataset/img/no_border_18298_G7XZH93DDCMATGJQ8RW2.jpg')
+plt.figure(figsize=(15,15))
+plt.imshow(show_img)
+plt.show()
+
+# 显示预测的单元格
+show_img = cv2.imread('/home/aistudio/PaddleOCR/output/inference/no_border_18298_G7XZH93DDCMATGJQ8RW2.jpg')
+plt.figure(figsize=(15,15))
+plt.imshow(show_img)
+plt.show()
+```
+
+### 2.8 表格识别
+
+在表格结构模型训练完成后，可结合OCR检测识别模型，对表格内容进行识别。
+
+首先下载PP-OCRv3文字检测识别模型
+
+
+```python
+# 下载PP-OCRv3文本检测识别模型并解压
+! wget  -nc -P  ./inference/ https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar --no-check-certificate
+! wget  -nc -P  ./inference/ https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar --no-check-certificate
+! cd ./inference/ && tar xf ch_PP-OCRv3_det_slim_infer.tar && tar xf ch_PP-OCRv3_rec_slim_infer.tar  && cd ../
+```
+
+模型下载完成后，使用如下命令进行表格识别
+
+
+```python
+import os;os.chdir('/home/aistudio/PaddleOCR/ppstructure')
+! python3 table/predict_table.py \
+    --det_model_dir=inference/ch_PP-OCRv3_det_slim_infer \
+    --rec_model_dir=inference/ch_PP-OCRv3_rec_slim_infer  \
+    --table_model_dir=/home/aistudio/SLANet_ch/infer \
+    --rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt \
+    --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt \
+    --image_dir=/home/aistudio/data/data165849/table_gen_dataset/img/no_border_18298_G7XZH93DDCMATGJQ8RW2.jpg \
+    --output=../output/table
+```
+
+
+```python
+# 显示原图
+show_img = cv2.imread('/home/aistudio/data/data165849/table_gen_dataset/img/no_border_18298_G7XZH93DDCMATGJQ8RW2.jpg')
+plt.figure(figsize=(15,15))
+plt.imshow(show_img)
+plt.show()
+
+# 显示预测结果
+from IPython.core.display import display, HTML
+display(HTML('<html><body><table><tr><td colspan="5">alleadersh</td><td rowspan="2">不贰过，推</td><td rowspan="2">从自己参与浙江数</td><td rowspan="2">。另一方</td></tr><tr><td>AnSha</td><td>自己越</td><td>共商共建工作协商</td><td>w.east </td><td>抓好改革试点任务</td></tr><tr><td>Edime</td><td>ImisesElec</td><td>怀天下”。</td><td></td><td>22.26 </td><td>31.61</td><td>4.30 </td><td>794.94</td></tr><tr><td rowspan="2">ip</td><td> Profundi</td><td>：2019年12月1</td><td>Horspro</td><td>444.48</td><td>2.41 </td><td>87</td><td>679.98</td></tr><tr><td> iehaiTrain</td><td>组长蒋蕊</td><td>Toafterdec</td><td>203.43</td><td>23.54 </td><td>4</td><td>4266.62</td></tr><tr><td>Tyint </td><td> roudlyRol</td><td>谢您的好意，我知道</td><td>ErChows</td><td></td><td>48.90</td><td>1031</td><td>6</td></tr><tr><td>NaFlint</td><td></td><td>一辈的</td><td>aterreclam</td><td>7823.86</td><td>9829.23</td><td>7.96 </td><td> 3068</td></tr><tr><td>家上下游企业，5</td><td>Tr</td><td>景象。当地球上的我们</td><td>Urelaw</td><td>799.62</td><td>354.96</td><td>12.98</td><td>33 </td></tr><tr><td>赛事（</td><td> uestCh</td><td>复制的业务模式并</td><td>Listicjust</td><td>9.23</td><td></td><td>92</td><td>53.22</td></tr><tr><td> Ca</td><td> Iskole</td><td>扶贫"之名引导</td><td> Papua </td><td>7191.90</td><td>1.65</td><td>3.62</td><td>48</td></tr><tr><td rowspan="2">避讳</td><td>ir</td><td>但由于</td><td>Fficeof</td><td>0.22</td><td>6.37</td><td>7.17</td><td>3397.75</td></tr><tr><td>ndaTurk</td><td>百处遗址</td><td>gMa</td><td>1288.34</td><td>2053.66</td><td>2.29</td><td>885.45</td></tr></table></body></html>'))
+```
+
+## 3. 表格属性识别
+### 3.1 代码、环境、数据准备
+#### 3.1.1 代码准备
+首先，我们需要准备训练表格属性的代码，PaddleClas集成了PULC方案，该方案可以快速获得一个在CPU上用时2ms的属性识别模型。PaddleClas代码可以clone下载得到。获取方式如下：
+
+
+
+```python
+! git clone -b develop https://gitee.com/paddlepaddle/PaddleClas
+```
+
+#### 3.1.2 环境准备
+其次，我们需要安装训练PaddleClas相关的依赖包
+
+
+```python
+! pip install -r PaddleClas/requirements.txt --force-reinstall
+! pip install protobuf==3.20.0
+```
+
+
+#### 3.1.3 数据准备
+
+最后，准备训练数据。在这里，我们一共定义了表格的6个属性，分别是表格来源、表格数量、表格颜色、表格清晰度、表格有无干扰、表格角度。其可视化如下：
+
+![](https://user-images.githubusercontent.com/45199522/190587903-ccdfa6fb-51e8-42de-b08b-a127cb04e304.png)
+
+这里，我们提供了一个表格属性的demo子集，可以快速迭代体验。下载方式如下：
+
+
+```python
+%cd PaddleClas/dataset
+!wget https://paddleclas.bj.bcebos.com/data/PULC/table_attribute.tar
+!tar -xf table_attribute.tar
+%cd ../PaddleClas/dataset
+%cd ../
+```
+
+### 3.2 表格属性识别训练
+表格属性训练整体pipeline如下：
+
+![](https://user-images.githubusercontent.com/45199522/190599426-3415b38e-e16e-4e68-9253-2ff531b1b5ca.png)
+
+1.训练过程中，图片经过预处理之后，送入到骨干网络之中，骨干网络将抽取表格图片的特征，最终该特征连接输出的FC层，FC层经过Sigmoid激活函数后和真实标签做交叉熵损失函数，优化器通过对该损失函数做梯度下降来更新骨干网络的参数，经过多轮训练后，骨干网络的参数可以对未知图片做很好的预测；
+
+2.推理过程中，图片经过预处理之后，送入到骨干网络之中，骨干网络加载学习好的权重后对该表格图片做出预测，预测的结果为一个6维向量，该向量中的每个元素反映了每个属性对应的概率值，通过对该值进一步卡阈值之后，得到最终的输出，最终的输出描述了该表格的6个属性。
+
+当准备好相关的数据之后，可以一键启动表格属性的训练，训练代码如下：
+
+
+```python
+
+!python tools/train.py -c ./ppcls/configs/PULC/table_attribute/PPLCNet_x1_0.yaml -o Global.device=cpu -o Global.epochs=10
+```
+
+### 3.3 表格属性识别推理和部署
+#### 3.3.1 模型转换
+当训练好模型之后，需要将模型转换为推理模型进行部署。转换脚本如下：
+
+
+```python
+!python tools/export_model.py -c ppcls/configs/PULC/table_attribute/PPLCNet_x1_0.yaml -o Global.pretrained_model=output/PPLCNet_x1_0/best_model
+```
+
+执行以上命令之后，会在当前目录上生成`inference`文件夹，该文件夹中保存了当前精度最高的推理模型。
+
+#### 3.3.2 模型推理
+安装推理需要的paddleclas包, 此时需要通过下载安装paddleclas的develop的whl包
+
+
+
+```python
+!pip install https://paddleclas.bj.bcebos.com/whl/paddleclas-0.0.0-py3-none-any.whl
+```
+
+进入`deploy`目录下即可对模型进行推理
+
+
+```python
+%cd deploy/
+```
+
+推理命令如下：
+
+
+```python
+!python python/predict_cls.py -c configs/PULC/table_attribute/inference_table_attribute.yaml -o Global.inference_model_dir="../inference" -o Global.infer_imgs="../dataset/table_attribute/Table_val/val_9.jpg"
+!python python/predict_cls.py -c configs/PULC/table_attribute/inference_table_attribute.yaml -o Global.inference_model_dir="../inference" -o Global.infer_imgs="../dataset/table_attribute/Table_val/val_3253.jpg"
+```
+
+推理的表格图片：
+
+![](https://user-images.githubusercontent.com/45199522/190596141-74f4feda-b082-46d7-908d-b0bd5839b430.png)
+
+预测结果如下：
+```
+val_9.jpg:	 {'attributes': ['Scanned', 'Little', 'Black-and-White', 'Clear', 'Without-Obstacles', 'Horizontal'], 'output': [1, 1, 1, 1, 1, 1]}
+```
+
+
+推理的表格图片：
+
+![](https://user-images.githubusercontent.com/45199522/190597086-2e685200-22d0-4042-9e46-f61f24e02e4e.png)
+
+预测结果如下：
+```
+val_3253.jpg:	 {'attributes': ['Photo', 'Little', 'Black-and-White', 'Blurry', 'Without-Obstacles', 'Tilted'], 'output': [0, 1, 1, 0, 1, 0]}
+```
+
+对比两张图片可以发现，第一张图片比较清晰，表格属性的结果也偏向于比较容易识别，我们可以更相信表格识别的结果，第二张图片比较模糊，且存在倾斜现象，表格识别可能存在错误，需要我们人工进一步校验。通过表格的属性识别能力，可以进一步将“人工”和“智能”很好的结合起来，为表格识别能力的落地的精度提供保障。
--- a/applications/印章弯曲文字识别.md
+++ b/applications/印章弯曲文字识别.md
--- a/applications/发票关键信息抽取.md
+++ b/applications/发票关键信息抽取.md
@@ -30,7 +30,7 @@ cd PaddleOCR
 # 安装PaddleOCR的依赖
 pip install -r requirements.txt
 # 安装关键信息抽取任务的依赖
-pip install -r ./ppstructure/vqa/requirements.txt
+pip install -r ./ppstructure/kie/requirements.txt
 ```

 ## 4. 关键信息抽取
@@ -94,7 +94,7 @@ VI-LayoutXLM的配置为[ser_vi_layoutxlm_xfund_zh_udml.yml](../configs/kie/vi_l

 ```yml
 Architecture:
-  model_type: &model_type "vqa"
+  model_type: &model_type "kie"
  name: DistillationModel
  algorithm: Distillation
  Models:
@@ -177,7 +177,7 @@ python3 tools/eval.py -c ./fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.
 使用下面的命令进行预测。

 ```bash
-python3 tools/infer_vqa_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/XFUND/zh_val/val.json Global.infer_mode=False
+python3 tools/infer_kie_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/XFUND/zh_val/val.json Global.infer_mode=False
 ```

 预测结果会保存在配置文件中的`Global.save_res_path`目录中。
@@ -195,7 +195,7 @@ python3 tools/infer_vqa_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architect


 ```bash
-python3 tools/infer_vqa_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/imgs/b25.jpg Global.infer_mode=True
+python3 tools/infer_kie_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/imgs/b25.jpg Global.infer_mode=True
 ```

 结果如下所示。
@@ -211,7 +211,7 @@ python3 tools/infer_vqa_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architect
 如果希望构建基于你在垂类场景训练得到的OCR检测与识别模型，可以使用下面的方法传入检测与识别的inference 模型路径，即可完成OCR文本检测与识别以及SER的串联过程。

 ```bash
-python3 tools/infer_vqa_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/imgs/b25.jpg Global.infer_mode=True Global.kie_rec_model_dir="your_rec_model" Global.kie_det_model_dir="your_det_model"
+python3 tools/infer_kie_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/imgs/b25.jpg Global.infer_mode=True Global.kie_rec_model_dir="your_rec_model" Global.kie_det_model_dir="your_det_model"
 ```

 ### 4.4 关系抽取（Relation Extraction）
@@ -316,7 +316,7 @@ python3 tools/eval.py -c ./fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.c
 # -o 后面的字段是RE任务的配置
 # -c_ser 后面的是SER任务的配置文件
 # -c_ser 后面的字段是SER任务的配置
-python3 tools/infer_vqa_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/re_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/val.json Global.infer_mode=False -c_ser fapiao/ser_vi_layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy
+python3 tools/infer_kie_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/re_vi_layoutxlm_fapiao_trained/best_accuracy Global.infer_img=./train_data/zzsfp/val.json Global.infer_mode=False -c_ser fapiao/ser_vi_layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_trained/best_accuracy
 ```

 预测结果会保存在配置文件中的`Global.save_res_path`目录中。
@@ -333,11 +333,11 @@ python3 tools/infer_vqa_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Archite
 如果希望使用OCR引擎结果得到的结果进行推理，则可以使用下面的命令进行推理。

 ```bash
-python3 tools/infer_vqa_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/re_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/val.json Global.infer_mode=True -c_ser fapiao/ser_vi_layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy
+python3 tools/infer_kie_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/re_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/val.json Global.infer_mode=True -c_ser fapiao/ser_vi_layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy
 ```

 如果希望构建基于你在垂类场景训练得到的OCR检测与识别模型，可以使用下面的方法传入，即可完成SER + RE的串联过程。

 ```bash
-python3 tools/infer_vqa_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/re_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/val.json Global.infer_mode=True -c_ser fapiao/ser_vi_layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.kie_rec_model_dir="your_rec_model" Global.kie_det_model_dir="your_det_model"
+python3 tools/infer_kie_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/re_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/val.json Global.infer_mode=True -c_ser fapiao/ser_vi_layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.kie_rec_model_dir="your_rec_model" Global.kie_det_model_dir="your_det_model"
 ```
--- a/applications/快速构建卡证类OCR.md
+++ b/applications/快速构建卡证类OCR.md
--- a/applications/扫描合同关键信息提取.md
+++ b/applications/扫描合同关键信息提取.md
+# 金融智能核验：扫描合同关键信息抽取
+
+本案例将使用OCR技术和通用信息抽取技术，实现合同关键信息审核和比对。通过本章的学习，你可以快速掌握：
+
+1. 使用PaddleOCR提取扫描文本内容
+2. 使用PaddleNLP抽取自定义信息
+
+点击进入 [AI Studio 项目](https://aistudio.baidu.com/aistudio/projectdetail/4545772)
+
+## 1. 项目背景
+合同审核广泛应用于大中型企业、上市公司、证券、基金公司中，是规避风险的重要任务。
+- 合同内容对比：合同审核场景中，快速找出不同版本合同修改区域、版本差异；如合同盖章归档场景中有效识别实际签署的纸质合同、电子版合同差异。
+
+- 合规性检查：法务人员进行合同审核，如合同完备性检查、大小写金额检查、签约主体一致性检查、双方权利和义务对等性分析等。
+
+- 风险点识别：通过合同审核可识别事实倾向型风险点和数值计算型风险点等，例如交付地点约定不明、合同总价款不一致、重要条款缺失等风险点。
+
+
+![](https://ai-studio-static-online.cdn.bcebos.com/d5143df967fa4364a38868793fe7c57b0c0b1213930243babd6ae01423dcbc4d)
+
+传统业务中大多使用人工进行纸质版合同审核，存在成本高，工作量大，效率低的问题，且一旦出错将造成巨额损失。
+
+
+本项目针对以上场景，使用PaddleOCR+PaddleNLP快速提取文本内容，经过少量数据微调即可准确抽取关键信息，**高效完成合同内容对比、合规性检查、风险点识别等任务，提高效率，降低风险**。
+
+![](https://ai-studio-static-online.cdn.bcebos.com/54f3053e6e1b47a39b26e757006fe2c44910d60a3809422ab76c25396b92e69b)
+
+
+## 2. 解决方案
+
+### 2.1 扫描合同文本内容提取
+
+使用PaddleOCR开源的模型可以快速完成扫描文档的文本内容提取，在清晰文档上识别准确率可达到95%+。下面来快速体验一下：
+
+#### 2.1.1 环境准备
+
+[PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR)提供了适用于通用场景的高精轻量模型，提供数据预处理-模型推理-后处理全流程，支持pip安装：
+
+```
+python -m pip install paddleocr
+```
+
+#### 2.1.2 效果测试
+
+使用一张合同图片作为测试样本，感受ppocrv3模型效果：
+
+<img src=https://ai-studio-static-online.cdn.bcebos.com/46258d0dc9dc40bab3ea0e70434e4a905646df8a647f4c49921e217de5142def width=300>
+
+使用中文检测+识别模型提取文本，实例化PaddleOCR类：
+
+```
+from paddleocr import PaddleOCR, draw_ocr
+
+# paddleocr目前支持中英文、英文、法语、德语、韩语、日语等80个语种，可以通过修改lang参数进行切换
+ocr = PaddleOCR(use_angle_cls=False, lang="ch")  # need to run only once to download and load model into memory
+```
+
+一行命令启动预测，预测结果包括`检测框`和`文本识别内容`:
+
+```
+img_path = "./test_img/hetong2.jpg"
+result = ocr.ocr(img_path, cls=False)
+for line in result:
+    print(line)
+
+# 可视化结果
+from PIL import Image
+
+image = Image.open(img_path).convert('RGB')
+boxes = [line[0] for line in result]
+txts = [line[1][0] for line in result]
+scores = [line[1][1] for line in result]
+im_show = draw_ocr(image, boxes, txts, scores, font_path='./simfang.ttf')
+im_show = Image.fromarray(im_show)
+im_show.show()
+```
+
+#### 2.1.3 图片预处理
+
+通过上图可视化结果可以看到，印章部分造成的文本遮盖，影响了文本识别结果，因此可以考虑通道提取，去除图片中的红色印章：
+
+```
+import cv2
+import numpy as np
+import matplotlib.pyplot as plt
+
+#读入图像,三通道
+image=cv2.imread("./test_img/hetong2.jpg",cv2.IMREAD_COLOR) #timg.jpeg
+
+#获得三个通道
+Bch,Gch,Rch=cv2.split(image)
+
+#保存三通道图片
+cv2.imwrite('blue_channel.jpg',Bch)
+cv2.imwrite('green_channel.jpg',Gch)
+cv2.imwrite('red_channel.jpg',Rch)
+```
+#### 2.1.4 合同文本信息提取
+
+经过2.1.3的预处理后，合同照片的红色通道被分离，获得了一张相对更干净的图片，此时可以再次使用ppocr模型提取文本内容：
+
+```
+import numpy as np
+import cv2
+
+
+img_path = './red_channel.jpg'
+result = ocr.ocr(img_path, cls=False)
+
+# 可视化结果
+from PIL import Image
+
+image = Image.open(img_path).convert('RGB')
+boxes = [line[0] for line in result]
+txts = [line[1][0] for line in result]
+scores = [line[1][1] for line in result]
+im_show = draw_ocr(image, boxes, txts, scores, font_path='./simfang.ttf')
+im_show = Image.fromarray(im_show)
+vis = np.array(im_show)
+im_show.show()
+```
+
+忽略检测框内容，提取完整的合同文本：
+
+```
+txts = [line[1][0] for line in result]
+all_context = "\n".join(txts)
+print(all_context)
+```
+
+通过以上环节就完成了扫描合同关键信息抽取的第一步：文本内容提取，接下来可以基于识别出的文本内容抽取关键信息。
+
+### 2.2 合同关键信息抽取
+
+#### 2.2.1 环境准备
+
+安装PaddleNLP
+
+
+```
+pip install --upgrade pip
+pip install --upgrade paddlenlp
+```
+
+#### 2.2.2 合同关键信息抽取
+
+PaddleNLP 使用 Taskflow 统一管理多场景任务的预测功能，其中`information_extraction` 通过大量的有标签样本进行训练，在通用的场景中一般可以直接使用，只需更换关键字即可。例如在合同信息抽取中，我们重新定义抽取关键字：
+
+甲方、乙方、总价
+
+
+将使用OCR提取好的文本作为输入，使用三行命令可以对上文中提取到的合同文本进行关键信息抽取：
+
+```
+from paddlenlp import Taskflow
+schema = ["甲方","乙方","总价"]
+ie = Taskflow('information_extraction', schema=schema)
+ie.set_schema(schema)
+ie(all_context)
+```
+
+可以看到UIE模型可以准确地提取出关键信息，用于后续的信息比对或审核。
+
+## 3.效果优化
+
+### 3.1 文本识别后处理调优
+
+实际图片采集过程中，可能出现部分图片弯曲等问题，导致使用默认参数识别文本时存在漏检，影响关键信息获取。
+
+例如下图:
+
+<img src="https://ai-studio-static-online.cdn.bcebos.com/fe350481be0241c58736d487d1bf06c2e65911bf01254a79944be629c4c10091" height="300" width="300">
+
+
+直接进行预测：
+
+```
+img_path = "./test_img/hetong3.jpg"
+# 预测结果
+result = ocr.ocr(img_path, cls=False)
+# 可视化结果
+from PIL import Image
+
+image = Image.open(img_path).convert('RGB')
+boxes = [line[0] for line in result]
+txts = [line[1][0] for line in result]
+scores = [line[1][1] for line in result]
+im_show = draw_ocr(image, boxes, txts, scores, font_path='./simfang.ttf')
+im_show = Image.fromarray(im_show)
+im_show.show()
+```
+
+可视化结果可以看到，弯曲图片存在漏检，一般来说可以通过调整后处理参数解决，无需重新训练模型。漏检问题往往是因为检测模型获得的分割图太小，生成框的得分过低被过滤掉了，通常有两种方式调整参数：
+- 开启`use_dilation=True` 膨胀分割区域
+- 调小`det_db_box_thresh`阈值
+
+```
+# 重新实例化 PaddleOCR
+ocr = PaddleOCR(use_angle_cls=False, lang="ch", det_db_box_thresh=0.3, use_dilation=True)
+
+# 预测并可视化
+img_path = "./test_img/hetong3.jpg"
+# 预测结果
+result = ocr.ocr(img_path, cls=False)
+# 可视化结果
+image = Image.open(img_path).convert('RGB')
+boxes = [line[0] for line in result]
+txts = [line[1][0] for line in result]
+scores = [line[1][1] for line in result]
+im_show = draw_ocr(image, boxes, txts, scores, font_path='./simfang.ttf')
+im_show = Image.fromarray(im_show)
+im_show.show()
+```
+
+可以看到漏检问题被很好地解决，提取完整的文本内容：
+
+```
+txts = [line[1][0] for line in result]
+context = "\n".join(txts)
+print(context)
+```
+
+### 3.2 关键信息提取调优
+
+UIE通过大量有标签样本进行训练，得到了一个开箱即用的高精模型。 然而针对不同场景，可能会出现部分实体无法被抽取的情况。通常来说有以下几个方法进行效果调优：
+
+
+- 修改 schema
+- 添加正则方法
+- 标注小样本微调模型
+
+**修改schema**
+
+Prompt和原文描述越像，抽取效果越好，例如
+```
+三：合同价格：总价为人民币大写：参拾玖万捌仟伍佰
+元，小写：398500.00元。总价中包括站房工程建设、安装
+及相关避雷、消防、接地、电力、材料费、检验费、安全、
+验收等所需费用及其他相关费用和税金。
+```
+schema = ["总金额"] 时无法准确抽取，与原文描述差异较大。 修改 schema = ["总价"] 再次尝试：
+
+```
+from paddlenlp import Taskflow
+# schema = ["总金额"]
+schema = ["总价"]
+ie = Taskflow('information_extraction', schema=schema)
+ie.set_schema(schema)
+ie(all_context)
+```
+
+
+**模型微调**
+
+UIE主要通过 `Prompt` 方式来建模，基于 `Prompt` 在小样本上进行微调的效果非常好。详细的数据标注+模型微调步骤可以参考项目：
+
+[PaddleNLP信息抽取技术重磅升级！](https://aistudio.baidu.com/aistudio/projectdetail/3914778?channelType=0&channel=0)
+
+[工单信息抽取](https://aistudio.baidu.com/aistudio/projectdetail/3914778?contributionType=1)
+
+[快递单信息抽取](https://aistudio.baidu.com/aistudio/projectdetail/4038499?contributionType=1)
+
+
+## 总结
+
+扫描合同的关键信息提取可以使用 PaddleOCR + PaddleNLP 组合实现，两个工具均有以下优势：
+
+* 使用简单：whl包一键安装，3行命令调用
+* 效果领先：优秀的模型效果可覆盖几乎全部的应用场景
+* 调优成本低：OCR模型可通过后处理参数的调整适配略有偏差的扫描文本， UIE模型可以通过极少的标注样本微调，成本很低。
+
+## 作业
+
+尝试自己解析出 `test_img/homework.png` 扫描合同中的 [甲方、乙方] 关键词：
+
+
+
+<img src=https://ai-studio-static-online.cdn.bcebos.com/50a49a3c9f8348bfa04e8c8b97d3cce0d0dd6b14040f43939268d120688ef7ca width=300 height=400>
+
+
+
+更多场景下的垂类模型获取，请扫下图二维码填写问卷，加入PaddleOCR官方交流群获取模型下载链接、《动手学OCR》电子书等全套OCR学习资料🎁
+
+<img src=https://ai-studio-static-online.cdn.bcebos.com/606538b59ea845cb99943b1dec6efe724e78f75c1e9c49228c7bf7da9f8837f5 width=300 height=300>
--- a/configs/det/det_r18_vd_ct.yml
+++ b/configs/det/det_r18_vd_ct.yml
+Global:
+  use_gpu: true
+  epoch_num: 600
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: ./output/det_ct/
+  save_epoch_step: 10
+  # evaluation is run every 1000 iterations
+  eval_batch_step: [0,1000]
+  cal_metric_during_train: False
+  pretrained_model: ./pretrain_models/ResNet18_vd_pretrained.pdparams
+  checkpoints:
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: doc/imgs_en/img623.jpg
+  save_res_path: ./output/det_ct/predicts_ct.txt
+
+Architecture:
+  model_type: det
+  algorithm: CT
+  Transform:
+  Backbone:
+    name: ResNet_vd
+    layers: 18
+  Neck:
+    name: CTFPN
+  Head:
+    name: CT_Head
+    in_channels: 512
+    hidden_dim: 128
+    num_classes: 3
+
+Loss:
+  name: CTLoss
+
+Optimizer:
+  name: Adam
+  lr:  #PolynomialDecay
+    name: Linear 
+    learning_rate: 0.001
+    end_lr: 0.
+    epochs: 600
+    step_each_epoch: 1254
+    power: 0.9
+
+PostProcess:
+  name: CTPostProcess
+  box_type: poly
+
+Metric:
+  name: CTMetric
+  main_indicator: f_score
+
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/total_text/train
+    label_file_list:
+      - ./train_data/total_text/train/train.txt
+    ratio_list: [1.0]
+    transforms:
+      - DecodeImage:
+          img_mode: RGB
+          channel_first: False
+      - CTLabelEncode: # Class handling label
+      - RandomScale:
+      - MakeShrink:
+      - GroupRandomHorizontalFlip:
+      - GroupRandomRotate:
+      - GroupRandomCropPadding:
+      - MakeCentripetalShift:
+      - ColorJitter:
+          brightness: 0.125
+          saturation: 0.5 
+      - ToCHWImage: 
+      - NormalizeImage:
+      - KeepKeys:
+          keep_keys: ['image', 'gt_kernel', 'training_mask', 'gt_instance', 'gt_kernel_instance', 'training_mask_distance', 'gt_distance'] # the order of the dataloader list
+  loader:
+    shuffle: True
+    drop_last: True
+    batch_size_per_card: 4
+    num_workers: 8
+
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/total_text/test
+    label_file_list:
+      - ./train_data/total_text/test/test.txt
+    ratio_list: [1.0]
+    transforms:
+      - DecodeImage:
+          img_mode: RGB
+          channel_first: False
+      - CTLabelEncode: # Class handling label
+      - ScaleAlignedShort:
+      - NormalizeImage:
+          order: 'hwc'
+      - ToCHWImage: 
+      - KeepKeys:
+          keep_keys: ['image', 'shape', 'polys', 'texts'] # the order of the dataloader list          
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 1
+    num_workers: 2
--- a/configs/e2e/e2e_r50_vd_pg.yml
+++ b/configs/e2e/e2e_r50_vd_pg.yml
@@ -13,6 +13,7 @@ Global:
  save_inference_dir:
  use_visualdl: False
  infer_img:
+  infer_visual_type: EN # two mode: EN is for english datasets, CN is for chinese datasets
  valid_set: totaltext # two mode: totaltext valid curved words, partvgg valid non-curved words
  save_res_path: ./output/pgnet_r50_vd_totaltext/predicts_pgnet.txt
  character_dict_path: ppocr/utils/ic15_dict.txt
@@ -32,6 +33,7 @@ Architecture:
    name: PGFPN
  Head:
    name: PGHead
+    character_dict_path: ppocr/utils/ic15_dict.txt # the same as Global:character_dict_path

 Loss:
  name: PGLoss
@@ -45,16 +47,18 @@ Optimizer:
  beta1: 0.9
  beta2: 0.999
  lr:
+    name: Cosine
    learning_rate: 0.001
+    warmup_epoch: 50
  regularizer:
    name: 'L2'
-    factor: 0
-
+    factor: 0.0001

 PostProcess:
  name: PGPostProcess
  score_thresh: 0.5
  mode: fast   # fast or slow two ways
+  point_gather_mode: align # same as PGProcessTrain: point_gather_mode

 Metric:
  name: E2EMetric
@@ -76,9 +80,12 @@ Train:
      - E2ELabelEncodeTrain:
      - PGProcessTrain:
          batch_size: 14  # same as loader: batch_size_per_card
+          use_resize: True
+          use_random_crop: False
          min_crop_size: 24
          min_text_size: 4
          max_text_size: 512
+          point_gather_mode: align # two mode: align and none, align mode is better than none mode
      - KeepKeys:
          keep_keys: [ 'images', 'tcl_maps', 'tcl_label_maps', 'border_maps','direction_maps', 'training_masks', 'label_list', 'pos_list', 'pos_mask' ] # dataloader will return list in this order
  loader:

--- a/configs/kie/layoutlm_series/re_layoutxlm_xfund_zh.yml
+++ b/configs/kie/layoutlm_series/re_layoutxlm_xfund_zh.yml
@@ -68,6 +68,7 @@ Train:
      - VQAReTokenRelation:
      - VQAReTokenChunk:
          max_seq_len: *max_seq_len
+      - TensorizeEntitiesRelations:
      - Resize:
          size: [224,224]
      - NormalizeImage:
@@ -83,7 +84,6 @@ Train:
    drop_last: False
    batch_size_per_card: 2
    num_workers: 8
-    collate_fn: ListCollator

 Eval:
  dataset:
@@ -105,6 +105,7 @@ Eval:
      - VQAReTokenRelation:
      - VQAReTokenChunk:
          max_seq_len: *max_seq_len
+      - TensorizeEntitiesRelations:
      - Resize:
          size: [224,224]
      - NormalizeImage:
@@ -120,4 +121,3 @@ Eval:
    drop_last: False
    batch_size_per_card: 8
    num_workers: 8
-    collate_fn: ListCollator
--- a/configs/kie/vi_layoutxlm/re_vi_layoutxlm_xfund_zh.yml
+++ b/configs/kie/vi_layoutxlm/re_vi_layoutxlm_xfund_zh.yml
@@ -73,6 +73,7 @@ Train:
      - VQAReTokenRelation:
      - VQAReTokenChunk:
          max_seq_len: *max_seq_len
+      - TensorizeEntitiesRelations:
      - Resize:
          size: [224,224]
      - NormalizeImage:
@@ -82,13 +83,12 @@ Train:
          order: 'hwc'
      - ToCHWImage:
      - KeepKeys:
-          keep_keys: [ 'input_ids', 'bbox','attention_mask', 'token_type_ids', 'image', 'entities', 'relations'] # dataloader will return list in this order
+          keep_keys: [ 'input_ids', 'bbox','attention_mask', 'token_type_ids', 'entities', 'relations'] # dataloader will return list in this order
  loader:
    shuffle: True
    drop_last: False
    batch_size_per_card: 2
    num_workers: 4
-    collate_fn: ListCollator

 Eval:
  dataset:
@@ -112,6 +112,7 @@ Eval:
      - VQAReTokenRelation:
      - VQAReTokenChunk:
          max_seq_len: *max_seq_len
+      - TensorizeEntitiesRelations:
      - Resize:
          size: [224,224]
      - NormalizeImage:
@@ -121,11 +122,9 @@ Eval:
          order: 'hwc'
      - ToCHWImage:
      - KeepKeys:
-          keep_keys: [ 'input_ids', 'bbox', 'attention_mask', 'token_type_ids', 'image', 'entities', 'relations'] # dataloader will return list in this order
+          keep_keys: [ 'input_ids', 'bbox', 'attention_mask', 'token_type_ids', 'entities', 'relations'] # dataloader will return list in this order
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 8
    num_workers: 8
-    collate_fn: ListCollator
-
--- a/configs/kie/vi_layoutxlm/re_vi_layoutxlm_xfund_zh_udml.yml
+++ b/configs/kie/vi_layoutxlm/re_vi_layoutxlm_xfund_zh_udml.yml
@@ -57,14 +57,16 @@ Loss:
      mode: "l2"
      model_name_pairs:
        - ["Student", "Teacher"]
-      key: hidden_states_5
+      key: hidden_states
+      index: 5
      name: "loss_5"
  - DistillationVQADistanceLoss:
      weight: 0.5
      mode: "l2"
      model_name_pairs:
        - ["Student", "Teacher"]
-      key: hidden_states_8
+      key: hidden_states
+      index: 8
      name: "loss_8"


@@ -116,6 +118,7 @@ Train:
      - VQAReTokenRelation:
      - VQAReTokenChunk:
          max_seq_len: *max_seq_len
+      - TensorizeEntitiesRelations:
      - Resize:
          size: [224,224]
      - NormalizeImage:
@@ -125,13 +128,12 @@ Train:
          order: 'hwc'
      - ToCHWImage:
      - KeepKeys:
-          keep_keys: [ 'input_ids', 'bbox','attention_mask', 'token_type_ids', 'image', 'entities', 'relations'] # dataloader will return list in this order
+          keep_keys: [ 'input_ids', 'bbox','attention_mask', 'token_type_ids', 'entities', 'relations'] # dataloader will return list in this order
  loader:
    shuffle: True
    drop_last: False
    batch_size_per_card: 2
    num_workers: 4
-    collate_fn: ListCollator

 Eval:
  dataset:
@@ -155,6 +157,7 @@ Eval:
      - VQAReTokenRelation:
      - VQAReTokenChunk:
          max_seq_len: *max_seq_len
+      - TensorizeEntitiesRelations:
      - Resize:
          size: [224,224]
      - NormalizeImage:
@@ -164,12 +167,11 @@ Eval:
          order: 'hwc'
      - ToCHWImage:
      - KeepKeys:
-          keep_keys: [ 'input_ids', 'bbox', 'attention_mask', 'token_type_ids', 'image', 'entities', 'relations'] # dataloader will return list in this order
+          keep_keys: [ 'input_ids', 'bbox', 'attention_mask', 'token_type_ids', 'entities', 'relations'] # dataloader will return list in this order
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 8
    num_workers: 8
-    collate_fn: ListCollator


--- a/configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh_udml.yml
+++ b/configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh_udml.yml
@@ -70,14 +70,16 @@ Loss:
      mode: "l2"
      model_name_pairs:
        - ["Student", "Teacher"]
-      key: hidden_states_5
+      key: hidden_states
+      index: 5
      name: "loss_5"
  - DistillationVQADistanceLoss:
      weight: 0.5
      mode: "l2"
      model_name_pairs:
        - ["Student", "Teacher"]
-      key: hidden_states_8
+      key: hidden_states
+      index: 8
      name: "loss_8"
  
  

--- a/configs/rec/PP-OCRv3/ch_PP-OCRv3_rec.yml
+++ b/configs/rec/PP-OCRv3/ch_PP-OCRv3_rec.yml
@@ -88,6 +88,7 @@ Train:
        prob: 0.5
        ext_data_num: 2
        image_shape: [48, 320, 3]
+        max_text_length: *max_text_length
    - RecAug:
    - MultiLabelEncode:
    - RecResizeImg:

--- a/configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml
+++ b/configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml
@@ -162,6 +162,7 @@ Train:
        prob: 0.5
        ext_data_num: 2
        image_shape: [48, 320, 3]
+        max_text_length: *max_text_length
    - RecAug:
    - MultiLabelEncode:
    - RecResizeImg:

--- a/configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml
+++ b/configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml
@@ -88,6 +88,7 @@ Train:
        prob: 0.5
        ext_data_num: 2
        image_shape: [48, 320, 3]
+        max_text_length: *max_text_length
    - RecAug:
    - MultiLabelEncode:
    - RecResizeImg:

--- a/configs/rec/rec_r31_robustscanner.yml
+++ b/configs/rec/rec_r31_robustscanner.yml
@@ -12,7 +12,7 @@ Global:
  checkpoints: 
  save_inference_dir:
  use_visualdl: False
-  infer_img: ./inference/rec_inference
+  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
  character_dict_path: ppocr/utils/dict90.txt
  max_text_length: &max_text_length 40

--- a/configs/rec/rec_r32_gaspin_bilstm_att.yml
+++ b/configs/rec/rec_r32_gaspin_bilstm_att.yml
@@ -12,7 +12,7 @@ Global:
  checkpoints:
  save_inference_dir:
  use_visualdl: False
-  infer_img: doc/imgs_words/ch/word_1.jpg
+  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
  character_dict_path: ./ppocr/utils/dict/spin_dict.txt
  max_text_length: 25

--- a/configs/table/SLANet.yml
+++ b/configs/table/SLANet.yml
@@ -12,7 +12,7 @@ Global:
  checkpoints:
  save_inference_dir: ./output/SLANet/infer
  use_visualdl: False
-  infer_img: doc/table/table.jpg
+  infer_img: ppstructure/docs/table/table.jpg
  # for data or label process
  character_dict_path: ppocr/utils/dict/table_structure_dict.txt
  character_type: en

--- a/configs/table/SLANet_ch.yml
+++ b/configs/table/SLANet_ch.yml
@@ -12,7 +12,7 @@ Global:
  checkpoints: 
  save_inference_dir: ./output/SLANet_ch/infer
  use_visualdl: False
-  infer_img: doc/table/table.jpg
+  infer_img: ppstructure/docs/table/table.jpg
  # for data or label process
  character_dict_path: ppocr/utils/dict/table_structure_dict_ch.txt
  character_type: en
@@ -107,7 +107,7 @@ Train:
 Eval:
  dataset:
    name: PubTabDataSet
-     data_dir: train_data/table/val/
+    data_dir: train_data/table/val/
    label_file_list: [train_data/table/val.txt]
    transforms:
      - DecodeImage:

--- a/configs/table/table_mv3.yml
+++ b/configs/table/table_mv3.yml
@@ -43,7 +43,6 @@ Architecture:
  Head:
    name: TableAttentionHead
    hidden_size: 256
-    loc_type: 2
    max_text_length: *max_text_length
    loc_reg_num: &loc_reg_num 4


--- a/deploy/cpp_infer/include/args.h
+++ b/deploy/cpp_infer/include/args.h
@@ -49,13 +49,20 @@ DECLARE_int32(rec_batch_num);
 DECLARE_string(rec_char_dict_path);
 DECLARE_int32(rec_img_h);
 DECLARE_int32(rec_img_w);
+// layout model related
+DECLARE_string(layout_model_dir);
+DECLARE_string(layout_dict_path);
+DECLARE_double(layout_score_threshold);
+DECLARE_double(layout_nms_threshold);
 // structure model related
 DECLARE_string(table_model_dir);
 DECLARE_int32(table_max_len);
 DECLARE_int32(table_batch_num);
 DECLARE_string(table_char_dict_path);
+DECLARE_bool(merge_no_span_structure);
 // forward related
 DECLARE_bool(det);
 DECLARE_bool(rec);
 DECLARE_bool(cls);
-DECLARE_bool(table);
\ No newline at end of file
+DECLARE_bool(table);
+DECLARE_bool(layout);
\ No newline at end of file
--- a/deploy/cpp_infer/include/ocr_cls.h
+++ b/deploy/cpp_infer/include/ocr_cls.h
@@ -14,26 +14,12 @@

 #pragma once

-#include "opencv2/core.hpp"
-#include "opencv2/imgcodecs.hpp"
-#include "opencv2/imgproc.hpp"
 #include "paddle_api.h"
 #include "paddle_inference_api.h"
-#include <chrono>
-#include <iomanip>
-#include <iostream>
-#include <ostream>
-#include <vector>
-
-#include <cstring>
-#include <fstream>
-#include <numeric>

 #include <include/preprocess_op.h>
 #include <include/utility.h>

-using namespace paddle_infer;
-
 namespace PaddleOCR {

 class Classifier {
@@ -66,7 +52,7 @@ public:
           std::vector<float> &cls_scores, std::vector<double> &times);

 private:
-  std::shared_ptr<Predictor> predictor_;
+  std::shared_ptr<paddle_infer::Predictor> predictor_;

  bool use_gpu_ = false;
  int gpu_id_ = 0;

--- a/deploy/cpp_infer/include/ocr_det.h
+++ b/deploy/cpp_infer/include/ocr_det.h
@@ -14,26 +14,12 @@

 #pragma once

-#include "opencv2/core.hpp"
-#include "opencv2/imgcodecs.hpp"
-#include "opencv2/imgproc.hpp"
 #include "paddle_api.h"
 #include "paddle_inference_api.h"
-#include <chrono>
-#include <iomanip>
-#include <iostream>
-#include <ostream>
-#include <vector>
-
-#include <cstring>
-#include <fstream>
-#include <numeric>

 #include <include/postprocess_op.h>
 #include <include/preprocess_op.h>

-using namespace paddle_infer;
-
 namespace PaddleOCR {

 class DBDetector {
@@ -41,7 +27,7 @@ public:
  explicit DBDetector(const std::string &model_dir, const bool &use_gpu,
                      const int &gpu_id, const int &gpu_mem,
                      const int &cpu_math_library_num_threads,
-                      const bool &use_mkldnn, const string &limit_type,
+                      const bool &use_mkldnn, const std::string &limit_type,
                      const int &limit_side_len, const double &det_db_thresh,
                      const double &det_db_box_thresh,
                      const double &det_db_unclip_ratio,
@@ -77,7 +63,7 @@ public:
           std::vector<double> &times);

 private:
-  std::shared_ptr<Predictor> predictor_;
+  std::shared_ptr<paddle_infer::Predictor> predictor_;

  bool use_gpu_ = false;
  int gpu_id_ = 0;
@@ -85,7 +71,7 @@ private:
  int cpu_math_library_num_threads_ = 4;
  bool use_mkldnn_ = false;

-  string limit_type_ = "max";
+  std::string limit_type_ = "max";
  int limit_side_len_ = 960;

  double det_db_thresh_ = 0.3;

--- a/deploy/cpp_infer/include/ocr_rec.h
+++ b/deploy/cpp_infer/include/ocr_rec.h
@@ -14,27 +14,12 @@

 #pragma once

-#include "opencv2/core.hpp"
-#include "opencv2/imgcodecs.hpp"
-#include "opencv2/imgproc.hpp"
 #include "paddle_api.h"
 #include "paddle_inference_api.h"
-#include <chrono>
-#include <iomanip>
-#include <iostream>
-#include <ostream>
-#include <vector>
-
-#include <cstring>
-#include <fstream>
-#include <numeric>

 #include <include/ocr_cls.h>
-#include <include/preprocess_op.h>
 #include <include/utility.h>

-using namespace paddle_infer;
-
 namespace PaddleOCR {

 class CRNNRecognizer {
@@ -42,7 +27,7 @@ public:
  explicit CRNNRecognizer(const std::string &model_dir, const bool &use_gpu,
                          const int &gpu_id, const int &gpu_mem,
                          const int &cpu_math_library_num_threads,
-                          const bool &use_mkldnn, const string &label_path,
+                          const bool &use_mkldnn, const std::string &label_path,
                          const bool &use_tensorrt,
                          const std::string &precision,
                          const int &rec_batch_num, const int &rec_img_h,
@@ -75,7 +60,7 @@ public:
           std::vector<float> &rec_text_scores, std::vector<double> &times);

 private:
-  std::shared_ptr<Predictor> predictor_;
+  std::shared_ptr<paddle_infer::Predictor> predictor_;

  bool use_gpu_ = false;
  int gpu_id_ = 0;

--- a/deploy/cpp_infer/include/paddleocr.h
+++ b/deploy/cpp_infer/include/paddleocr.h
@@ -14,28 +14,9 @@

 #pragma once

-#include "opencv2/core.hpp"
-#include "opencv2/imgcodecs.hpp"
-#include "opencv2/imgproc.hpp"
-#include "paddle_api.h"
-#include "paddle_inference_api.h"
-#include <chrono>
-#include <iomanip>
-#include <iostream>
-#include <ostream>
-#include <vector>
-
-#include <cstring>
-#include <fstream>
-#include <numeric>
-
 #include <include/ocr_cls.h>
 #include <include/ocr_det.h>
 #include <include/ocr_rec.h>
-#include <include/preprocess_op.h>
-#include <include/utility.h>
-
-using namespace paddle_infer;

 namespace PaddleOCR {

@@ -43,21 +24,27 @@ class PPOCR {
 public:
  explicit PPOCR();
  ~PPOCR();
-  std::vector<std::vector<OCRPredictResult>>
-  ocr(std::vector<cv::String> cv_all_img_names, bool det = true,
-      bool rec = true, bool cls = true);
+
+  std::vector<std::vector<OCRPredictResult>> ocr(std::vector<cv::Mat> img_list,
+                                                 bool det = true,
+                                                 bool rec = true,
+                                                 bool cls = true);
+  std::vector<OCRPredictResult> ocr(cv::Mat img, bool det = true,
+                                    bool rec = true, bool cls = true);
+
+  void reset_timer();
+  void benchmark_log(int img_num);

 protected:
-  void det(cv::Mat img, std::vector<OCRPredictResult> &ocr_results,
-           std::vector<double> &times);
+  std::vector<double> time_info_det = {0, 0, 0};
+  std::vector<double> time_info_rec = {0, 0, 0};
+  std::vector<double> time_info_cls = {0, 0, 0};
+
+  void det(cv::Mat img, std::vector<OCRPredictResult> &ocr_results);
  void rec(std::vector<cv::Mat> img_list,
-           std::vector<OCRPredictResult> &ocr_results,
-           std::vector<double> &times);
+           std::vector<OCRPredictResult> &ocr_results);
  void cls(std::vector<cv::Mat> img_list,
-           std::vector<OCRPredictResult> &ocr_results,
-           std::vector<double> &times);
-  void log(std::vector<double> &det_times, std::vector<double> &rec_times,
-           std::vector<double> &cls_times, int img_num);
+           std::vector<OCRPredictResult> &ocr_results);

 private:
  DBDetector *detector_ = nullptr;

--- a/deploy/cpp_infer/include/paddlestructure.h
+++ b/deploy/cpp_infer/include/paddlestructure.h
@@ -14,27 +14,9 @@

 #pragma once

-#include "opencv2/core.hpp"
-#include "opencv2/imgcodecs.hpp"
-#include "opencv2/imgproc.hpp"
-#include "paddle_api.h"
-#include "paddle_inference_api.h"
-#include <chrono>
-#include <iomanip>
-#include <iostream>
-#include <ostream>
-#include <vector>
-
-#include <cstring>
-#include <fstream>
-#include <numeric>
-
 #include <include/paddleocr.h>
-#include <include/preprocess_op.h>
+#include <include/structure_layout.h>
 #include <include/structure_table.h>
-#include <include/utility.h>
-
-using namespace paddle_infer;

 namespace PaddleOCR {

@@ -42,27 +24,32 @@ class PaddleStructure : public PPOCR {
 public:
  explicit PaddleStructure();
  ~PaddleStructure();
-  std::vector<std::vector<StructurePredictResult>>
-  structure(std::vector<cv::String> cv_all_img_names, bool layout = false,
-            bool table = true);
+
+  std::vector<StructurePredictResult> structure(cv::Mat img,
+                                                bool layout = false,
+                                                bool table = true,
+                                                bool ocr = false);
+
+  void reset_timer();
+  void benchmark_log(int img_num);

 private:
-  StructureTableRecognizer *recognizer_ = nullptr;
+  std::vector<double> time_info_table = {0, 0, 0};
+  std::vector<double> time_info_layout = {0, 0, 0};
+
+  StructureTableRecognizer *table_model_ = nullptr;
+  StructureLayoutRecognizer *layout_model_ = nullptr;
+
+  void layout(cv::Mat img,
+              std::vector<StructurePredictResult> &structure_result);
+
+  void table(cv::Mat img, StructurePredictResult &structure_result);

-  void table(cv::Mat img, StructurePredictResult &structure_result,
-             std::vector<double> &time_info_table,
-             std::vector<double> &time_info_det,
-             std::vector<double> &time_info_rec,
-             std::vector<double> &time_info_cls);
-  std::string
-  rebuild_table(std::vector<std::string> rec_html_tags,
-                std::vector<std::vector<std::vector<int>>> rec_boxes,
-                std::vector<OCRPredictResult> &ocr_result);
+  std::string rebuild_table(std::vector<std::string> rec_html_tags,
+                            std::vector<std::vector<int>> rec_boxes,
+                            std::vector<OCRPredictResult> &ocr_result);

-  float iou(std::vector<std::vector<int>> &box1,
-            std::vector<std::vector<int>> &box2);
-  float dis(std::vector<std::vector<int>> &box1,
-            std::vector<std::vector<int>> &box2);
+  float dis(std::vector<int> &box1, std::vector<int> &box2);

  static bool comparison_dis(const std::vector<float> &dis1,
                             const std::vector<float> &dis2) {

--- a/deploy/cpp_infer/include/postprocess_op.h
+++ b/deploy/cpp_infer/include/postprocess_op.h
@@ -14,24 +14,9 @@

 #pragma once

-#include "opencv2/core.hpp"
-#include "opencv2/imgcodecs.hpp"
-#include "opencv2/imgproc.hpp"
-#include <chrono>
-#include <iomanip>
-#include <iostream>
-#include <ostream>
-#include <vector>
-
-#include <cstring>
-#include <fstream>
-#include <numeric>
-
 #include "include/clipper.h"
 #include "include/utility.h"

-using namespace std;
-
 namespace PaddleOCR {

 class DBPostProcessor {
@@ -92,14 +77,13 @@ private:

 class TablePostProcessor {
 public:
-  void init(std::string label_path);
-  void
-  Run(std::vector<float> &loc_preds, std::vector<float> &structure_probs,
-      std::vector<float> &rec_scores, std::vector<int> &loc_preds_shape,
-      std::vector<int> &structure_probs_shape,
-      std::vector<std::vector<std::string>> &rec_html_tag_batch,
-      std::vector<std::vector<std::vector<std::vector<int>>>> &rec_boxes_batch,
-      std::vector<int> &width_list, std::vector<int> &height_list);
+  void init(std::string label_path, bool merge_no_span_structure = true);
+  void Run(std::vector<float> &loc_preds, std::vector<float> &structure_probs,
+           std::vector<float> &rec_scores, std::vector<int> &loc_preds_shape,
+           std::vector<int> &structure_probs_shape,
+           std::vector<std::vector<std::string>> &rec_html_tag_batch,
+           std::vector<std::vector<std::vector<int>>> &rec_boxes_batch,
+           std::vector<int> &width_list, std::vector<int> &height_list);

 private:
  std::vector<std::string> label_list_;
@@ -107,4 +91,27 @@ private:
  std::string beg = "sos";
 };

+class PicodetPostProcessor { // post-processor type held by StructureLayoutRecognizer (its post_processor_ member)
+public:
+  void init(std::string label_path, const double score_threshold = 0.4,
+            const double nms_threshold = 0.5,
+            const std::vector<int> &fpn_stride = {8, 16, 32, 64}); // defaults equal the member initializers below
+  void Run(std::vector<StructurePredictResult> &results,
+           std::vector<std::vector<float>> outs, std::vector<int> ori_shape,
+           std::vector<int> resize_shape, int eg_max); // NOTE(review): "eg_max" looks like a typo for "reg_max" (cf. disPred2Bbox) - confirm
+  std::vector<int> fpn_stride_ = {8, 16, 32, 64}; // NOTE(review): public, unlike the other trailing-underscore members - intentional?
+
+private:
+  StructurePredictResult disPred2Bbox(std::vector<float> bbox_pred, int label,
+                                      float score, int x, int y, int stride,
+                                      std::vector<int> im_shape, int reg_max); // returns a single StructurePredictResult by value
+  void nms(std::vector<StructurePredictResult> &input_boxes,
+           float nms_threshold); // input_boxes is a non-const reference; presumably filtered in place - confirm in the .cpp
+
+  std::vector<std::string> label_list_;
+  double score_threshold_ = 0.4; // same value as init()'s default score_threshold
+  double nms_threshold_ = 0.5; // same value as init()'s default nms_threshold
+  int num_class_ = 5; // NOTE(review): hard-coded; presumably has to match the label file - confirm
+};
+
 } // namespace PaddleOCR
--- a/deploy/cpp_infer/include/preprocess_op.h
+++ b/deploy/cpp_infer/include/preprocess_op.h
@@ -14,21 +14,12 @@

 #pragma once

-#include "opencv2/core.hpp"
-#include "opencv2/imgcodecs.hpp"
-#include "opencv2/imgproc.hpp"
-#include <chrono>
-#include <iomanip>
 #include <iostream>
-#include <ostream>
 #include <vector>

-#include <cstring>
-#include <fstream>
-#include <numeric>
-
-using namespace std;
-using namespace paddle;
+#include "opencv2/core.hpp"
+#include "opencv2/imgcodecs.hpp"
+#include "opencv2/imgproc.hpp"

 namespace PaddleOCR {

@@ -51,9 +42,9 @@ public:

 class ResizeImgType0 {
 public:
-  virtual void Run(const cv::Mat &img, cv::Mat &resize_img, string limit_type,
-                   int limit_side_len, float &ratio_h, float &ratio_w,
-                   bool use_tensorrt);
+  virtual void Run(const cv::Mat &img, cv::Mat &resize_img,
+                   std::string limit_type, int limit_side_len, float &ratio_h,
+                   float &ratio_w, bool use_tensorrt);
 };

 class CrnnResizeImg {
@@ -82,4 +73,10 @@ public:
                   const int max_len = 488);
 };

+class Resize { // pre-processing op; StructureLayoutRecognizer declares one as resize_op_
+public:
+  virtual void Run(const cv::Mat &img, cv::Mat &resize_img, const int h,
+                   const int w); // img is a const ref; resize_img is a non-const ref, presumably the output sized h x w - confirm
+};
+
 } // namespace PaddleOCR
\ No newline at end of file
--- a/deploy/cpp_infer/include/structure_layout.h
+++ b/deploy/cpp_infer/include/structure_layout.h
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle_api.h"
+#include "paddle_inference_api.h"
+
+#include <include/postprocess_op.h>
+#include <include/preprocess_op.h>
+
+namespace PaddleOCR {
+
+// Bundles a paddle_infer::Predictor with the Resize / Normalize / Permute
+// pre-processing ops and a PicodetPostProcessor. Only the constructor is
+// defined inline here; LoadModel() and Run() are declared and implemented
+// elsewhere.
+class StructureLayoutRecognizer {
+public:
+  // Copies the runtime options into the corresponding members, initializes
+  // the post-processor with the label file and the two thresholds, then
+  // calls LoadModel(model_dir).
+  // No fpn_stride argument is passed to post_processor_.init(), so that
+  // parameter takes its declared default.
+  explicit StructureLayoutRecognizer(
+      const std::string &model_dir, const bool &use_gpu, const int &gpu_id,
+      const int &gpu_mem, const int &cpu_math_library_num_threads,
+      const bool &use_mkldnn, const std::string &label_path,
+      const bool &use_tensorrt, const std::string &precision,
+      const double &layout_score_threshold,
+      const double &layout_nms_threshold) {
+    this->use_gpu_ = use_gpu;
+    this->gpu_id_ = gpu_id;
+    this->gpu_mem_ = gpu_mem;
+    this->cpu_math_library_num_threads_ = cpu_math_library_num_threads;
+    this->use_mkldnn_ = use_mkldnn;
+    this->use_tensorrt_ = use_tensorrt;
+    this->precision_ = precision;
+
+    // The post-processor is configured before the model is loaded.
+    this->post_processor_.init(label_path, layout_score_threshold,
+                               layout_nms_threshold);
+    LoadModel(model_dir);
+  }
+
+  // Load Paddle inference model
+  void LoadModel(const std::string &model_dir);
+
+  // img is taken by value; result and times are non-const references,
+  // presumably filled by the call - confirm against the implementation.
+  void Run(cv::Mat img, std::vector<StructurePredictResult> &result,
+           std::vector<double> &times);
+
+private:
+  std::shared_ptr<paddle_infer::Predictor> predictor_;
+
+  // Runtime options; the values below are defaults that the constructor
+  // overwrites.
+  bool use_gpu_ = false;
+  int gpu_id_ = 0;
+  int gpu_mem_ = 4000;
+  int cpu_math_library_num_threads_ = 4;
+  bool use_mkldnn_ = false;
+
+  // Three-element normalization constants; scale_ stores reciprocals.
+  // NOTE(review): these look like the usual ImageNet per-channel mean/std -
+  // confirm channel order against the Normalize op.
+  std::vector<float> mean_ = {0.485f, 0.456f, 0.406f};
+  std::vector<float> scale_ = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};
+  bool is_scale_ = true;
+
+  bool use_tensorrt_ = false;
+  std::string precision_ = "fp32";
+
+  // pre-process
+  Resize resize_op_;
+  Normalize normalize_op_;
+  Permute permute_op_;
+
+  // post-process
+  PicodetPostProcessor post_processor_;
+};
+
+} // namespace PaddleOCR
\ No newline at end of file
--- a/deploy/cpp_infer/include/structure_table.h
+++ b/deploy/cpp_infer/include/structure_table.h
@@ -14,26 +14,11 @@

 #pragma once

-#include "opencv2/core.hpp"
-#include "opencv2/imgcodecs.hpp"
-#include "opencv2/imgproc.hpp"
 #include "paddle_api.h"
 #include "paddle_inference_api.h"
-#include <chrono>
-#include <iomanip>
-#include <iostream>
-#include <ostream>
-#include <vector>
-
-#include <cstring>
-#include <fstream>
-#include <numeric>

 #include <include/postprocess_op.h>
 #include <include/preprocess_op.h>
-#include <include/utility.h>
-
-using namespace paddle_infer;

 namespace PaddleOCR {

@@ -42,9 +27,10 @@ public:
  explicit StructureTableRecognizer(
      const std::string &model_dir, const bool &use_gpu, const int &gpu_id,
      const int &gpu_mem, const int &cpu_math_library_num_threads,
-      const bool &use_mkldnn, const string &label_path,
+      const bool &use_mkldnn, const std::string &label_path,
      const bool &use_tensorrt, const std::string &precision,
-      const int &table_batch_num, const int &table_max_len) {
+      const int &table_batch_num, const int &table_max_len,
+      const bool &merge_no_span_structure) {
    this->use_gpu_ = use_gpu;
    this->gpu_id_ = gpu_id;
    this->gpu_mem_ = gpu_mem;
@@ -55,7 +41,7 @@ public:
    this->table_batch_num_ = table_batch_num;
    this->table_max_len_ = table_max_len;

-    this->post_processor_.init(label_path);
+    this->post_processor_.init(label_path, merge_no_span_structure);
    LoadModel(model_dir);
  }

@@ -65,11 +51,11 @@ public:
  void Run(std::vector<cv::Mat> img_list,
           std::vector<std::vector<std::string>> &rec_html_tags,
           std::vector<float> &rec_scores,
-           std::vector<std::vector<std::vector<std::vector<int>>>> &rec_boxes,
+           std::vector<std::vector<std::vector<int>>> &rec_boxes,
           std::vector<double> &times);

 private:
-  std::shared_ptr<Predictor> predictor_;
+  std::shared_ptr<paddle_infer::Predictor> predictor_;

  bool use_gpu_ = false;
  int gpu_id_ = 0;

--- a/deploy/cpp_infer/include/utility.h
+++ b/deploy/cpp_infer/include/utility.h
@@ -41,11 +41,13 @@ struct OCRPredictResult {
 };

 struct StructurePredictResult {
-  std::vector<int> box;
+  std::vector<float> box;
+  std::vector<std::vector<int>> cell_box;
  std::string type;
  std::vector<OCRPredictResult> text_res;
  std::string html;
  float html_score = -1;
+  float confidence;
 };

 class Utility {
@@ -56,6 +58,10 @@ public:
                              const std::vector<OCRPredictResult> &ocr_result,
                              const std::string &save_path);

+  static void VisualizeBboxes(const cv::Mat &srcimg,
+                              const StructurePredictResult &structure_result,
+                              const std::string &save_path);
+
  template <class ForwardIterator>
  inline static size_t argmax(ForwardIterator first, ForwardIterator last) {
    return std::distance(first, std::max_element(first, last));
@@ -77,10 +83,20 @@ public:

  static void print_result(const std::vector<OCRPredictResult> &ocr_result);

-  static cv::Mat crop_image(cv::Mat &img, std::vector<int> &area);
+  static cv::Mat crop_image(cv::Mat &img, const std::vector<int> &area);
+  static cv::Mat crop_image(cv::Mat &img, const std::vector<float> &area);

  static void sorted_boxes(std::vector<OCRPredictResult> &ocr_result);

+  static std::vector<int> xyxyxyxy2xyxy(std::vector<std::vector<int>> &box);
+  static std::vector<int> xyxyxyxy2xyxy(std::vector<int> &box);
+
+  static float fast_exp(float x);
+  static std::vector<float>
+  activation_function_softmax(std::vector<float> &src);
+  static float iou(std::vector<int> &box1, std::vector<int> &box2);
+  static float iou(std::vector<float> &box1, std::vector<float> &box2);
+
 private:
  static bool comparison_box(const OCRPredictResult &result1,
                             const OCRPredictResult &result2) {

--- a/deploy/cpp_infer/readme.md
+++ b/deploy/cpp_infer/readme.md
@@ -174,6 +174,9 @@ inference/
 |-- table
 |   |--inference.pdiparams
 |   |--inference.pdmodel
+|-- layout
+|   |--inference.pdiparams
+|   |--inference.pdmodel
 ```


@@ -278,8 +281,30 @@ Specifically,
    --cls=true \
 ```

+##### 7. layout+table
+```shell
+./build/ppocr --det_model_dir=inference/det_db \
+    --rec_model_dir=inference/rec_rcnn \
+    --table_model_dir=inference/table \
+    --image_dir=../../ppstructure/docs/table/table.jpg \
+    --layout_model_dir=inference/layout \
+    --type=structure \
+    --table=true \
+    --layout=true
+```
+
+##### 8. layout
+```shell
+./build/ppocr --layout_model_dir=inference/layout \
+    --image_dir=../../ppstructure/docs/table/1.png \
+    --type=structure \
+    --table=false \
+    --layout=true \
+    --det=false \
+    --rec=false
+```

-##### 7. table
+##### 9. table
 ```shell
 ./build/ppocr --det_model_dir=inference/det_db \
    --rec_model_dir=inference/rec_rcnn \
@@ -343,6 +368,16 @@ More parameters are as follows,
 |rec_img_h|int|48|image height of recognition|
 |rec_img_w|int|320|image width of recognition|

+- Layout related parameters
+
+|parameter|data type|default|meaning|
+| :---: | :---: | :---: | :---: |
+|layout_model_dir|string|-| Address of layout inference model|
+|layout_dict_path|string|../../ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt|dictionary file|
+|layout_score_threshold|float|0.5|Threshold of score.|
+|layout_nms_threshold|float|0.5|Threshold of nms.|
+
+
 - Table recognition related parameters

 |parameter|data type|default|meaning|
@@ -350,6 +385,7 @@ More parameters are as follows,
 |table_model_dir|string|-|Address of table recognition inference model|
 |table_char_dict_path|string|../../ppocr/utils/dict/table_structure_dict.txt|dictionary file|
 |table_max_len|int|488|The size of the long side of the input image of the table recognition model, the final input image size of the network is（table_max_len，table_max_len）|
+|merge_no_span_structure|bool|true|Whether to merge `<td>` and `</td>` into `<td></td>`|


 * Multi-language inference is also supported in PaddleOCR, you can refer to [recognition tutorial](../../doc/doc_en/recognition_en.md) for more supported languages and models in PaddleOCR. Specifically, if you want to infer using multi-language models, you just need to modify values of `rec_char_dict_path` and `rec_model_dir`.
@@ -367,11 +403,51 @@ predict img: ../../doc/imgs/12.jpg
 The detection visualized image saved in ./output//12.jpg
 ```

- table
+- layout+table

 ```bash
-predict img: ../../ppstructure/docs/table/table.jpg
-0       type: table, region: [0,0,371,293], res: <html><body><table><thead><tr><td>Methods</td><td>R</td><td>P</td><td>F</td><td>FPS</td></tr></thead><tbody><tr><td>SegLink [26]</td><td>70.0</td><td>86.0</td><td>77.0</td><td>8.9</td></tr><tr><td>PixelLink [4]</td><td>73.2</td><td>83.0</td><td>77.8</td><td>-</td></tr><tr><td>TextSnake [18]</td><td>73.9</td><td>83.2</td><td>78.3</td><td>1.1</td></tr><tr><td>TextField [37]</td><td>75.9</td><td>87.4</td><td>81.3</td><td>5.2 </td></tr><tr><td>MSR[38]</td><td>76.7</td><td>87.4</td><td>81.7</td><td>-</td></tr><tr><td>FTSN [3]</td><td>77.1</td><td>87.6</td><td>82.0</td><td>-</td></tr><tr><td>LSE[30]</td><td>81.7</td><td>84.2</td><td>82.9</td><td>-</td></tr><tr><td>CRAFT [2]</td><td>78.2</td><td>88.2</td><td>82.9</td><td>8.6</td></tr><tr><td>MCN [16]</td><td>79</td><td>88</td><td>83</td><td>-</td></tr><tr><td>ATRR[35]</td><td>82.1</td><td>85.2</td><td>83.6</td><td>-</td></tr><tr><td>PAN [34]</td><td>83.8</td><td>84.4</td><td>84.1</td><td>30.2</td></tr><tr><td>DB[12]</td><td>79.2</td><td>91.5</td><td>84.9</td><td>32.0</td></tr><tr><td>DRRG [41]</td><td>82.30</td><td>88.05</td><td>85.08</td><td>-</td></tr><tr><td>Ours (SynText)</td><td>80.68</td><td>85.40</td><td>82.97</td><td>12.68</td></tr><tr><td>Ours (MLT-17)</td><td>84.54</td><td>86.62</td><td>85.57</td><td>12.31</td></tr></tbody></table></body></html>
+predict img: ../../ppstructure/docs/table/1.png
+0       type: text, region: [12,729,410,848], score: 0.781044, res: count of ocr result is : 7
+********** print ocr result **********
+0       det boxes: [[4,1],[79,1],[79,12],[4,12]] rec text: CTW1500. rec score: 0.769472
+...
+6       det boxes: [[4,99],[391,99],[391,112],[4,112]] rec text: sate-of-the-artmethods[12.34.36l.ourapproachachieves rec score: 0.90414
+********** end print ocr result **********
+1       type: text, region: [69,342,342,359], score: 0.703666, res: count of ocr result is : 1
+********** print ocr result **********
+0       det boxes: [[8,2],[269,2],[269,13],[8,13]] rec text: Table6.Experimentalresults on CTW-1500 rec score: 0.890454
+********** end print ocr result **********
+2       type: text, region: [70,316,706,332], score: 0.659738, res: count of ocr result is : 2
+********** print ocr result **********
+0       det boxes: [[373,2],[630,2],[630,11],[373,11]] rec text: oroposals.andthegreencontoursarefinal rec score: 0.919729
+1       det boxes: [[8,3],[357,3],[357,11],[8,11]] rec text: Visualexperimentalresultshebluecontoursareboundar rec score: 0.915963
+********** end print ocr result **********
+3       type: text, region: [489,342,789,359], score: 0.630538, res: count of ocr result is : 1
+********** print ocr result **********
+0       det boxes: [[8,2],[294,2],[294,14],[8,14]] rec text: Table7.Experimentalresults onMSRA-TD500 rec score: 0.942251
+********** end print ocr result **********
+4       type: text, region: [444,751,841,848], score: 0.607345, res: count of ocr result is : 5
+********** print ocr result **********
+0       det boxes: [[19,3],[389,3],[389,17],[19,17]] rec text: Inthispaper,weproposeanovel adaptivebound rec score: 0.941031
+1       det boxes: [[4,22],[390,22],[390,36],[4,36]] rec text: aryproposalnetworkforarbitraryshapetextdetection rec score: 0.960172
+2       det boxes: [[4,42],[392,42],[392,56],[4,56]] rec text: whichadoptanboundaryproposalmodeltogeneratecoarse rec score: 0.934647
+3       det boxes: [[4,61],[389,61],[389,75],[4,75]] rec text: ooundaryproposals,andthenadoptanadaptiveboundary rec score: 0.946296
+4       det boxes: [[5,80],[387,80],[387,93],[5,93]] rec text: leformationmodelcombinedwithGCNandRNNtoper rec score: 0.952401
+********** end print ocr result **********
+5       type: title, region: [444,705,564,724], score: 0.785429, res: count of ocr result is : 1
+********** print ocr result **********
+0       det boxes: [[6,2],[113,2],[113,14],[6,14]] rec text: 5.Conclusion rec score: 0.856903
+********** end print ocr result **********
+6       type: table, region: [14,360,402,711], score: 0.963643, res: <html><body><table><thead><tr><td>Methods</td><td>Ext</td><td>R</td><td>P</td><td>F</td><td>FPS</td></tr></thead><tbody><tr><td>TextSnake [18]</td><td>Syn</td><td>85.3</td><td>67.9</td><td>75.6</td><td></td></tr><tr><td>CSE [17]</td><td>MiLT</td><td>76.1</td><td>78.7</td><td>77.4</td><td>0.38</td></tr><tr><td>LOMO[40]</td><td>Syn</td><td>76.5</td><td>85.7</td><td>80.8</td><td>4.4</td></tr><tr><td>ATRR[35]</td><td>Sy-</td><td>80.2</td><td>80.1</td><td>80.1</td><td>-</td></tr><tr><td>SegLink++ [28]</td><td>Syn</td><td>79.8</td><td>82.8</td><td>81.3</td><td>-</td></tr><tr><td>TextField [37]</td><td>Syn</td><td>79.8</td><td>83.0</td><td>81.4</td><td>6.0</td></tr><tr><td>MSR[38]</td><td>Syn</td><td>79.0</td><td>84.1</td><td>81.5</td><td>4.3</td></tr><tr><td>PSENet-1s [33]</td><td>MLT</td><td>79.7</td><td>84.8</td><td>82.2</td><td>3.9</td></tr><tr><td>DB [12]</td><td>Syn</td><td>80.2</td><td>86.9</td><td>83.4</td><td>22.0</td></tr><tr><td>CRAFT [2]</td><td>Syn</td><td>81.1</td><td>86.0</td><td>83.5</td><td>-</td></tr><tr><td>TextDragon [5]</td><td>MLT+</td><td>82.8</td><td>84.5</td><td>83.6</td><td></td></tr><tr><td>PAN [34]</td><td>Syn</td><td>81.2</td><td>86.4</td><td>83.7</td><td>39.8</td></tr><tr><td>ContourNet [36]</td><td></td><td>84.1</td><td>83.7</td><td>83.9</td><td>4.5</td></tr><tr><td>DRRG [41]</td><td>MLT</td><td>83.02</td><td>85.93</td><td>84.45</td><td>-</td></tr><tr><td>TextPerception[23]</td><td>Syn</td><td>81.9</td><td>87.5</td><td>84.6</td><td></td></tr><tr><td>Ours</td><td> Syn</td><td>80.57</td><td>87.66</td><td>83.97</td><td>12.08</td></tr><tr><td>Ours</td><td></td><td>81.45</td><td>87.81</td><td>84.51</td><td>12.15</td></tr><tr><td>Ours</td><td>MLT</td><td>83.60</td><td>86.45</td><td>85.00</td><td>12.21</td></tr></tbody></table></body></html>
+The table visualized image saved in ./output//6_1.png
+7       type: table, region: [462,359,820,657], score: 0.953917, res: <html><body><table><thead><tr><td>Methods</td><td>R</td><td>P</td><td>F</td><td>FPS</td></tr></thead><tbody><tr><td>SegLink [26]</td><td>70.0</td><td>86.0</td><td>77.0</td><td>8.9</td></tr><tr><td>PixelLink [4]</td><td>73.2</td><td>83.0</td><td>77.8</td><td>-</td></tr><tr><td>TextSnake [18]</td><td>73.9</td><td>83.2</td><td>78.3</td><td>1.1</td></tr><tr><td>TextField [37]</td><td>75.9</td><td>87.4</td><td>81.3</td><td>5.2 </td></tr><tr><td>MSR[38]</td><td>76.7</td><td>87.4</td><td>81.7</td><td>-</td></tr><tr><td>FTSN[3]</td><td>77.1</td><td>87.6</td><td>82.0</td><td>:</td></tr><tr><td>LSE[30]</td><td>81.7</td><td>84.2</td><td>82.9</td><td></td></tr><tr><td>CRAFT [2]</td><td>78.2</td><td>88.2</td><td>82.9</td><td>8.6</td></tr><tr><td>MCN [16]</td><td>79</td><td>88</td><td>83</td><td>-</td></tr><tr><td>ATRR[35]</td><td>82.1</td><td>85.2</td><td>83.6</td><td>-</td></tr><tr><td>PAN [34]</td><td>83.8</td><td>84.4</td><td>84.1</td><td>30.2</td></tr><tr><td>DB[12]</td><td>79.2</td><td>91.5</td><td>84.9</td><td>32.0</td></tr><tr><td>DRRG [41]</td><td>82.30</td><td>88.05</td><td>85.08</td><td>-</td></tr><tr><td>Ours (SynText)</td><td>80.68</td><td>85.40</td><td>82.97</td><td>12.68</td></tr><tr><td>Ours (MLT-17)</td><td>84.54</td><td>86.62</td><td>85.57</td><td>12.31</td></tr></tbody></table></body></html>
+The table visualized image saved in ./output//7_1.png
+8       type: figure, region: [14,3,836,310], score: 0.969443, res: count of ocr result is : 26
+********** print ocr result **********
+0       det boxes: [[506,14],[539,15],[539,22],[506,21]] rec text: E rec score: 0.318073
+...
+25      det boxes: [[680,290],[759,288],[759,303],[680,305]] rec text: (d) CTW1500 rec score: 0.95911
+********** end print ocr result **********
 ```

 <a name="3"></a>

--- a/deploy/cpp_infer/readme_ch.md
+++ b/deploy/cpp_infer/readme_ch.md
@@ -184,6 +184,9 @@ inference/
 |-- table
 |   |--inference.pdiparams
 |   |--inference.pdmodel
+|-- layout
+|   |--inference.pdiparams
+|   |--inference.pdmodel
 ```

 <a name="22"></a>
@@ -288,7 +291,30 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
    --cls=true \
 ```

-##### 7. 表格识别
+##### 7. 版面分析+表格识别
+```shell
+./build/ppocr --det_model_dir=inference/det_db \
+    --rec_model_dir=inference/rec_rcnn \
+    --table_model_dir=inference/table \
+    --image_dir=../../ppstructure/docs/table/table.jpg \
+    --layout_model_dir=inference/layout \
+    --type=structure \
+    --table=true \
+    --layout=true
+```
+
+##### 8. 版面分析
+```shell
+./build/ppocr --layout_model_dir=inference/layout \
+    --image_dir=../../ppstructure/docs/table/1.png \
+    --type=structure \
+    --table=false \
+    --layout=true \
+    --det=false \
+    --rec=false
+```
+
+##### 9. 表格识别
 ```shell
 ./build/ppocr --det_model_dir=inference/det_db \
    --rec_model_dir=inference/rec_rcnn \
@@ -352,13 +378,24 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
 |rec_img_w|int|320|文字识别模型输入图像宽度|


+- 版面分析模型相关
+
+|参数名称|类型|默认参数|意义|
+| :---: | :---: | :---: | :---: |
+|layout_model_dir|string|-|版面分析模型inference model地址|
+|layout_dict_path|string|../../ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt|字典文件|
+|layout_score_threshold|float|0.5|检测框的分数阈值|
+|layout_nms_threshold|float|0.5|nms的阈值|
+
+
 - 表格识别模型相关

 |参数名称|类型|默认参数|意义|
 | :---: | :---: | :---: | :---: |
 |table_model_dir|string|-|表格识别模型inference model地址|
-|table_char_dict_path|string|../../ppocr/utils/dict/table_structure_dict.txt|字典文件|
+|table_char_dict_path|string|../../ppocr/utils/dict/table_structure_dict_ch.txt|字典文件|
 |table_max_len|int|488|表格识别模型输入图像长边大小，最终网络输入图像大小为（table_max_len，table_max_len）|
+|merge_no_span_structure|bool|true|是否合并<td> 和 </td> 为<td></td>|


 * PaddleOCR也支持多语言的预测，更多支持的语言和模型可以参考[识别文档](../../doc/doc_ch/recognition.md)中的多语言字典与模型部分，如果希望进行多语言预测，只需将修改`rec_char_dict_path`（字典文件路径）以及`rec_model_dir`（inference模型路径）字段即可。
@@ -377,11 +414,51 @@ predict img: ../../doc/imgs/12.jpg
 The detection visualized image saved in ./output//12.jpg
 ```

- table
+- layout+table

 ```bash
-predict img: ../../ppstructure/docs/table/table.jpg
-0       type: table, region: [0,0,371,293], res: <html><body><table><thead><tr><td>Methods</td><td>R</td><td>P</td><td>F</td><td>FPS</td></tr></thead><tbody><tr><td>SegLink [26]</td><td>70.0</td><td>86.0</td><td>77.0</td><td>8.9</td></tr><tr><td>PixelLink [4]</td><td>73.2</td><td>83.0</td><td>77.8</td><td>-</td></tr><tr><td>TextSnake [18]</td><td>73.9</td><td>83.2</td><td>78.3</td><td>1.1</td></tr><tr><td>TextField [37]</td><td>75.9</td><td>87.4</td><td>81.3</td><td>5.2 </td></tr><tr><td>MSR[38]</td><td>76.7</td><td>87.4</td><td>81.7</td><td>-</td></tr><tr><td>FTSN [3]</td><td>77.1</td><td>87.6</td><td>82.0</td><td>-</td></tr><tr><td>LSE[30]</td><td>81.7</td><td>84.2</td><td>82.9</td><td>-</td></tr><tr><td>CRAFT [2]</td><td>78.2</td><td>88.2</td><td>82.9</td><td>8.6</td></tr><tr><td>MCN [16]</td><td>79</td><td>88</td><td>83</td><td>-</td></tr><tr><td>ATRR[35]</td><td>82.1</td><td>85.2</td><td>83.6</td><td>-</td></tr><tr><td>PAN [34]</td><td>83.8</td><td>84.4</td><td>84.1</td><td>30.2</td></tr><tr><td>DB[12]</td><td>79.2</td><td>91.5</td><td>84.9</td><td>32.0</td></tr><tr><td>DRRG [41]</td><td>82.30</td><td>88.05</td><td>85.08</td><td>-</td></tr><tr><td>Ours (SynText)</td><td>80.68</td><td>85.40</td><td>82.97</td><td>12.68</td></tr><tr><td>Ours (MLT-17)</td><td>84.54</td><td>86.62</td><td>85.57</td><td>12.31</td></tr></tbody></table></body></html>
+predict img: ../../ppstructure/docs/table/1.png
+0       type: text, region: [12,729,410,848], score: 0.781044, res: count of ocr result is : 7
+********** print ocr result **********
+0       det boxes: [[4,1],[79,1],[79,12],[4,12]] rec text: CTW1500. rec score: 0.769472
+...
+6       det boxes: [[4,99],[391,99],[391,112],[4,112]] rec text: sate-of-the-artmethods[12.34.36l.ourapproachachieves rec score: 0.90414
+********** end print ocr result **********
+1       type: text, region: [69,342,342,359], score: 0.703666, res: count of ocr result is : 1
+********** print ocr result **********
+0       det boxes: [[8,2],[269,2],[269,13],[8,13]] rec text: Table6.Experimentalresults on CTW-1500 rec score: 0.890454
+********** end print ocr result **********
+2       type: text, region: [70,316,706,332], score: 0.659738, res: count of ocr result is : 2
+********** print ocr result **********
+0       det boxes: [[373,2],[630,2],[630,11],[373,11]] rec text: oroposals.andthegreencontoursarefinal rec score: 0.919729
+1       det boxes: [[8,3],[357,3],[357,11],[8,11]] rec text: Visualexperimentalresultshebluecontoursareboundar rec score: 0.915963
+********** end print ocr result **********
+3       type: text, region: [489,342,789,359], score: 0.630538, res: count of ocr result is : 1
+********** print ocr result **********
+0       det boxes: [[8,2],[294,2],[294,14],[8,14]] rec text: Table7.Experimentalresults onMSRA-TD500 rec score: 0.942251
+********** end print ocr result **********
+4       type: text, region: [444,751,841,848], score: 0.607345, res: count of ocr result is : 5
+********** print ocr result **********
+0       det boxes: [[19,3],[389,3],[389,17],[19,17]] rec text: Inthispaper,weproposeanovel adaptivebound rec score: 0.941031
+1       det boxes: [[4,22],[390,22],[390,36],[4,36]] rec text: aryproposalnetworkforarbitraryshapetextdetection rec score: 0.960172
+2       det boxes: [[4,42],[392,42],[392,56],[4,56]] rec text: whichadoptanboundaryproposalmodeltogeneratecoarse rec score: 0.934647
+3       det boxes: [[4,61],[389,61],[389,75],[4,75]] rec text: ooundaryproposals,andthenadoptanadaptiveboundary rec score: 0.946296
+4       det boxes: [[5,80],[387,80],[387,93],[5,93]] rec text: leformationmodelcombinedwithGCNandRNNtoper rec score: 0.952401
+********** end print ocr result **********
+5       type: title, region: [444,705,564,724], score: 0.785429, res: count of ocr result is : 1
+********** print ocr result **********
+0       det boxes: [[6,2],[113,2],[113,14],[6,14]] rec text: 5.Conclusion rec score: 0.856903
+********** end print ocr result **********
+6       type: table, region: [14,360,402,711], score: 0.963643, res: <html><body><table><thead><tr><td>Methods</td><td>Ext</td><td>R</td><td>P</td><td>F</td><td>FPS</td></tr></thead><tbody><tr><td>TextSnake [18]</td><td>Syn</td><td>85.3</td><td>67.9</td><td>75.6</td><td></td></tr><tr><td>CSE [17]</td><td>MiLT</td><td>76.1</td><td>78.7</td><td>77.4</td><td>0.38</td></tr><tr><td>LOMO[40]</td><td>Syn</td><td>76.5</td><td>85.7</td><td>80.8</td><td>4.4</td></tr><tr><td>ATRR[35]</td><td>Sy-</td><td>80.2</td><td>80.1</td><td>80.1</td><td>-</td></tr><tr><td>SegLink++ [28]</td><td>Syn</td><td>79.8</td><td>82.8</td><td>81.3</td><td>-</td></tr><tr><td>TextField [37]</td><td>Syn</td><td>79.8</td><td>83.0</td><td>81.4</td><td>6.0</td></tr><tr><td>MSR[38]</td><td>Syn</td><td>79.0</td><td>84.1</td><td>81.5</td><td>4.3</td></tr><tr><td>PSENet-1s [33]</td><td>MLT</td><td>79.7</td><td>84.8</td><td>82.2</td><td>3.9</td></tr><tr><td>DB [12]</td><td>Syn</td><td>80.2</td><td>86.9</td><td>83.4</td><td>22.0</td></tr><tr><td>CRAFT [2]</td><td>Syn</td><td>81.1</td><td>86.0</td><td>83.5</td><td>-</td></tr><tr><td>TextDragon [5]</td><td>MLT+</td><td>82.8</td><td>84.5</td><td>83.6</td><td></td></tr><tr><td>PAN [34]</td><td>Syn</td><td>81.2</td><td>86.4</td><td>83.7</td><td>39.8</td></tr><tr><td>ContourNet [36]</td><td></td><td>84.1</td><td>83.7</td><td>83.9</td><td>4.5</td></tr><tr><td>DRRG [41]</td><td>MLT</td><td>83.02</td><td>85.93</td><td>84.45</td><td>-</td></tr><tr><td>TextPerception[23]</td><td>Syn</td><td>81.9</td><td>87.5</td><td>84.6</td><td></td></tr><tr><td>Ours</td><td> Syn</td><td>80.57</td><td>87.66</td><td>83.97</td><td>12.08</td></tr><tr><td>Ours</td><td></td><td>81.45</td><td>87.81</td><td>84.51</td><td>12.15</td></tr><tr><td>Ours</td><td>MLT</td><td>83.60</td><td>86.45</td><td>85.00</td><td>12.21</td></tr></tbody></table></body></html>
+The table visualized image saved in ./output//6_1.png
+7       type: table, region: [462,359,820,657], score: 0.953917, res: <html><body><table><thead><tr><td>Methods</td><td>R</td><td>P</td><td>F</td><td>FPS</td></tr></thead><tbody><tr><td>SegLink [26]</td><td>70.0</td><td>86.0</td><td>77.0</td><td>8.9</td></tr><tr><td>PixelLink [4]</td><td>73.2</td><td>83.0</td><td>77.8</td><td>-</td></tr><tr><td>TextSnake [18]</td><td>73.9</td><td>83.2</td><td>78.3</td><td>1.1</td></tr><tr><td>TextField [37]</td><td>75.9</td><td>87.4</td><td>81.3</td><td>5.2 </td></tr><tr><td>MSR[38]</td><td>76.7</td><td>87.4</td><td>81.7</td><td>-</td></tr><tr><td>FTSN[3]</td><td>77.1</td><td>87.6</td><td>82.0</td><td>:</td></tr><tr><td>LSE[30]</td><td>81.7</td><td>84.2</td><td>82.9</td><td></td></tr><tr><td>CRAFT [2]</td><td>78.2</td><td>88.2</td><td>82.9</td><td>8.6</td></tr><tr><td>MCN [16]</td><td>79</td><td>88</td><td>83</td><td>-</td></tr><tr><td>ATRR[35]</td><td>82.1</td><td>85.2</td><td>83.6</td><td>-</td></tr><tr><td>PAN [34]</td><td>83.8</td><td>84.4</td><td>84.1</td><td>30.2</td></tr><tr><td>DB[12]</td><td>79.2</td><td>91.5</td><td>84.9</td><td>32.0</td></tr><tr><td>DRRG [41]</td><td>82.30</td><td>88.05</td><td>85.08</td><td>-</td></tr><tr><td>Ours (SynText)</td><td>80.68</td><td>85.40</td><td>82.97</td><td>12.68</td></tr><tr><td>Ours (MLT-17)</td><td>84.54</td><td>86.62</td><td>85.57</td><td>12.31</td></tr></tbody></table></body></html>
+The table visualized image saved in ./output//7_1.png
+8       type: figure, region: [14,3,836,310], score: 0.969443, res: count of ocr result is : 26
+********** print ocr result **********
+0       det boxes: [[506,14],[539,15],[539,22],[506,21]] rec text: E rec score: 0.318073
+...
+25      det boxes: [[680,290],[759,288],[759,303],[680,305]] rec text: (d) CTW1500 rec score: 0.95911
+********** end print ocr result **********
 ```

 <a name="3"></a>

--- a/deploy/cpp_infer/src/args.cpp
+++ b/deploy/cpp_infer/src/args.cpp
@@ -51,16 +51,26 @@ DEFINE_string(rec_char_dict_path, "../../ppocr/utils/ppocr_keys_v1.txt",
 DEFINE_int32(rec_img_h, 48, "rec image height");
 DEFINE_int32(rec_img_w, 320, "rec image width");

+// layout model related (each flag: name, default value, --help text)
+DEFINE_string(layout_model_dir, "", "Path of layout inference model.");
+DEFINE_string(layout_dict_path,
+              "../../ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt",
+              "Path of dictionary.");
+DEFINE_double(layout_score_threshold, 0.5, "Threshold of score.");
+DEFINE_double(layout_nms_threshold, 0.5, "Threshold of nms.");
 // structure model related
 DEFINE_string(table_model_dir, "", "Path of table struture inference model.");
 DEFINE_int32(table_max_len, 488, "max len size of input image.");
 DEFINE_int32(table_batch_num, 1, "table_batch_num.");
+DEFINE_bool(merge_no_span_structure, true,
+            "Whether merge <td> and </td> to <td></td>");
 DEFINE_string(table_char_dict_path,
-              "../../ppocr/utils/dict/table_structure_dict.txt",
+              "../../ppocr/utils/dict/table_structure_dict_ch.txt",
              "Path of dictionary.");

 // ocr forward related
 DEFINE_bool(det, true, "Whether use det in forward.");
 DEFINE_bool(rec, true, "Whether use rec in forward.");
 DEFINE_bool(cls, false, "Whether use cls in forward.");
-DEFINE_bool(table, false, "Whether use table structure in forward.");
\ No newline at end of file
+DEFINE_bool(table, false, "Whether use table structure in forward.");
+DEFINE_bool(layout, false, "Whether use layout analysis in forward.");
\ No newline at end of file
--- a/deploy/cpp_infer/src/main.cpp
+++ b/deploy/cpp_infer/src/main.cpp
@@ -65,9 +65,18 @@ void check_params() {
      exit(1);
    }
  }
+  if (FLAGS_layout) {
+    if (FLAGS_layout_model_dir.empty() || FLAGS_image_dir.empty()) {
+      std::cout << "Usage[layout]: ./ppocr "
+                << "--layout_model_dir=/PATH/TO/LAYOUT_INFERENCE_MODEL/ "
+                << "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl;
+      exit(1);
+    }
+  }
  if (FLAGS_precision != "fp32" && FLAGS_precision != "fp16" &&
      FLAGS_precision != "int8") {
-    cout << "precison should be 'fp32'(default), 'fp16' or 'int8'. " << endl;
+    std::cout << "precison should be 'fp32'(default), 'fp16' or 'int8'. "
+              << std::endl;
    exit(1);
  }
 }
@@ -75,65 +84,94 @@ void check_params() {
 void ocr(std::vector<cv::String> &cv_all_img_names) {
  PPOCR ocr = PPOCR();

-  std::vector<std::vector<OCRPredictResult>> ocr_results =
-      ocr.ocr(cv_all_img_names, FLAGS_det, FLAGS_rec, FLAGS_cls);
+  if (FLAGS_benchmark) {
+    ocr.reset_timer();
+  }

+  std::vector<cv::Mat> img_list;
+  std::vector<cv::String> img_names;
  for (int i = 0; i < cv_all_img_names.size(); ++i) {
-    if (FLAGS_benchmark) {
-      cout << cv_all_img_names[i] << '\t';
-      if (FLAGS_rec && FLAGS_det) {
-        Utility::print_result(ocr_results[i]);
-      } else if (FLAGS_det) {
-        for (int n = 0; n < ocr_results[i].size(); n++) {
-          for (int m = 0; m < ocr_results[i][n].box.size(); m++) {
-            cout << ocr_results[i][n].box[m][0] << ' '
-                 << ocr_results[i][n].box[m][1] << ' ';
-          }
-        }
-        cout << endl;
-      } else {
-        Utility::print_result(ocr_results[i]);
-      }
-    } else {
-      cout << cv_all_img_names[i] << "\n";
-      Utility::print_result(ocr_results[i]);
-      if (FLAGS_visualize && FLAGS_det) {
-        cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
-        if (!srcimg.data) {
-          std::cerr << "[ERROR] image read failed! image path: "
-                    << cv_all_img_names[i] << endl;
-          exit(1);
-        }
-        std::string file_name = Utility::basename(cv_all_img_names[i]);
+    cv::Mat img = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
+    if (!img.data) {
+      std::cerr << "[ERROR] image read failed! image path: "
+                << cv_all_img_names[i] << std::endl;
+      continue;
+    }
+    img_list.push_back(img);
+    img_names.push_back(cv_all_img_names[i]);
+  }

-        Utility::VisualizeBboxes(srcimg, ocr_results[i],
-                                 FLAGS_output + "/" + file_name);
-      }
-      cout << "***************************" << endl;
+  std::vector<std::vector<OCRPredictResult>> ocr_results =
+      ocr.ocr(img_list, FLAGS_det, FLAGS_rec, FLAGS_cls);
+
+  for (int i = 0; i < img_names.size(); ++i) {
+    std::cout << "predict img: " << cv_all_img_names[i] << std::endl;
+    Utility::print_result(ocr_results[i]);
+    if (FLAGS_visualize && FLAGS_det) {
+      std::string file_name = Utility::basename(img_names[i]);
+      cv::Mat srcimg = img_list[i];
+      Utility::VisualizeBboxes(srcimg, ocr_results[i],
+                               FLAGS_output + "/" + file_name);
    }
  }
+  if (FLAGS_benchmark) {
+    ocr.benchmark_log(cv_all_img_names.size());
+  }
 }

 void structure(std::vector<cv::String> &cv_all_img_names) {
  PaddleOCR::PaddleStructure engine = PaddleOCR::PaddleStructure();
-  std::vector<std::vector<StructurePredictResult>> structure_results =
-      engine.structure(cv_all_img_names, false, FLAGS_table);
+
+  if (FLAGS_benchmark) {
+    engine.reset_timer();
+  }
+
  for (int i = 0; i < cv_all_img_names.size(); i++) {
-    cout << "predict img: " << cv_all_img_names[i] << endl;
-    for (int j = 0; j < structure_results[i].size(); j++) {
-      std::cout << j << "\ttype: " << structure_results[i][j].type
+    std::cout << "predict img: " << cv_all_img_names[i] << std::endl;
+    cv::Mat img = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
+    if (!img.data) {
+      std::cerr << "[ERROR] image read failed! image path: "
+                << cv_all_img_names[i] << std::endl;
+      continue;
+    }
+
+    std::vector<StructurePredictResult> structure_results = engine.structure(
+        img, FLAGS_layout, FLAGS_table, FLAGS_det && FLAGS_rec);
+
+    for (int j = 0; j < structure_results.size(); j++) {
+      std::cout << j << "\ttype: " << structure_results[j].type
                << ", region: [";
-      std::cout << structure_results[i][j].box[0] << ","
-                << structure_results[i][j].box[1] << ","
-                << structure_results[i][j].box[2] << ","
-                << structure_results[i][j].box[3] << "], res: ";
-      if (structure_results[i][j].type == "table") {
-        std::cout << structure_results[i][j].html << std::endl;
+      std::cout << structure_results[j].box[0] << ","
+                << structure_results[j].box[1] << ","
+                << structure_results[j].box[2] << ","
+                << structure_results[j].box[3] << "], score: ";
+      std::cout << structure_results[j].confidence << ", res: ";
+
+      if (structure_results[j].type == "table") {
+        std::cout << structure_results[j].html << std::endl;
+        if (structure_results[j].cell_box.size() > 0 && FLAGS_visualize) {
+          std::string file_name = Utility::basename(cv_all_img_names[i]);
+
+          Utility::VisualizeBboxes(img, structure_results[j],
+                                   FLAGS_output + "/" + std::to_string(j) +
+                                       "_" + file_name);
+        }
      } else {
-        Utility::print_result(structure_results[i][j].text_res);
+        std::cout << "count of ocr result is : "
+                  << structure_results[j].text_res.size() << std::endl;
+        if (structure_results[j].text_res.size() > 0) {
+          std::cout << "********** print ocr result "
+                    << "**********" << std::endl;
+          Utility::print_result(structure_results[j].text_res);
+          std::cout << "********** end print ocr result "
+                    << "**********" << std::endl;
+        }
      }
    }
  }
+  if (FLAGS_benchmark) {
+    engine.benchmark_log(cv_all_img_names.size());
+  }
 }

 int main(int argc, char **argv) {
@@ -143,19 +181,22 @@ int main(int argc, char **argv) {

  if (!Utility::PathExists(FLAGS_image_dir)) {
    std::cerr << "[ERROR] image path not exist! image_dir: " << FLAGS_image_dir
-              << endl;
+              << std::endl;
    exit(1);
  }

  std::vector<cv::String> cv_all_img_names;
  cv::glob(FLAGS_image_dir, cv_all_img_names);
-  std::cout << "total images num: " << cv_all_img_names.size() << endl;
+  std::cout << "total images num: " << cv_all_img_names.size() << std::endl;

+  if (!Utility::PathExists(FLAGS_output)) {
+    Utility::CreateDir(FLAGS_output);
+  }
  if (FLAGS_type == "ocr") {
    ocr(cv_all_img_names);
  } else if (FLAGS_type == "structure") {
    structure(cv_all_img_names);
  } else {
-    std::cout << "only value in ['ocr','structure'] is supported" << endl;
+    std::cout << "only value in ['ocr','structure'] is supported" << std::endl;
  }
 }
--- a/deploy/cpp_infer/src/ocr_cls.cpp
+++ b/deploy/cpp_infer/src/ocr_cls.cpp
@@ -32,7 +32,7 @@ void Classifier::Run(std::vector<cv::Mat> img_list,
  for (int beg_img_no = 0; beg_img_no < img_num;
       beg_img_no += this->cls_batch_num_) {
    auto preprocess_start = std::chrono::steady_clock::now();
-    int end_img_no = min(img_num, beg_img_no + this->cls_batch_num_);
+    int end_img_no = std::min(img_num, beg_img_no + this->cls_batch_num_);
    int batch_num = end_img_no - beg_img_no;
    // preprocess
    std::vector<cv::Mat> norm_img_batch;
@@ -97,7 +97,7 @@ void Classifier::Run(std::vector<cv::Mat> img_list,
 }

 void Classifier::LoadModel(const std::string &model_dir) {
-  AnalysisConfig config;
+  paddle_infer::Config config;
  config.SetModel(model_dir + "/inference.pdmodel",
                  model_dir + "/inference.pdiparams");

@@ -112,6 +112,11 @@ void Classifier::LoadModel(const std::string &model_dir) {
        precision = paddle_infer::Config::Precision::kInt8;
      }
      config.EnableTensorRtEngine(1 << 20, 10, 3, precision, false, false);
+      if (!Utility::PathExists("./trt_cls_shape.txt")) {
+        config.CollectShapeRangeInfo("./trt_cls_shape.txt");
+      } else {
+        config.EnableTunedTensorRtDynamicShape("./trt_cls_shape.txt", true);
+      }
    }
  } else {
    config.DisableGpu();
@@ -131,6 +136,6 @@ void Classifier::LoadModel(const std::string &model_dir) {
  config.EnableMemoryOptim();
  config.DisableGlogInfo();

-  this->predictor_ = CreatePredictor(config);
+  this->predictor_ = paddle_infer::CreatePredictor(config);
 }
 } // namespace PaddleOCR
--- a/deploy/cpp_infer/src/ocr_det.cpp
+++ b/deploy/cpp_infer/src/ocr_det.cpp
@@ -32,49 +32,12 @@ void DBDetector::LoadModel(const std::string &model_dir) {
      if (this->precision_ == "int8") {
        precision = paddle_infer::Config::Precision::kInt8;
      }
-      config.EnableTensorRtEngine(1 << 20, 1, 20, precision, false, false);
-      std::map<std::string, std::vector<int>> min_input_shape = {
-          {"x", {1, 3, 50, 50}},
-          {"conv2d_92.tmp_0", {1, 120, 20, 20}},
-          {"conv2d_91.tmp_0", {1, 24, 10, 10}},
-          {"conv2d_59.tmp_0", {1, 96, 20, 20}},
-          {"nearest_interp_v2_1.tmp_0", {1, 256, 10, 10}},
-          {"nearest_interp_v2_2.tmp_0", {1, 256, 20, 20}},
-          {"conv2d_124.tmp_0", {1, 256, 20, 20}},
-          {"nearest_interp_v2_3.tmp_0", {1, 64, 20, 20}},
-          {"nearest_interp_v2_4.tmp_0", {1, 64, 20, 20}},
-          {"nearest_interp_v2_5.tmp_0", {1, 64, 20, 20}},
-          {"elementwise_add_7", {1, 56, 2, 2}},
-          {"nearest_interp_v2_0.tmp_0", {1, 256, 2, 2}}};
-      std::map<std::string, std::vector<int>> max_input_shape = {
-          {"x", {1, 3, 1536, 1536}},
-          {"conv2d_92.tmp_0", {1, 120, 400, 400}},
-          {"conv2d_91.tmp_0", {1, 24, 200, 200}},
-          {"conv2d_59.tmp_0", {1, 96, 400, 400}},
-          {"nearest_interp_v2_1.tmp_0", {1, 256, 200, 200}},
-          {"nearest_interp_v2_2.tmp_0", {1, 256, 400, 400}},
-          {"conv2d_124.tmp_0", {1, 256, 400, 400}},
-          {"nearest_interp_v2_3.tmp_0", {1, 64, 400, 400}},
-          {"nearest_interp_v2_4.tmp_0", {1, 64, 400, 400}},
-          {"nearest_interp_v2_5.tmp_0", {1, 64, 400, 400}},
-          {"elementwise_add_7", {1, 56, 400, 400}},
-          {"nearest_interp_v2_0.tmp_0", {1, 256, 400, 400}}};
-      std::map<std::string, std::vector<int>> opt_input_shape = {
-          {"x", {1, 3, 640, 640}},
-          {"conv2d_92.tmp_0", {1, 120, 160, 160}},
-          {"conv2d_91.tmp_0", {1, 24, 80, 80}},
-          {"conv2d_59.tmp_0", {1, 96, 160, 160}},
-          {"nearest_interp_v2_1.tmp_0", {1, 256, 80, 80}},
-          {"nearest_interp_v2_2.tmp_0", {1, 256, 160, 160}},
-          {"conv2d_124.tmp_0", {1, 256, 160, 160}},
-          {"nearest_interp_v2_3.tmp_0", {1, 64, 160, 160}},
-          {"nearest_interp_v2_4.tmp_0", {1, 64, 160, 160}},
-          {"nearest_interp_v2_5.tmp_0", {1, 64, 160, 160}},
-          {"elementwise_add_7", {1, 56, 40, 40}},
-          {"nearest_interp_v2_0.tmp_0", {1, 256, 40, 40}}};
-
-      config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
-                                    opt_input_shape);
+      config.EnableTensorRtEngine(1 << 30, 1, 20, precision, false, false);
+      if (!Utility::PathExists("./trt_det_shape.txt")) {
+        config.CollectShapeRangeInfo("./trt_det_shape.txt");
+      } else {
+        config.EnableTunedTensorRtDynamicShape("./trt_det_shape.txt", true);
+      }
    }
  } else {
    config.DisableGpu();
@@ -95,7 +58,7 @@ void DBDetector::LoadModel(const std::string &model_dir) {
  config.EnableMemoryOptim();
  // config.DisableGlogInfo();

-  this->predictor_ = CreatePredictor(config);
+  this->predictor_ = paddle_infer::CreatePredictor(config);
 }

 void DBDetector::Run(cv::Mat &img,

--- a/deploy/cpp_infer/src/ocr_rec.cpp
+++ b/deploy/cpp_infer/src/ocr_rec.cpp
@@ -37,7 +37,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
  for (int beg_img_no = 0; beg_img_no < img_num;
       beg_img_no += this->rec_batch_num_) {
    auto preprocess_start = std::chrono::steady_clock::now();
-    int end_img_no = min(img_num, beg_img_no + this->rec_batch_num_);
+    int end_img_no = std::min(img_num, beg_img_no + this->rec_batch_num_);
    int batch_num = end_img_no - beg_img_no;
    int imgH = this->rec_image_shape_[1];
    int imgW = this->rec_image_shape_[2];
@@ -46,7 +46,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
      int h = img_list[indices[ino]].rows;
      int w = img_list[indices[ino]].cols;
      float wh_ratio = w * 1.0 / h;
-      max_wh_ratio = max(max_wh_ratio, wh_ratio);
+      max_wh_ratio = std::max(max_wh_ratio, wh_ratio);
    }

    int batch_width = imgW;
@@ -60,7 +60,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
      this->normalize_op_.Run(&resize_img, this->mean_, this->scale_,
                              this->is_scale_);
      norm_img_batch.push_back(resize_img);
-      batch_width = max(resize_img.cols, batch_width);
+      batch_width = std::max(resize_img.cols, batch_width);
    }

    std::vector<float> input(batch_num * 3 * imgH * batch_width, 0.0f);
@@ -115,7 +115,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
        last_index = argmax_idx;
      }
      score /= count;
-      if (isnan(score)) {
+      if (std::isnan(score)) {
        continue;
      }
      rec_texts[indices[beg_img_no + m]] = str_res;
@@ -130,7 +130,6 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
 }

 void CRNNRecognizer::LoadModel(const std::string &model_dir) {
-  //   AnalysisConfig config;
  paddle_infer::Config config;
  config.SetModel(model_dir + "/inference.pdmodel",
                  model_dir + "/inference.pdiparams");
@@ -147,20 +146,11 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
      if (this->precision_ == "int8") {
        precision = paddle_infer::Config::Precision::kInt8;
      }
-      config.EnableTensorRtEngine(1 << 20, 10, 15, precision, false, false);
-      int imgH = this->rec_image_shape_[1];
-      int imgW = this->rec_image_shape_[2];
-      std::map<std::string, std::vector<int>> min_input_shape = {
-          {"x", {1, 3, imgH, 10}}, {"lstm_0.tmp_0", {10, 1, 96}}};
-      std::map<std::string, std::vector<int>> max_input_shape = {
-          {"x", {this->rec_batch_num_, 3, imgH, 2500}},
-          {"lstm_0.tmp_0", {1000, 1, 96}}};
-      std::map<std::string, std::vector<int>> opt_input_shape = {
-          {"x", {this->rec_batch_num_, 3, imgH, imgW}},
-          {"lstm_0.tmp_0", {25, 1, 96}}};
-
-      config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
-                                    opt_input_shape);
+      if (!Utility::PathExists("./trt_rec_shape.txt")) {
+        config.CollectShapeRangeInfo("./trt_rec_shape.txt");
+      } else {
+        config.EnableTunedTensorRtDynamicShape("./trt_rec_shape.txt", true);
+      }
    }
  } else {
    config.DisableGpu();
@@ -185,7 +175,7 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
  config.EnableMemoryOptim();
  //   config.DisableGlogInfo();

-  this->predictor_ = CreatePredictor(config);
+  this->predictor_ = paddle_infer::CreatePredictor(config);
 }

 } // namespace PaddleOCR
--- a/deploy/cpp_infer/src/paddleocr.cpp
+++ b/deploy/cpp_infer/src/paddleocr.cpp
@@ -16,7 +16,7 @@
 #include <include/paddleocr.h>

 #include "auto_log/autolog.h"
-#include <numeric>
+
 namespace PaddleOCR {

 PPOCR::PPOCR() {
@@ -44,8 +44,71 @@ PPOCR::PPOCR() {
  }
 };

-void PPOCR::det(cv::Mat img, std::vector<OCRPredictResult> &ocr_results,
-                std::vector<double> &times) {
+std::vector<std::vector<OCRPredictResult>>
+PPOCR::ocr(std::vector<cv::Mat> img_list, bool det, bool rec, bool cls) {
+  std::vector<std::vector<OCRPredictResult>> ocr_results;
+
+  if (!det) {
+    std::vector<OCRPredictResult> ocr_result;
+    ocr_result.resize(img_list.size());
+    if (cls && this->classifier_ != nullptr) {
+      this->cls(img_list, ocr_result);
+      for (int i = 0; i < img_list.size(); i++) {
+        if (ocr_result[i].cls_label % 2 == 1 &&
+            ocr_result[i].cls_score > this->classifier_->cls_thresh) {
+          cv::rotate(img_list[i], img_list[i], 1);
+        }
+      }
+    }
+    if (rec) {
+      this->rec(img_list, ocr_result);
+    }
+    for (int i = 0; i < ocr_result.size(); ++i) {
+      std::vector<OCRPredictResult> ocr_result_tmp;
+      ocr_result_tmp.push_back(ocr_result[i]);
+      ocr_results.push_back(ocr_result_tmp);
+    }
+  } else {
+    for (int i = 0; i < img_list.size(); ++i) {
+      std::vector<OCRPredictResult> ocr_result =
+          this->ocr(img_list[i], true, rec, cls);
+      ocr_results.push_back(ocr_result);
+    }
+  }
+  return ocr_results;
+}
+
+std::vector<OCRPredictResult> PPOCR::ocr(cv::Mat img, bool det, bool rec,
+                                         bool cls) {
+
+  std::vector<OCRPredictResult> ocr_result;
+  // det
+  this->det(img, ocr_result);
+  // crop image
+  std::vector<cv::Mat> img_list;
+  for (int j = 0; j < ocr_result.size(); j++) {
+    cv::Mat crop_img;
+    crop_img = Utility::GetRotateCropImage(img, ocr_result[j].box);
+    img_list.push_back(crop_img);
+  }
+  // cls
+  if (cls && this->classifier_ != nullptr) {
+    this->cls(img_list, ocr_result);
+    for (int i = 0; i < img_list.size(); i++) {
+      if (ocr_result[i].cls_label % 2 == 1 &&
+          ocr_result[i].cls_score > this->classifier_->cls_thresh) {
+        cv::rotate(img_list[i], img_list[i], 1);
+      }
+    }
+  }
+  // rec
+  if (rec) {
+    this->rec(img_list, ocr_result);
+  }
+  return ocr_result;
+}
+
+void PPOCR::det(cv::Mat img, std::vector<OCRPredictResult> &ocr_results) {
  std::vector<std::vector<std::vector<int>>> boxes;
  std::vector<double> det_times;

@@ -58,14 +121,13 @@ void PPOCR::det(cv::Mat img, std::vector<OCRPredictResult> &ocr_results,
  }
  // sort boex from top to bottom, from left to right
  Utility::sorted_boxes(ocr_results);
-  times[0] += det_times[0];
-  times[1] += det_times[1];
-  times[2] += det_times[2];
+  this->time_info_det[0] += det_times[0];
+  this->time_info_det[1] += det_times[1];
+  this->time_info_det[2] += det_times[2];
 }

 void PPOCR::rec(std::vector<cv::Mat> img_list,
-                std::vector<OCRPredictResult> &ocr_results,
-                std::vector<double> &times) {
+                std::vector<OCRPredictResult> &ocr_results) {
  std::vector<std::string> rec_texts(img_list.size(), "");
  std::vector<float> rec_text_scores(img_list.size(), 0);
  std::vector<double> rec_times;
@@ -75,14 +137,13 @@ void PPOCR::rec(std::vector<cv::Mat> img_list,
    ocr_results[i].text = rec_texts[i];
    ocr_results[i].score = rec_text_scores[i];
  }
-  times[0] += rec_times[0];
-  times[1] += rec_times[1];
-  times[2] += rec_times[2];
+  this->time_info_rec[0] += rec_times[0];
+  this->time_info_rec[1] += rec_times[1];
+  this->time_info_rec[2] += rec_times[2];
 }

 void PPOCR::cls(std::vector<cv::Mat> img_list,
-                std::vector<OCRPredictResult> &ocr_results,
-                std::vector<double> &times) {
+                std::vector<OCRPredictResult> &ocr_results) {
  std::vector<int> cls_labels(img_list.size(), 0);
  std::vector<float> cls_scores(img_list.size(), 0);
  std::vector<double> cls_times;
@@ -92,125 +153,43 @@ void PPOCR::cls(std::vector<cv::Mat> img_list,
    ocr_results[i].cls_label = cls_labels[i];
    ocr_results[i].cls_score = cls_scores[i];
  }
-  times[0] += cls_times[0];
-  times[1] += cls_times[1];
-  times[2] += cls_times[2];
+  this->time_info_cls[0] += cls_times[0];
+  this->time_info_cls[1] += cls_times[1];
+  this->time_info_cls[2] += cls_times[2];
 }

-std::vector<std::vector<OCRPredictResult>>
-PPOCR::ocr(std::vector<cv::String> cv_all_img_names, bool det, bool rec,
-           bool cls) {
-  std::vector<double> time_info_det = {0, 0, 0};
-  std::vector<double> time_info_rec = {0, 0, 0};
-  std::vector<double> time_info_cls = {0, 0, 0};
-  std::vector<std::vector<OCRPredictResult>> ocr_results;
-
-  if (!det) {
-    std::vector<OCRPredictResult> ocr_result;
-    // read image
-    std::vector<cv::Mat> img_list;
-    for (int i = 0; i < cv_all_img_names.size(); ++i) {
-      cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
-      if (!srcimg.data) {
-        std::cerr << "[ERROR] image read failed! image path: "
-                  << cv_all_img_names[i] << endl;
-        exit(1);
-      }
-      img_list.push_back(srcimg);
-      OCRPredictResult res;
-      ocr_result.push_back(res);
-    }
-    if (cls && this->classifier_ != nullptr) {
-      this->cls(img_list, ocr_result, time_info_cls);
-      for (int i = 0; i < img_list.size(); i++) {
-        if (ocr_result[i].cls_label % 2 == 1 &&
-            ocr_result[i].cls_score > this->classifier_->cls_thresh) {
-          cv::rotate(img_list[i], img_list[i], 1);
-        }
-      }
-    }
-    if (rec) {
-      this->rec(img_list, ocr_result, time_info_rec);
-    }
-    for (int i = 0; i < cv_all_img_names.size(); ++i) {
-      std::vector<OCRPredictResult> ocr_result_tmp;
-      ocr_result_tmp.push_back(ocr_result[i]);
-      ocr_results.push_back(ocr_result_tmp);
-    }
-  } else {
-    if (!Utility::PathExists(FLAGS_output) && FLAGS_det) {
-      Utility::CreateDir(FLAGS_output);
-    }
-
-    for (int i = 0; i < cv_all_img_names.size(); ++i) {
-      std::vector<OCRPredictResult> ocr_result;
-      if (!FLAGS_benchmark) {
-        cout << "predict img: " << cv_all_img_names[i] << endl;
-      }
-
-      cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
-      if (!srcimg.data) {
-        std::cerr << "[ERROR] image read failed! image path: "
-                  << cv_all_img_names[i] << endl;
-        exit(1);
-      }
-      // det
-      this->det(srcimg, ocr_result, time_info_det);
-      // crop image
-      std::vector<cv::Mat> img_list;
-      for (int j = 0; j < ocr_result.size(); j++) {
-        cv::Mat crop_img;
-        crop_img = Utility::GetRotateCropImage(srcimg, ocr_result[j].box);
-        img_list.push_back(crop_img);
-      }
-
-      // cls
-      if (cls && this->classifier_ != nullptr) {
-        this->cls(img_list, ocr_result, time_info_cls);
-        for (int i = 0; i < img_list.size(); i++) {
-          if (ocr_result[i].cls_label % 2 == 1 &&
-              ocr_result[i].cls_score > this->classifier_->cls_thresh) {
-            cv::rotate(img_list[i], img_list[i], 1);
-          }
-        }
-      }
-      // rec
-      if (rec) {
-        this->rec(img_list, ocr_result, time_info_rec);
-      }
-      ocr_results.push_back(ocr_result);
-    }
-  }
-  if (FLAGS_benchmark) {
-    this->log(time_info_det, time_info_rec, time_info_cls,
-              cv_all_img_names.size());
-  }
-  return ocr_results;
-} // namespace PaddleOCR
+void PPOCR::reset_timer() {
+  this->time_info_det = {0, 0, 0};
+  this->time_info_rec = {0, 0, 0};
+  this->time_info_cls = {0, 0, 0};
+}

-void PPOCR::log(std::vector<double> &det_times, std::vector<double> &rec_times,
-                std::vector<double> &cls_times, int img_num) {
-  if (det_times[0] + det_times[1] + det_times[2] > 0) {
+void PPOCR::benchmark_log(int img_num) {
+  if (this->time_info_det[0] + this->time_info_det[1] + this->time_info_det[2] >
+      0) {
    AutoLogger autolog_det("ocr_det", FLAGS_use_gpu, FLAGS_use_tensorrt,
                           FLAGS_enable_mkldnn, FLAGS_cpu_threads, 1, "dynamic",
-                           FLAGS_precision, det_times, img_num);
+                           FLAGS_precision, this->time_info_det, img_num);
    autolog_det.report();
  }
-  if (rec_times[0] + rec_times[1] + rec_times[2] > 0) {
+  if (this->time_info_rec[0] + this->time_info_rec[1] + this->time_info_rec[2] >
+      0) {
    AutoLogger autolog_rec("ocr_rec", FLAGS_use_gpu, FLAGS_use_tensorrt,
                           FLAGS_enable_mkldnn, FLAGS_cpu_threads,
                           FLAGS_rec_batch_num, "dynamic", FLAGS_precision,
-                           rec_times, img_num);
+                           this->time_info_rec, img_num);
    autolog_rec.report();
  }
-  if (cls_times[0] + cls_times[1] + cls_times[2] > 0) {
+  if (this->time_info_cls[0] + this->time_info_cls[1] + this->time_info_cls[2] >
+      0) {
    AutoLogger autolog_cls("ocr_cls", FLAGS_use_gpu, FLAGS_use_tensorrt,
                           FLAGS_enable_mkldnn, FLAGS_cpu_threads,
                           FLAGS_cls_batch_num, "dynamic", FLAGS_precision,
-                           cls_times, img_num);
+                           this->time_info_cls, img_num);
    autolog_cls.report();
  }
 }
+
 PPOCR::~PPOCR() {
  if (this->detector_ != nullptr) {
    delete this->detector_;

--- a/deploy/cpp_infer/src/paddlestructure.cpp
+++ b/deploy/cpp_infer/src/paddlestructure.cpp
--- a/deploy/cpp_infer/src/postprocess_op.cpp
+++ b/deploy/cpp_infer/src/postprocess_op.cpp
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include <include/clipper.h>
 #include <include/postprocess_op.h>

 namespace PaddleOCR {
@@ -352,8 +351,21 @@ std::vector<std::vector<std::vector<int>>> DBPostProcessor::FilterTagDetRes(
  return root_points;
 }

-void TablePostProcessor::init(std::string label_path) {
+void TablePostProcessor::init(std::string label_path,
+                              bool merge_no_span_structure) {
  this->label_list_ = Utility::ReadDict(label_path);
+  if (merge_no_span_structure) {
+    this->label_list_.push_back("<td></td>");
+    std::vector<std::string>::iterator it;
+    for (it = this->label_list_.begin(); it != this->label_list_.end();) {
+      if (*it == "<td>") {
+        it = this->label_list_.erase(it);
+      } else {
+        ++it;
+      }
+    }
+  }
+  // add_special_char
  this->label_list_.insert(this->label_list_.begin(), this->beg);
  this->label_list_.push_back(this->end);
 }
@@ -363,12 +375,12 @@ void TablePostProcessor::Run(
    std::vector<float> &rec_scores, std::vector<int> &loc_preds_shape,
    std::vector<int> &structure_probs_shape,
    std::vector<std::vector<std::string>> &rec_html_tag_batch,
-    std::vector<std::vector<std::vector<std::vector<int>>>> &rec_boxes_batch,
+    std::vector<std::vector<std::vector<int>>> &rec_boxes_batch,
    std::vector<int> &width_list, std::vector<int> &height_list) {
  for (int batch_idx = 0; batch_idx < structure_probs_shape[0]; batch_idx++) {
    // image tags and boxs
    std::vector<std::string> rec_html_tags;
-    std::vector<std::vector<std::vector<int>>> rec_boxes;
+    std::vector<std::vector<int>> rec_boxes;

    float score = 0.f;
    int count = 0;
@@ -378,7 +390,7 @@ void TablePostProcessor::Run(
    // step
    for (int step_idx = 0; step_idx < structure_probs_shape[1]; step_idx++) {
      std::string html_tag;
-      std::vector<std::vector<int>> rec_box;
+      std::vector<int> rec_box;
      // html tag
      int step_start_idx = (batch_idx * structure_probs_shape[1] + step_idx) *
                           structure_probs_shape[2];
@@ -399,24 +411,26 @@ void TablePostProcessor::Run(
      count += 1;
      score += char_score;
      rec_html_tags.push_back(html_tag);
+
      // box
      if (html_tag == "<td>" || html_tag == "<td" || html_tag == "<td></td>") {
-        for (int point_idx = 0; point_idx < loc_preds_shape[2];
-             point_idx += 2) {
-          std::vector<int> point(2, 0);
+        for (int point_idx = 0; point_idx < loc_preds_shape[2]; point_idx++) {
          step_start_idx = (batch_idx * structure_probs_shape[1] + step_idx) *
                               loc_preds_shape[2] +
                           point_idx;
-          point[0] = int(loc_preds[step_start_idx] * width_list[batch_idx]);
-          point[1] =
-              int(loc_preds[step_start_idx + 1] * height_list[batch_idx]);
+          float point = loc_preds[step_start_idx];
+          if (point_idx % 2 == 0) {
+            point = int(point * width_list[batch_idx]);
+          } else {
+            point = int(point * height_list[batch_idx]);
+          }
          rec_box.push_back(point);
        }
        rec_boxes.push_back(rec_box);
      }
    }
    score /= count;
-    if (isnan(score) || rec_boxes.size() == 0) {
+    if (std::isnan(score) || rec_boxes.size() == 0) {
      score = -1;
    }
    rec_scores.push_back(score);
@@ -425,4 +439,137 @@ void TablePostProcessor::Run(
  }
 }

+void PicodetPostProcessor::init(std::string label_path,
+                                const double score_threshold,
+                                const double nms_threshold,
+                                const std::vector<int> &fpn_stride) {
+  this->label_list_ = Utility::ReadDict(label_path);
+  this->score_threshold_ = score_threshold;
+  this->nms_threshold_ = nms_threshold;
+  this->num_class_ = label_list_.size();
+  this->fpn_stride_ = fpn_stride;
+}
+
+void PicodetPostProcessor::Run(std::vector<StructurePredictResult> &results,
+                               std::vector<std::vector<float>> outs,
+                               std::vector<int> ori_shape,
+                               std::vector<int> resize_shape, int reg_max) {
+  int in_h = resize_shape[0];
+  int in_w = resize_shape[1];
+  float scale_factor_h = resize_shape[0] / float(ori_shape[0]);
+  float scale_factor_w = resize_shape[1] / float(ori_shape[1]);
+
+  std::vector<std::vector<StructurePredictResult>> bbox_results;
+  bbox_results.resize(this->num_class_);
+  for (int i = 0; i < this->fpn_stride_.size(); ++i) {
+    int feature_h = std::ceil((float)in_h / this->fpn_stride_[i]);
+    int feature_w = std::ceil((float)in_w / this->fpn_stride_[i]);
+    for (int idx = 0; idx < feature_h * feature_w; idx++) {
+      // score and label
+      float score = 0;
+      int cur_label = 0;
+      for (int label = 0; label < this->num_class_; label++) {
+        if (outs[i][idx * this->num_class_ + label] > score) {
+          score = outs[i][idx * this->num_class_ + label];
+          cur_label = label;
+        }
+      }
+      // bbox
+      if (score > this->score_threshold_) {
+        int row = idx / feature_w;
+        int col = idx % feature_w;
+        std::vector<float> bbox_pred(
+            outs[i + this->fpn_stride_.size()].begin() + idx * 4 * reg_max,
+            outs[i + this->fpn_stride_.size()].begin() +
+                (idx + 1) * 4 * reg_max);
+        bbox_results[cur_label].push_back(
+            this->disPred2Bbox(bbox_pred, cur_label, score, col, row,
+                               this->fpn_stride_[i], resize_shape, reg_max));
+      }
+    }
+  }
+  for (int i = 0; i < bbox_results.size(); i++) {
+    bool flag = bbox_results[i].size() <= 0;
+  }
+  for (int i = 0; i < bbox_results.size(); i++) {
+    bool flag = bbox_results[i].size() <= 0;
+    if (bbox_results[i].size() <= 0) {
+      continue;
+    }
+    this->nms(bbox_results[i], this->nms_threshold_);
+    for (auto box : bbox_results[i]) {
+      box.box[0] = box.box[0] / scale_factor_w;
+      box.box[2] = box.box[2] / scale_factor_w;
+      box.box[1] = box.box[1] / scale_factor_h;
+      box.box[3] = box.box[3] / scale_factor_h;
+      results.push_back(box);
+    }
+  }
+}
+
+StructurePredictResult
+PicodetPostProcessor::disPred2Bbox(std::vector<float> bbox_pred, int label,
+                                   float score, int x, int y, int stride,
+                                   std::vector<int> im_shape, int reg_max) {
+  float ct_x = (x + 0.5) * stride;
+  float ct_y = (y + 0.5) * stride;
+  std::vector<float> dis_pred;
+  dis_pred.resize(4);
+  for (int i = 0; i < 4; i++) {
+    float dis = 0;
+    std::vector<float> bbox_pred_i(bbox_pred.begin() + i * reg_max,
+                                   bbox_pred.begin() + (i + 1) * reg_max);
+    std::vector<float> dis_after_sm =
+        Utility::activation_function_softmax(bbox_pred_i);
+    for (int j = 0; j < reg_max; j++) {
+      dis += j * dis_after_sm[j];
+    }
+    dis *= stride;
+    dis_pred[i] = dis;
+  }
+
+  float xmin = (std::max)(ct_x - dis_pred[0], .0f);
+  float ymin = (std::max)(ct_y - dis_pred[1], .0f);
+  float xmax = (std::min)(ct_x + dis_pred[2], (float)im_shape[1]);
+  float ymax = (std::min)(ct_y + dis_pred[3], (float)im_shape[0]);
+
+  StructurePredictResult result_item;
+  result_item.box = {xmin, ymin, xmax, ymax};
+  result_item.type = this->label_list_[label];
+  result_item.confidence = score;
+
+  return result_item;
+}
+
+void PicodetPostProcessor::nms(std::vector<StructurePredictResult> &input_boxes,
+                               float nms_threshold) {
+  std::sort(input_boxes.begin(), input_boxes.end(),
+            [](StructurePredictResult a, StructurePredictResult b) {
+              return a.confidence > b.confidence;
+            });
+  std::vector<int> picked(input_boxes.size(), 1);
+
+  for (int i = 0; i < input_boxes.size(); ++i) {
+    if (picked[i] == 0) {
+      continue;
+    }
+    for (int j = i + 1; j < input_boxes.size(); ++j) {
+      if (picked[j] == 0) {
+        continue;
+      }
+      float iou = Utility::iou(input_boxes[i].box, input_boxes[j].box);
+      if (iou > nms_threshold) {
+        picked[j] = 0;
+      }
+    }
+  }
+  std::vector<StructurePredictResult> input_boxes_nms;
+  for (int i = 0; i < input_boxes.size(); ++i) {
+    if (picked[i] == 1) {
+      input_boxes_nms.push_back(input_boxes[i]);
+    }
+  }
+  input_boxes = input_boxes_nms;
+}
+
 } // namespace PaddleOCR
--- a/deploy/cpp_infer/src/preprocess_op.cpp
+++ b/deploy/cpp_infer/src/preprocess_op.cpp
--- a/deploy/cpp_infer/src/structure_layout.cpp
+++ b/deploy/cpp_infer/src/structure_layout.cpp
--- a/deploy/cpp_infer/src/structure_table.cpp
+++ b/deploy/cpp_infer/src/structure_table.cpp
--- a/deploy/cpp_infer/src/utility.cpp
+++ b/deploy/cpp_infer/src/utility.cpp
--- a/deploy/lite/config.txt
+++ b/deploy/lite/config.txt
@@ -5,4 +5,4 @@ det_db_unclip_ratio  1.6
 det_db_use_dilate 0
 det_use_polygon_score 1
 use_direction_classify  1
-rec_image_height  32
\ No newline at end of file
+rec_image_height  48
\ No newline at end of file
--- a/deploy/lite/readme.md
+++ b/deploy/lite/readme.md
--- a/deploy/lite/readme_ch.md
+++ b/deploy/lite/readme_ch.md
--- a/deploy/slim/quantization/README.md
+++ b/deploy/slim/quantization/README.md
--- a/deploy/slim/quantization/README_en.md
+++ b/deploy/slim/quantization/README_en.md
--- a/deploy/slim/quantization/export_model.py
+++ b/deploy/slim/quantization/export_model.py
--- a/deploy/slim/quantization/quant.py
+++ b/deploy/slim/quantization/quant.py
@@ -158,8 +158,7 @@ def main(config, device, logger, vdl_writer):

    pre_best_model_dict = dict()
    # load fp32 model to begin quantization
-    if config["Global"]["pretrained_model"] is not None:
-        pre_best_model_dict = load_model(config, model)
+    pre_best_model_dict = load_model(config, model, None, config['Architecture']["model_type"])

    freeze_params = False
    if config['Architecture']["algorithm"] in ["Distillation"]:
@@ -184,8 +183,7 @@ def main(config, device, logger, vdl_writer):
        model=model)

    # resume PACT training process
-    if config["Global"]["checkpoints"] is not None:
-        pre_best_model_dict = load_model(config, model, optimizer)
+    pre_best_model_dict = load_model(config, model, optimizer, config['Architecture']["model_type"])

    # build metric
    eval_class = build_metric(config['Metric'])

--- a/deploy/slim/quantization/quant_kl.py
+++ b/deploy/slim/quantization/quant_kl.py
--- a/doc/doc_ch/algorithm_det_ct.md
+++ b/doc/doc_ch/algorithm_det_ct.md
--- a/doc/doc_ch/algorithm_kie_layoutxlm.md
+++ b/doc/doc_ch/algorithm_kie_layoutxlm.md
--- a/doc/doc_ch/algorithm_kie_vi_layoutxlm.md
+++ b/doc/doc_ch/algorithm_kie_vi_layoutxlm.md
--- a/doc/doc_ch/algorithm_overview.md
+++ b/doc/doc_ch/algorithm_overview.md
--- a/doc/doc_ch/algorithm_rec_robustscanner.md
+++ b/doc/doc_ch/algorithm_rec_robustscanner.md
--- a/doc/doc_ch/algorithm_rec_spin.md
+++ b/doc/doc_ch/algorithm_rec_spin.md
--- a/doc/doc_ch/inference_args.md
+++ b/doc/doc_ch/inference_args.md
--- a/doc/doc_en/algorithm_det_ct_en.md
+++ b/doc/doc_en/algorithm_det_ct_en.md
--- a/doc/doc_en/algorithm_kie_layoutxlm_en.md
+++ b/doc/doc_en/algorithm_kie_layoutxlm_en.md
--- a/doc/doc_en/algorithm_kie_vi_layoutxlm_en.md
+++ b/doc/doc_en/algorithm_kie_vi_layoutxlm_en.md
--- a/doc/doc_en/algorithm_overview_en.md
+++ b/doc/doc_en/algorithm_overview_en.md
--- a/doc/doc_en/algorithm_rec_robustscanner_en.md
+++ b/doc/doc_en/algorithm_rec_robustscanner_en.md
--- a/doc/doc_en/algorithm_rec_spin_en.md
+++ b/doc/doc_en/algorithm_rec_spin_en.md
--- a/doc/doc_en/inference_args_en.md
+++ b/doc/doc_en/inference_args_en.md
--- a/doc/imgs_results/det_res_img623_ct.jpg
+++ b/doc/imgs_results/det_res_img623_ct.jpg
--- a/paddleocr.py
+++ b/paddleocr.py
--- a/ppocr/data/imaug/__init__.py
+++ b/ppocr/data/imaug/__init__.py
--- a/ppocr/data/imaug/ct_process.py
+++ b/ppocr/data/imaug/ct_process.py
--- a/ppocr/data/imaug/label_ops.py
+++ b/ppocr/data/imaug/label_ops.py
--- a/ppocr/data/imaug/operators.py
+++ b/ppocr/data/imaug/operators.py
--- a/ppocr/data/imaug/pg_process.py
+++ b/ppocr/data/imaug/pg_process.py
--- a/ppocr/data/imaug/vqa/__init__.py
+++ b/ppocr/data/imaug/vqa/__init__.py
--- a/ppocr/data/imaug/vqa/token/__init__.py
+++ b/ppocr/data/imaug/vqa/token/__init__.py
--- a/ppocr/data/imaug/vqa/token/vqa_re_convert.py
+++ b/ppocr/data/imaug/vqa/token/vqa_re_convert.py
--- a/ppocr/losses/__init__.py
+++ b/ppocr/losses/__init__.py
--- a/ppocr/losses/det_ct_loss.py
+++ b/ppocr/losses/det_ct_loss.py
--- a/ppocr/losses/distillation_loss.py
+++ b/ppocr/losses/distillation_loss.py
--- a/ppocr/losses/e2e_pg_loss.py
+++ b/ppocr/losses/e2e_pg_loss.py
--- a/ppocr/metrics/__init__.py
+++ b/ppocr/metrics/__init__.py
--- a/ppocr/metrics/ct_metric.py
+++ b/ppocr/metrics/ct_metric.py
--- a/ppocr/metrics/vqa_token_re_metric.py
+++ b/ppocr/metrics/vqa_token_re_metric.py
--- a/ppocr/modeling/backbones/vqa_layoutlm.py
+++ b/ppocr/modeling/backbones/vqa_layoutlm.py
--- a/ppocr/modeling/heads/__init__.py
+++ b/ppocr/modeling/heads/__init__.py
--- a/ppocr/modeling/heads/det_ct_head.py
+++ b/ppocr/modeling/heads/det_ct_head.py
--- a/ppocr/modeling/heads/e2e_pg_head.py
+++ b/ppocr/modeling/heads/e2e_pg_head.py
--- a/ppocr/modeling/heads/table_att_head.py
+++ b/ppocr/modeling/heads/table_att_head.py
--- a/ppocr/modeling/necks/__init__.py
+++ b/ppocr/modeling/necks/__init__.py
--- a/ppocr/modeling/necks/ct_fpn.py
+++ b/ppocr/modeling/necks/ct_fpn.py
--- a/ppocr/postprocess/__init__.py
+++ b/ppocr/postprocess/__init__.py
--- a/ppocr/postprocess/ct_postprocess.py
+++ b/ppocr/postprocess/ct_postprocess.py
--- a/ppocr/postprocess/pg_postprocess.py
+++ b/ppocr/postprocess/pg_postprocess.py
--- a/ppocr/postprocess/vqa_token_re_layoutlm_postprocess.py
+++ b/ppocr/postprocess/vqa_token_re_layoutlm_postprocess.py
--- a/ppocr/utils/e2e_metric/Deteval.py
+++ b/ppocr/utils/e2e_metric/Deteval.py
--- a/ppocr/utils/e2e_utils/extract_textpoint_fast.py
+++ b/ppocr/utils/e2e_utils/extract_textpoint_fast.py
--- a/ppocr/utils/e2e_utils/pgnet_pp_utils.py
+++ b/ppocr/utils/e2e_utils/pgnet_pp_utils.py
--- a/ppstructure/docs/layout/layout.png
+++ b/ppstructure/docs/layout/layout.png
--- a/ppstructure/docs/models_list.md
+++ b/ppstructure/docs/models_list.md
--- a/ppstructure/kie/README.md
+++ b/ppstructure/kie/README.md
--- a/ppstructure/kie/README_ch.md
+++ b/ppstructure/kie/README_ch.md
--- a/ppstructure/kie/predict_kie_token_ser.py
+++ b/ppstructure/kie/predict_kie_token_ser.py
--- a/ppstructure/kie/predict_kie_token_ser_re.py
+++ b/ppstructure/kie/predict_kie_token_ser_re.py
--- a/ppstructure/layout/README.md
+++ b/ppstructure/layout/README.md
--- a/ppstructure/layout/README_ch.md
+++ b/ppstructure/layout/README_ch.md
--- a/ppstructure/predict_system.py
+++ b/ppstructure/predict_system.py
--- a/ppstructure/recovery/README.md
+++ b/ppstructure/recovery/README.md
--- a/ppstructure/recovery/README_ch.md
+++ b/ppstructure/recovery/README_ch.md
--- a/ppstructure/recovery/recovery_to_doc.py
+++ b/ppstructure/recovery/recovery_to_doc.py
--- a/ppstructure/recovery/requirements.txt
+++ b/ppstructure/recovery/requirements.txt
--- a/ppstructure/recovery/table_process.py
+++ b/ppstructure/recovery/table_process.py
--- a/ppstructure/table/README.md
+++ b/ppstructure/table/README.md
--- a/ppstructure/table/README_ch.md
+++ b/ppstructure/table/README_ch.md
--- a/ppstructure/table/predict_structure.py
+++ b/ppstructure/table/predict_structure.py
--- a/ppstructure/utility.py
+++ b/ppstructure/utility.py
--- a/requirements.txt
+++ b/requirements.txt
--- a/test_tipc/benchmark_train.sh
+++ b/test_tipc/benchmark_train.sh
--- a/test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_infer_python.txt
+++ b/test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_infer_python.txt
--- a/test_tipc/configs/det_r18_ct/train_infer_python.txt
+++ b/test_tipc/configs/det_r18_ct/train_infer_python.txt
--- a/test_tipc/configs/en_table_structure/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt
+++ b/test_tipc/configs/en_table_structure/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt
--- a/test_tipc/configs/en_table_structure/model_linux_gpu_normal_normal_paddle2onnx_python_linux_cpu.txt
+++ b/test_tipc/configs/en_table_structure/model_linux_gpu_normal_normal_paddle2onnx_python_linux_cpu.txt
--- a/test_tipc/configs/en_table_structure_PACT/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt
+++ b/test_tipc/configs/en_table_structure_PACT/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt
--- a/test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml
+++ b/test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml
--- a/test_tipc/configs/layoutxlm_ser/train_infer_python.txt
+++ b/test_tipc/configs/layoutxlm_ser/train_infer_python.txt
--- a/test_tipc/configs/layoutxlm_ser/train_linux_gpu_fleet_normal_infer_python_linux_gpu_cpu.txt
+++ b/test_tipc/configs/layoutxlm_ser/train_linux_gpu_fleet_normal_infer_python_linux_gpu_cpu.txt
--- a/test_tipc/configs/layoutxlm_ser/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
+++ b/test_tipc/configs/layoutxlm_ser/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
--- a/test_tipc/configs/layoutxlm_ser/train_pact_infer_python.txt
+++ b/test_tipc/configs/layoutxlm_ser/train_pact_infer_python.txt
--- a/test_tipc/configs/layoutxlm_ser/train_ptq_infer_python.txt
+++ b/test_tipc/configs/layoutxlm_ser/train_ptq_infer_python.txt
--- a/test_tipc/configs/slanet/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt
+++ b/test_tipc/configs/slanet/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt
--- a/test_tipc/configs/slanet/model_linux_gpu_normal_normal_paddle2onnx_python_linux_cpu.txt
+++ b/test_tipc/configs/slanet/model_linux_gpu_normal_normal_paddle2onnx_python_linux_cpu.txt
--- a/test_tipc/configs/slanet/train_linux_gpu_fleet_normal_infer_python_linux_gpu_cpu.txt
+++ b/test_tipc/configs/slanet/train_linux_gpu_fleet_normal_infer_python_linux_gpu_cpu.txt
--- a/test_tipc/configs/table_master/train_infer_python.txt
+++ b/test_tipc/configs/table_master/train_infer_python.txt
--- a/test_tipc/docs/jeston_test_train_inference_python.md
+++ b/test_tipc/docs/jeston_test_train_inference_python.md
--- a/test_tipc/docs/mac_test_train_inference_python.md
+++ b/test_tipc/docs/mac_test_train_inference_python.md
--- a/test_tipc/docs/test_inference_cpp.md
+++ b/test_tipc/docs/test_inference_cpp.md
--- a/test_tipc/docs/test_paddle2onnx.md
+++ b/test_tipc/docs/test_paddle2onnx.md
--- a/test_tipc/docs/test_ptq_inference_python.md
+++ b/test_tipc/docs/test_ptq_inference_python.md
--- a/test_tipc/docs/test_serving.md
+++ b/test_tipc/docs/test_serving.md
--- a/test_tipc/docs/test_train_inference_python.md
+++ b/test_tipc/docs/test_train_inference_python.md
--- a/test_tipc/docs/win_test_train_inference_python.md
+++ b/test_tipc/docs/win_test_train_inference_python.md
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
--- a/test_tipc/test_paddle2onnx.sh
+++ b/test_tipc/test_paddle2onnx.sh
--- a/test_tipc/test_train_inference_python_npu.sh
+++ b/test_tipc/test_train_inference_python_npu.sh
--- a/test_tipc/test_train_inference_python_xpu.sh
+++ b/test_tipc/test_train_inference_python_xpu.sh
--- a/tools/export_model.py
+++ b/tools/export_model.py
--- a/tools/infer/predict_det.py
+++ b/tools/infer/predict_det.py
--- a/tools/infer/predict_system.py
+++ b/tools/infer/predict_system.py
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
--- a/tools/infer_e2e.py
+++ b/tools/infer_e2e.py
--- a/tools/infer_kie_token_ser_re.py
+++ b/tools/infer_kie_token_ser_re.py
--- a/tools/program.py
+++ b/tools/program.py
--- a/tools/train.py
+++ b/tools/train.py
--- a/train.sh
+++ b/train.sh