提交 df783f5a 编写于 作者: qq_25193841's avatar qq_25193841

Merge remote-tracking branch 'origin/dygraph' into dy1

...@@ -2449,13 +2449,6 @@ class MainWindow(QMainWindow): ...@@ -2449,13 +2449,6 @@ class MainWindow(QMainWindow):
export PPLabel and CSV to JSON (PubTabNet) export PPLabel and CSV to JSON (PubTabNet)
''' '''
import pandas as pd import pandas as pd
from libs.dataPartitionDialog import DataPartitionDialog
# data partition user input
partitionDialog = DataPartitionDialog(parent=self)
partitionDialog.exec()
if partitionDialog.getStatus() == False:
return
# automatically save annotations # automatically save annotations
self.saveFilestate() self.saveFilestate()
...@@ -2479,27 +2472,18 @@ class MainWindow(QMainWindow): ...@@ -2479,27 +2472,18 @@ class MainWindow(QMainWindow):
else: else:
labeldict[file] = [] labeldict[file] = []
train_split, val_split, test_split = partitionDialog.getDataPartition() # read table recognition output
# check validate TableRec_excel_dir = os.path.join(
if train_split + val_split + test_split > 100: self.lastOpenDir, 'tableRec_excel_output')
msg = "The sum of training, validation and testing data should be less than 100%"
QMessageBox.information(self, "Information", msg) # save txt
return fid = open(
print(train_split, val_split, test_split) "{}/gt.txt".format(self.lastOpenDir), "w", encoding='utf-8')
train_split, val_split, test_split = float(train_split) / 100., float(val_split) / 100., float(test_split) / 100.
train_id = int(len(labeldict) * train_split)
val_id = int(len(labeldict) * (train_split + val_split))
print('Data partition: train:', train_id,
'validation:', val_id - train_id,
'test:', len(labeldict) - val_id)
TableRec_excel_dir = os.path.join(self.lastOpenDir, 'tableRec_excel_output')
json_results = []
imgid = 0
for image_path in labeldict.keys(): for image_path in labeldict.keys():
# load csv annotations # load csv annotations
filename, _ = os.path.splitext(os.path.basename(image_path)) filename, _ = os.path.splitext(os.path.basename(image_path))
csv_path = os.path.join(TableRec_excel_dir, filename + '.xlsx') csv_path = os.path.join(
TableRec_excel_dir, filename + '.xlsx')
if not os.path.exists(csv_path): if not os.path.exists(csv_path):
continue continue
...@@ -2518,28 +2502,31 @@ class MainWindow(QMainWindow): ...@@ -2518,28 +2502,31 @@ class MainWindow(QMainWindow):
cells = [] cells = []
for anno in labeldict[image_path]: for anno in labeldict[image_path]:
tokens = list(anno['transcription']) tokens = list(anno['transcription'])
obb = anno['points'] cells.append({
hbb = OBB2HBB(np.array(obb)).tolist() 'tokens': tokens,
cells.append({'tokens': tokens, 'bbox': hbb}) 'bbox': anno['points']
})
# data split
if imgid < train_id: # 构造标注信息
split = 'train' html = {
elif imgid < val_id: 'structure': {
split = 'val' 'tokens': token_list
else: },
split = 'test' 'cells': cells
}
# save dict d = {
html = {'structure': {'tokens': token_list}, 'cell': cells} 'filename': os.path.basename(image_path),
json_results.append({'filename': os.path.basename(image_path), 'split': split, 'imgid': imgid, 'html': html}) 'html': html
imgid += 1 }
# 重构HTML
# save json d['gt'] = rebuild_html_from_ppstructure_label(d)
with open("{}/annotation.json".format(self.lastOpenDir), "w", encoding='utf-8') as fid: fid.write('{}\n'.format(
fid.write(json.dumps(json_results, ensure_ascii=False)) json.dumps(
d, ensure_ascii=False)))
msg = 'JSON sucessfully saved in {}/annotation.json'.format(self.lastOpenDir)
# convert to PP-Structure label format
fid.close()
msg = 'JSON sucessfully saved in {}/gt.txt'.format(self.lastOpenDir)
QMessageBox.information(self, "Information", msg) QMessageBox.information(self, "Information", msg)
def autolcm(self): def autolcm(self):
...@@ -2729,6 +2716,9 @@ class MainWindow(QMainWindow): ...@@ -2729,6 +2716,9 @@ class MainWindow(QMainWindow):
self._update_shape_color(shape) self._update_shape_color(shape)
self.keyDialog.addLabelHistory(key_text) self.keyDialog.addLabelHistory(key_text)
# save changed shape
self.setDirty()
def undoShapeEdit(self): def undoShapeEdit(self):
self.canvas.restoreShape() self.canvas.restoreShape()
self.labelList.clear() self.labelList.clear()
......
...@@ -611,8 +611,8 @@ class Canvas(QWidget): ...@@ -611,8 +611,8 @@ class Canvas(QWidget):
if self.drawing() and not self.prevPoint.isNull() and not self.outOfPixmap(self.prevPoint): if self.drawing() and not self.prevPoint.isNull() and not self.outOfPixmap(self.prevPoint):
p.setPen(QColor(0, 0, 0)) p.setPen(QColor(0, 0, 0))
p.drawLine(self.prevPoint.x(), 0, self.prevPoint.x(), self.pixmap.height()) p.drawLine(int(self.prevPoint.x()), 0, int(self.prevPoint.x()), self.pixmap.height())
p.drawLine(0, self.prevPoint.y(), self.pixmap.width(), self.prevPoint.y()) p.drawLine(0, int(self.prevPoint.y()), self.pixmap.width(), int(self.prevPoint.y()))
self.setAutoFillBackground(True) self.setAutoFillBackground(True)
if self.verified: if self.verified:
......
try:
from PyQt5.QtGui import *
from PyQt5.QtCore import *
from PyQt5.QtWidgets import *
except ImportError:
from PyQt4.QtGui import *
from PyQt4.QtCore import *
from libs.utils import newIcon
import time
import datetime
import json
import cv2
import numpy as np
BB = QDialogButtonBox
class DataPartitionDialog(QDialog):
    """Dialog that asks for the train / validation / test split percentages.

    The dialog shows a warning message, three integer inputs pre-filled with
    70 / 15 / 15 and an Ok/Cancel button box. After the dialog closes, the
    caller reads ``getStatus()`` to learn whether it was confirmed and
    ``getDataPartition()`` to obtain the three percentages.
    """

    def __init__(self, parent=None):
        """Create the dialog.

        Args:
            parent: optional owner object; only its ``lang`` attribute is read
                (to pick the language of the warning message).
        """
        super().__init__()
        # NOTE: the attribute name is misspelled ("parnet"); it is kept as-is
        # in case code outside this class reads it.
        self.parnet = parent
        self.title = 'DATA PARTITION'
        self.train_ratio = 70
        self.val_ratio = 15
        self.test_ratio = 15
        self.initUI()

    def initUI(self):
        """Build the widgets, lay them out in a grid and show the dialog."""
        self.setWindowTitle(self.title)
        self.setWindowModality(Qt.ApplicationModal)
        self.flag_accept = True

        # ``parent`` defaults to None, so do not assume it has a ``lang``
        # attribute; fall back to the English message in that case.
        if getattr(self.parnet, 'lang', 'en') == 'ch':
            msg = "导出JSON前请保存所有图像的标注且关闭EXCEL!"
        else:
            msg = "Please save all the annotations and close the EXCEL before exporting JSON!"

        info_msg = QLabel(msg, self)
        info_msg.setWordWrap(True)
        info_msg.setStyleSheet("color: red")
        info_msg.setFont(QFont('Arial', 12))

        train_lbl = QLabel('Train split: ', self)
        val_lbl = QLabel('Valid split: ', self)
        test_lbl = QLabel('Test split: ', self)
        labels = (train_lbl, val_lbl, test_lbl)
        for lbl in labels:
            lbl.setFont(QFont('Arial', 15))

        self.train_input = QLineEdit(self)
        self.val_input = QLineEdit(self)
        self.test_input = QLineEdit(self)
        inputs = (self.train_input, self.val_input, self.test_input)
        defaults = (self.train_ratio, self.val_ratio, self.test_ratio)

        # One validator instance is shared by the three fields.
        validator = QIntValidator(0, 100)
        for field, default in zip(inputs, defaults):
            field.setFont(QFont('Arial', 15))
            field.setText(str(default))
            field.setValidator(validator)

        gridlayout = QGridLayout()
        gridlayout.addWidget(info_msg, 0, 0, 1, 2)
        # Rows 1..3: label in column 0, matching input in column 1.
        for row, (lbl, field) in enumerate(zip(labels, inputs), start=1):
            gridlayout.addWidget(lbl, row, 0)
            gridlayout.addWidget(field, row, 1)

        bb = BB(BB.Ok | BB.Cancel, Qt.Horizontal, self)
        bb.button(BB.Ok).setIcon(newIcon('done'))
        bb.button(BB.Cancel).setIcon(newIcon('undo'))
        bb.accepted.connect(self.validate)
        bb.rejected.connect(self.cancel)
        gridlayout.addWidget(bb, 4, 0, 1, 2)

        self.setLayout(gridlayout)
        self.show()

    def validate(self):
        """Slot for the Ok button: mark the dialog as confirmed and accept it."""
        self.flag_accept = True
        self.accept()

    def cancel(self):
        """Slot for the Cancel button: mark the dialog as cancelled and reject it."""
        self.flag_accept = False
        self.reject()

    def getStatus(self):
        """Return True if the dialog was confirmed, False if cancelled or closed."""
        return self.flag_accept

    @staticmethod
    def _parse_ratio(text):
        """Convert the text of an input field to an int, using 0 when it is not a number.

        The validator still lets the user leave a field blank, and ``int('')``
        would raise ValueError.
        """
        try:
            return int(text)
        except ValueError:
            return 0

    def getDataPartition(self):
        """Read the three input fields and return ``(train, val, test)`` as ints.

        The values are also stored on ``train_ratio`` / ``val_ratio`` /
        ``test_ratio``. A blank or non-numeric field counts as 0.
        """
        self.train_ratio = self._parse_ratio(self.train_input.text())
        self.val_ratio = self._parse_ratio(self.val_input.text())
        self.test_ratio = self._parse_ratio(self.test_input.text())
        return self.train_ratio, self.val_ratio, self.test_ratio

    def closeEvent(self, event):
        """Treat closing the window the same way as pressing Cancel."""
        self.flag_accept = False
        self.reject()
...@@ -176,18 +176,6 @@ def boxPad(box, imgShape, pad : int) -> np.array: ...@@ -176,18 +176,6 @@ def boxPad(box, imgShape, pad : int) -> np.array:
return box return box
def OBB2HBB(obb) -> np.ndarray:
    """
    Convert Oriented Bounding Box to Horizontal Bounding Box.

    Args:
        obb: array-like of points, one ``(x, y)`` pair per row (a NumPy array
            or a nested list). Only the first two columns are used.

    Returns:
        An ``int32`` array ``[x_min, y_min, x_max, y_max]``. Non-integer
        coordinates are truncated when stored into the integer array.
    """
    # Accept plain nested lists as well as arrays.
    pts = np.asarray(obb)[:, :2]
    hbb = np.zeros(4, dtype=np.int32)
    # Column-wise reductions replace the element-by-element Python min()/max().
    hbb[:2] = pts.min(axis=0)
    hbb[2:] = pts.max(axis=0)
    return hbb
def expand_list(merged, html_list): def expand_list(merged, html_list):
''' '''
Fill blanks according to merged cells Fill blanks according to merged cells
...@@ -232,6 +220,26 @@ def convert_token(html_list): ...@@ -232,6 +220,26 @@ def convert_token(html_list):
return token_list return token_list
def rebuild_html_from_ppstructure_label(label_info):
    """Rebuild a full HTML table string from a PP-Structure style label.

    Args:
        label_info: dict with ``label_info['html']['structure']['tokens']``
            (list of structure tokens) and ``label_info['html']['cells']``
            (list of dicts, each with a ``'tokens'`` list).

    Returns:
        The structure tokens joined into one string, with each non-empty
        cell's text placed right after its opening ``'<td>'`` / ``'>'`` token,
        wrapped in ``<html><body><table>...</table></body></html>``.
    """
    from html import escape

    html_info = label_info['html']
    # Copy so the caller's structure token list is left untouched.
    pieces = html_info['structure']['tokens'].copy()
    # A cell starts after a bare '<td>' or after the '>' token.
    slots = [idx for idx, token in enumerate(pieces) if token in ('<td>', '>')]

    # Walk from the back so earlier insert positions are not shifted.
    for idx, cell in zip(reversed(slots), html_info['cells'][::-1]):
        cell_tokens = cell['tokens']
        if not cell_tokens:
            continue
        # Single characters are HTML-escaped; longer tokens are kept verbatim.
        text = ''.join(
            escape(tok) if len(tok) == 1 else tok for tok in cell_tokens)
        pieces.insert(idx + 1, text)

    return '<html><body><table>{}</table></body></html>'.format(
        ''.join(pieces))
def stepsInfo(lang='en'): def stepsInfo(lang='en'):
if lang == 'ch': if lang == 'ch':
msg = "1. 安装与运行:使用上述命令安装与运行程序。\n" \ msg = "1. 安装与运行:使用上述命令安装与运行程序。\n" \
......
# 智能运营:通用中文表格识别
- [1. 背景介绍](#1-背景介绍)
- [2. 中文表格识别](#2-中文表格识别)
- [2.1 环境准备](#21-环境准备)
- [2.2 准备数据集](#22-准备数据集)
- [2.2.1 划分训练测试集](#221-划分训练测试集)
- [2.2.2 查看数据集](#222-查看数据集)
- [2.3 训练](#23-训练)
- [2.4 验证](#24-验证)
- [2.5 训练引擎推理](#25-训练引擎推理)
- [2.6 模型导出](#26-模型导出)
- [2.7 预测引擎推理](#27-预测引擎推理)
- [2.8 表格识别](#28-表格识别)
- [3. 表格属性识别](#3-表格属性识别)
- [3.1 代码、环境、数据准备](#31-代码环境数据准备)
- [3.1.1 代码准备](#311-代码准备)
- [3.1.2 环境准备](#312-环境准备)
- [3.1.3 数据准备](#313-数据准备)
- [3.2 表格属性识别训练](#32-表格属性识别训练)
- [3.3 表格属性识别推理和部署](#33-表格属性识别推理和部署)
- [3.3.1 模型转换](#331-模型转换)
- [3.3.2 模型推理](#332-模型推理)
## 1. 背景介绍
中文表格识别在金融行业有着广泛的应用,如保险理赔、财报分析和信息录入等领域。当前,金融行业的表格识别主要以手动录入为主,开发一种自动表格识别成为亟待解决的问题。
![](https://ai-studio-static-online.cdn.bcebos.com/d1e7780f0c7745ada4be540decefd6288e4d59257d8141f6842682a4c05d28b6)
在金融行业中,表格图像主要有清单类的单元格密集型表格,申请表类的大单元格表格,拍照表格和倾斜表格四种主要形式。
![](https://ai-studio-static-online.cdn.bcebos.com/da82ae8ef8fd479aaa38e1049eb3a681cf020dc108fa458eb3ec79da53b45fd1)
![](https://ai-studio-static-online.cdn.bcebos.com/5ffff2093a144a6993a75eef71634a52276015ee43a04566b9c89d353198c746)
当前的表格识别算法不能很好地处理这些场景下的表格图像。在本例中,我们使用PP-Structurev2最新发布的表格识别模型SLANet来演示如何进行中文表格识别。同时,为了方便作业流程,我们使用表格属性识别模型对表格图像的属性进行识别,对表格的难易程度进行判断,加快人工校对的速度。
本项目AI Studio链接:https://aistudio.baidu.com/aistudio/projectdetail/4588067
## 2. 中文表格识别
### 2.1 环境准备
```python
# 下载PaddleOCR代码
! git clone -b dygraph https://gitee.com/paddlepaddle/PaddleOCR
```
```python
# 安装PaddleOCR环境
! pip install -r PaddleOCR/requirements.txt --force-reinstall
! pip install protobuf==3.19
```
### 2.2 准备数据集
本例中使用的数据集采用表格[生成工具](https://github.com/WenmuZhou/TableGeneration)制作。
使用如下命令对数据集进行解压,并查看数据集大小
```python
! cd data/data165849 && tar -xf table_gen_dataset.tar && cd -
! wc -l data/data165849/table_gen_dataset/gt.txt
```
#### 2.2.1 划分训练测试集
使用下述命令将数据集划分为训练集和测试集, 这里将90%划分为训练集,10%划分为测试集
```python
import random
with open('/home/aistudio/data/data165849/table_gen_dataset/gt.txt') as f:
lines = f.readlines()
random.shuffle(lines)
train_len = int(len(lines)*0.9)
train_list = lines[:train_len]
val_list = lines[train_len:]
# 保存结果
with open('/home/aistudio/train.txt','w',encoding='utf-8') as f:
f.writelines(train_list)
with open('/home/aistudio/val.txt','w',encoding='utf-8') as f:
f.writelines(val_list)
```
划分完成后,数据集信息如下
|类型|数量|图片地址|标注文件路径|
|---|---|---|---|
|训练集|18000|/home/aistudio/data/data165849/table_gen_dataset|/home/aistudio/train.txt|
|测试集|2000|/home/aistudio/data/data165849/table_gen_dataset|/home/aistudio/val.txt|
#### 2.2.2 查看数据集
```python
import cv2
import os, json
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline
def parse_line(data_dir, line):
    """Parse one JSON annotation line into a dict describing a table image.

    Args:
        data_dir: directory that the annotated file name is joined onto.
        line: one JSON line with ``filename`` and ``html`` (``cells`` and
            ``structure.tokens``) entries.

    Returns:
        A dict with ``img_path``, ``cells``, ``structure`` and ``file_name``,
        or ``None`` (after printing the path) when the image does not exist.
    """
    record = json.loads(line.strip("\n"))
    name = record['filename']
    # Shallow copies: later edits to the lists do not touch ``record``.
    cell_list = record['html']['cells'].copy()
    token_list = record['html']['structure']['tokens'].copy()

    path = os.path.join(data_dir, name)
    if os.path.exists(path):
        return {
            'img_path': path,
            'cells': cell_list,
            'structure': token_list,
            'file_name': name
        }
    # Report the missing image and signal failure to the caller.
    print(path)
    return None
def draw_bbox(img_path, points, color=(255, 0, 0), thickness=2):
    """Draw closed polygons on a copy of an image.

    Args:
        img_path: either a path string (read with ``cv2.imread``) or an
            already loaded image array.
        points: iterable of arrays, each holding the vertices of one polygon.
        color: line colour passed to ``cv2.polylines``.
        thickness: line thickness passed to ``cv2.polylines``.

    Returns:
        A copy of the image with all polygons drawn on it.
    """
    loaded = cv2.imread(img_path) if isinstance(img_path, str) else img_path
    # Draw on a copy so the caller's array is left unchanged.
    canvas = loaded.copy()
    for polygon in points:
        cv2.polylines(canvas, [polygon.astype(int)], True, color, thickness)
    return canvas
def rebuild_html(data):
    """Rebuild the table HTML string from structure tokens and cell texts.

    Args:
        data: dict with ``'structure'`` (list of structure tokens) and
            ``'cells'`` (list of dicts, each with a ``'tokens'`` list).

    Returns:
        The structure tokens joined into one string, with the text of each
        cell inserted after its opening ``'<td>'`` / ``'>'`` token. Cells
        whose text is empty once style tags and blanks are removed are left
        empty.
    """
    # Work on a copy: inserting into data['structure'] directly would modify
    # the caller's list, so a second call would insert the cell texts twice.
    html_code = list(data['structure'])
    cells = data['cells']
    # Style tags and blank characters that do not count as cell content.
    sp_char_list = ['<b>', '</b>', '\u2028', ' ', '<i>', '</i>']
    to_insert = [i for i, tag in enumerate(html_code) if tag in ('<td>', '>')]
    # Go backwards so earlier insert positions stay valid.
    for i, cell in zip(to_insert[::-1], cells[::-1]):
        if not cell['tokens']:
            continue
        text = ''.join(cell['tokens'])
        # skip empty text
        if len(skip_char(text, sp_char_list)) == 0:
            continue
        html_code.insert(i + 1, text)
    return ''.join(html_code)
def skip_char(text, sp_char_list):
    """
    Remove every listed style tag / special character from a cell text.

    @param text: text in cell
    @param sp_char_list: substrings (style tags and special codes) to delete
    @return: the text with all listed substrings removed
    """
    cleaned = text
    for marker in sp_char_list:
        cleaned = cleaned.replace(marker, '')
    return cleaned
save_dir = '/home/aistudio/vis'
os.makedirs(save_dir, exist_ok=True)
image_dir = '/home/aistudio/data/data165849/'
html_str = '<table border="1">'
# 解析标注信息并还原html表格
data = parse_line(image_dir, val_list[0])
img = cv2.imread(data['img_path'])
img_name = ''.join(os.path.basename(data['file_name']).split('.')[:-1])
img_save_name = os.path.join(save_dir, img_name)
boxes = [np.array(x['bbox']) for x in data['cells']]
show_img = draw_bbox(data['img_path'], boxes)
cv2.imwrite(img_save_name + '_show.jpg', show_img)
html = rebuild_html(data)
html_str += html
html_str += '</table>'
# 显示标注的html字符串
from IPython.core.display import display, HTML
display(HTML(html_str))
# 显示单元格坐标
plt.figure(figsize=(15,15))
plt.imshow(show_img)
plt.show()
```
### 2.3 训练
这里选用PP-Structurev2中的表格识别模型[SLANet](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/configs/table/SLANet.yml)
SLANet是PP-Structurev2全新推出的表格识别模型,相比PP-Structurev1中TableRec-RARE,在速度不变的情况下精度提升4.7%。TEDS提升2%
|算法|Acc|[TEDS(Tree-Edit-Distance-based Similarity)](https://github.com/ibm-aur-nlp/PubTabNet/tree/master/src)|Speed|
| --- | --- | --- | ---|
| EDD<sup>[2]</sup> |x| 88.3% |x|
| TableRec-RARE(ours) | 71.73%| 93.88% |779ms|
| SLANet(ours) | 76.31%| 95.89%|766ms|
进行训练之前先使用如下命令下载预训练模型
```python
# 进入PaddleOCR工作目录
os.chdir('/home/aistudio/PaddleOCR')
# 下载英文预训练模型
! wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_train.tar --no-check-certificate
! cd ./pretrain_models/ && tar xf en_ppstructure_mobile_v2.0_SLANet_train.tar && cd ../
```
使用如下命令即可启动训练,需要修改的配置有
|字段|修改值|含义|
|---|---|---|
|Global.pretrained_model|./pretrain_models/en_ppstructure_mobile_v2.0_SLANet_train/best_accuracy.pdparams|指向英文表格预训练模型地址|
|Global.eval_batch_step|562|模型多少step评估一次,一般设置为一个epoch总的step数|
|Optimizer.lr.name|Const|学习率衰减器 |
|Optimizer.lr.learning_rate|0.0005|学习率设为之前的0.05倍 |
|Train.dataset.data_dir|/home/aistudio/data/data165849|指向训练集图片存放目录 |
|Train.dataset.label_file_list|/home/aistudio/data/data165849/table_gen_dataset/train.txt|指向训练集标注文件 |
|Train.loader.batch_size_per_card|32|训练时每张卡的batch_size |
|Train.loader.num_workers|1|训练集多进程数据读取的进程数,在aistudio中需要设为1 |
|Eval.dataset.data_dir|/home/aistudio/data/data165849|指向测试集图片存放目录 |
|Eval.dataset.label_file_list|/home/aistudio/data/data165849/table_gen_dataset/val.txt|指向测试集标注文件 |
|Eval.loader.batch_size_per_card|32|测试时每张卡的batch_size |
|Eval.loader.num_workers|1|测试集多进程数据读取的进程数,在aistudio中需要设为1 |
已经修改好的配置存储在 `/home/aistudio/SLANet_ch.yml`
```python
import os
os.chdir('/home/aistudio/PaddleOCR')
! python3 tools/train.py -c /home/aistudio/SLANet_ch.yml
```
大约在7个epoch后达到最高精度 97.49%
### 2.4 验证
训练完成后,可使用如下命令在测试集上评估最优模型的精度
```python
! python3 tools/eval.py -c /home/aistudio/SLANet_ch.yml -o Global.checkpoints=/home/aistudio/PaddleOCR/output/SLANet_ch/best_accuracy.pdparams
```
### 2.5 训练引擎推理
使用如下命令可使用训练引擎对单张图片进行推理
```python
import os;os.chdir('/home/aistudio/PaddleOCR')
! python3 tools/infer_table.py -c /home/aistudio/SLANet_ch.yml -o Global.checkpoints=/home/aistudio/PaddleOCR/output/SLANet_ch/best_accuracy.pdparams Global.infer_img=/home/aistudio/data/data165849/table_gen_dataset/img/no_border_18298_G7XZH93DDCMATGJQ8RW2.jpg
```
```python
import cv2
from matplotlib import pyplot as plt
%matplotlib inline
# 显示原图
show_img = cv2.imread('/home/aistudio/data/data165849/table_gen_dataset/img/no_border_18298_G7XZH93DDCMATGJQ8RW2.jpg')
plt.figure(figsize=(15,15))
plt.imshow(show_img)
plt.show()
# 显示预测的单元格
show_img = cv2.imread('/home/aistudio/PaddleOCR/output/infer/no_border_18298_G7XZH93DDCMATGJQ8RW2.jpg')
plt.figure(figsize=(15,15))
plt.imshow(show_img)
plt.show()
```
### 2.6 模型导出
使用如下命令可将模型导出为inference模型
```python
! python3 tools/export_model.py -c /home/aistudio/SLANet_ch.yml -o Global.checkpoints=/home/aistudio/PaddleOCR/output/SLANet_ch/best_accuracy.pdparams Global.save_inference_dir=/home/aistudio/SLANet_ch/infer
```
### 2.7 预测引擎推理
使用如下命令可使用预测引擎对单张图片进行推理
```python
os.chdir('/home/aistudio/PaddleOCR/ppstructure')
! python3 table/predict_structure.py \
--table_model_dir=/home/aistudio/SLANet_ch/infer \
--table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt \
--image_dir=/home/aistudio/data/data165849/table_gen_dataset/img/no_border_18298_G7XZH93DDCMATGJQ8RW2.jpg \
--output=../output/inference
```
```python
# 显示原图
show_img = cv2.imread('/home/aistudio/data/data165849/table_gen_dataset/img/no_border_18298_G7XZH93DDCMATGJQ8RW2.jpg')
plt.figure(figsize=(15,15))
plt.imshow(show_img)
plt.show()
# 显示预测的单元格
show_img = cv2.imread('/home/aistudio/PaddleOCR/output/inference/no_border_18298_G7XZH93DDCMATGJQ8RW2.jpg')
plt.figure(figsize=(15,15))
plt.imshow(show_img)
plt.show()
```
### 2.8 表格识别
在表格结构模型训练完成后,可结合OCR检测识别模型,对表格内容进行识别。
首先下载PP-OCRv3文字检测识别模型
```python
# 下载PP-OCRv3文本检测识别模型并解压
! wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar --no-check-certificate
! wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar --no-check-certificate
! cd ./inference/ && tar xf ch_PP-OCRv3_det_slim_infer.tar && tar xf ch_PP-OCRv3_rec_slim_infer.tar && cd ../
```
模型下载完成后,使用如下命令进行表格识别
```python
import os;os.chdir('/home/aistudio/PaddleOCR/ppstructure')
! python3 table/predict_table.py \
--det_model_dir=inference/ch_PP-OCRv3_det_slim_infer \
--rec_model_dir=inference/ch_PP-OCRv3_rec_slim_infer \
--table_model_dir=/home/aistudio/SLANet_ch/infer \
--rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt \
--table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt \
--image_dir=/home/aistudio/data/data165849/table_gen_dataset/img/no_border_18298_G7XZH93DDCMATGJQ8RW2.jpg \
--output=../output/table
```
```python
# 显示原图
show_img = cv2.imread('/home/aistudio/data/data165849/table_gen_dataset/img/no_border_18298_G7XZH93DDCMATGJQ8RW2.jpg')
plt.figure(figsize=(15,15))
plt.imshow(show_img)
plt.show()
# 显示预测结果
from IPython.core.display import display, HTML
display(HTML('<html><body><table><tr><td colspan="5">alleadersh</td><td rowspan="2">不贰过,推</td><td rowspan="2">从自己参与浙江数</td><td rowspan="2">。另一方</td></tr><tr><td>AnSha</td><td>自己越</td><td>共商共建工作协商</td><td>w.east </td><td>抓好改革试点任务</td></tr><tr><td>Edime</td><td>ImisesElec</td><td>怀天下”。</td><td></td><td>22.26 </td><td>31.61</td><td>4.30 </td><td>794.94</td></tr><tr><td rowspan="2">ip</td><td> Profundi</td><td>:2019年12月1</td><td>Horspro</td><td>444.48</td><td>2.41 </td><td>87</td><td>679.98</td></tr><tr><td> iehaiTrain</td><td>组长蒋蕊</td><td>Toafterdec</td><td>203.43</td><td>23.54 </td><td>4</td><td>4266.62</td></tr><tr><td>Tyint </td><td> roudlyRol</td><td>谢您的好意,我知道</td><td>ErChows</td><td></td><td>48.90</td><td>1031</td><td>6</td></tr><tr><td>NaFlint</td><td></td><td>一辈的</td><td>aterreclam</td><td>7823.86</td><td>9829.23</td><td>7.96 </td><td> 3068</td></tr><tr><td>家上下游企业,5</td><td>Tr</td><td>景象。当地球上的我们</td><td>Urelaw</td><td>799.62</td><td>354.96</td><td>12.98</td><td>33 </td></tr><tr><td>赛事(</td><td> uestCh</td><td>复制的业务模式并</td><td>Listicjust</td><td>9.23</td><td></td><td>92</td><td>53.22</td></tr><tr><td> Ca</td><td> Iskole</td><td>扶贫"之名引导</td><td> Papua </td><td>7191.90</td><td>1.65</td><td>3.62</td><td>48</td></tr><tr><td rowspan="2">避讳</td><td>ir</td><td>但由于</td><td>Fficeof</td><td>0.22</td><td>6.37</td><td>7.17</td><td>3397.75</td></tr><tr><td>ndaTurk</td><td>百处遗址</td><td>gMa</td><td>1288.34</td><td>2053.66</td><td>2.29</td><td>885.45</td></tr></table></body></html>'))
```
## 3. 表格属性识别
### 3.1 代码、环境、数据准备
#### 3.1.1 代码准备
首先,我们需要准备训练表格属性的代码,PaddleClas集成了PULC方案,该方案可以快速获得一个在CPU上用时2ms的属性识别模型。PaddleClas代码可以clone下载得到。获取方式如下:
```python
! git clone -b develop https://gitee.com/paddlepaddle/PaddleClas
```
#### 3.1.2 环境准备
其次,我们需要安装训练PaddleClas相关的依赖包
```python
! pip install -r PaddleClas/requirements.txt --force-reinstall
! pip install protobuf==3.20.0
```
#### 3.1.3 数据准备
最后,准备训练数据。在这里,我们一共定义了表格的6个属性,分别是表格来源、表格数量、表格颜色、表格清晰度、表格有无干扰、表格角度。其可视化如下:
![](https://user-images.githubusercontent.com/45199522/190587903-ccdfa6fb-51e8-42de-b08b-a127cb04e304.png)
这里,我们提供了一个表格属性的demo子集,可以快速迭代体验。下载方式如下:
```python
%cd PaddleClas/dataset
!wget https://paddleclas.bj.bcebos.com/data/PULC/table_attribute.tar
!tar -xf table_attribute.tar
%cd ../PaddleClas/dataset
%cd ../
```
### 3.2 表格属性识别训练
表格属性训练整体pipeline如下:
![](https://user-images.githubusercontent.com/45199522/190599426-3415b38e-e16e-4e68-9253-2ff531b1b5ca.png)
1.训练过程中,图片经过预处理之后,送入到骨干网络之中,骨干网络将抽取表格图片的特征,最终该特征连接输出的FC层,FC层经过Sigmoid激活函数后和真实标签做交叉熵损失函数,优化器通过对该损失函数做梯度下降来更新骨干网络的参数,经过多轮训练后,骨干网络的参数可以对未知图片做很好的预测;
2.推理过程中,图片经过预处理之后,送入到骨干网络之中,骨干网络加载学习好的权重后对该表格图片做出预测,预测的结果为一个6维向量,该向量中的每个元素反映了每个属性对应的概率值,通过对该值进一步卡阈值之后,得到最终的输出,最终的输出描述了该表格的6个属性。
当准备好相关的数据之后,可以一键启动表格属性的训练,训练代码如下:
```python
!python tools/train.py -c ./ppcls/configs/PULC/table_attribute/PPLCNet_x1_0.yaml -o Global.device=cpu -o Global.epochs=10
```
### 3.3 表格属性识别推理和部署
#### 3.3.1 模型转换
当训练好模型之后,需要将模型转换为推理模型进行部署。转换脚本如下:
```python
!python tools/export_model.py -c ppcls/configs/PULC/table_attribute/PPLCNet_x1_0.yaml -o Global.pretrained_model=output/PPLCNet_x1_0/best_model
```
执行以上命令之后,会在当前目录上生成`inference`文件夹,该文件夹中保存了当前精度最高的推理模型。
#### 3.3.2 模型推理
安装推理需要的paddleclas包, 此时需要通过下载安装paddleclas的develop的whl包
```python
!pip install https://paddleclas.bj.bcebos.com/whl/paddleclas-0.0.0-py3-none-any.whl
```
进入`deploy`目录下即可对模型进行推理
```python
%cd deploy/
```
推理命令如下:
```python
!python python/predict_cls.py -c configs/PULC/table_attribute/inference_table_attribute.yaml -o Global.inference_model_dir="../inference" -o Global.infer_imgs="../dataset/table_attribute/Table_val/val_9.jpg"
!python python/predict_cls.py -c configs/PULC/table_attribute/inference_table_attribute.yaml -o Global.inference_model_dir="../inference" -o Global.infer_imgs="../dataset/table_attribute/Table_val/val_3253.jpg"
```
推理的表格图片:
![](https://user-images.githubusercontent.com/45199522/190596141-74f4feda-b082-46d7-908d-b0bd5839b430.png)
预测结果如下:
```
val_9.jpg: {'attributes': ['Scanned', 'Little', 'Black-and-White', 'Clear', 'Without-Obstacles', 'Horizontal'], 'output': [1, 1, 1, 1, 1, 1]}
```
推理的表格图片:
![](https://user-images.githubusercontent.com/45199522/190597086-2e685200-22d0-4042-9e46-f61f24e02e4e.png)
预测结果如下:
```
val_3253.jpg: {'attributes': ['Photo', 'Little', 'Black-and-White', 'Blurry', 'Without-Obstacles', 'Tilted'], 'output': [0, 1, 1, 0, 1, 0]}
```
对比两张图片可以发现,第一张图片比较清晰,表格属性的结果也偏向于比较容易识别,我们可以更相信表格识别的结果,第二张图片比较模糊,且存在倾斜现象,表格识别可能存在错误,需要我们人工进一步校验。通过表格的属性识别能力,可以进一步将“人工”和“智能”很好的结合起来,为表格识别能力的落地的精度提供保障。
此差异已折叠。
...@@ -30,7 +30,7 @@ cd PaddleOCR ...@@ -30,7 +30,7 @@ cd PaddleOCR
# 安装PaddleOCR的依赖 # 安装PaddleOCR的依赖
pip install -r requirements.txt pip install -r requirements.txt
# 安装关键信息抽取任务的依赖 # 安装关键信息抽取任务的依赖
pip install -r ./ppstructure/vqa/requirements.txt pip install -r ./ppstructure/kie/requirements.txt
``` ```
## 4. 关键信息抽取 ## 4. 关键信息抽取
...@@ -94,7 +94,7 @@ VI-LayoutXLM的配置为[ser_vi_layoutxlm_xfund_zh_udml.yml](../configs/kie/vi_l ...@@ -94,7 +94,7 @@ VI-LayoutXLM的配置为[ser_vi_layoutxlm_xfund_zh_udml.yml](../configs/kie/vi_l
```yml ```yml
Architecture: Architecture:
model_type: &model_type "vqa" model_type: &model_type "kie"
name: DistillationModel name: DistillationModel
algorithm: Distillation algorithm: Distillation
Models: Models:
...@@ -177,7 +177,7 @@ python3 tools/eval.py -c ./fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone. ...@@ -177,7 +177,7 @@ python3 tools/eval.py -c ./fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.
使用下面的命令进行预测。 使用下面的命令进行预测。
```bash ```bash
python3 tools/infer_vqa_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/XFUND/zh_val/val.json Global.infer_mode=False python3 tools/infer_kie_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/XFUND/zh_val/val.json Global.infer_mode=False
``` ```
预测结果会保存在配置文件中的`Global.save_res_path`目录中。 预测结果会保存在配置文件中的`Global.save_res_path`目录中。
...@@ -195,7 +195,7 @@ python3 tools/infer_vqa_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architect ...@@ -195,7 +195,7 @@ python3 tools/infer_vqa_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architect
```bash ```bash
python3 tools/infer_vqa_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/imgs/b25.jpg Global.infer_mode=True python3 tools/infer_kie_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/imgs/b25.jpg Global.infer_mode=True
``` ```
结果如下所示。 结果如下所示。
...@@ -211,7 +211,7 @@ python3 tools/infer_vqa_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architect ...@@ -211,7 +211,7 @@ python3 tools/infer_vqa_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architect
如果希望构建基于你在垂类场景训练得到的OCR检测与识别模型,可以使用下面的方法传入检测与识别的inference 模型路径,即可完成OCR文本检测与识别以及SER的串联过程。 如果希望构建基于你在垂类场景训练得到的OCR检测与识别模型,可以使用下面的方法传入检测与识别的inference 模型路径,即可完成OCR文本检测与识别以及SER的串联过程。
```bash ```bash
python3 tools/infer_vqa_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/imgs/b25.jpg Global.infer_mode=True Global.kie_rec_model_dir="your_rec_model" Global.kie_det_model_dir="your_det_model" python3 tools/infer_kie_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/imgs/b25.jpg Global.infer_mode=True Global.kie_rec_model_dir="your_rec_model" Global.kie_det_model_dir="your_det_model"
``` ```
### 4.4 关系抽取(Relation Extraction) ### 4.4 关系抽取(Relation Extraction)
...@@ -316,7 +316,7 @@ python3 tools/eval.py -c ./fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.c ...@@ -316,7 +316,7 @@ python3 tools/eval.py -c ./fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.c
# -o 后面的字段是RE任务的配置 # -o 后面的字段是RE任务的配置
# -c_ser 后面的是SER任务的配置文件 # -c_ser 后面的是SER任务的配置文件
# -c_ser 后面的字段是SER任务的配置 # -c_ser 后面的字段是SER任务的配置
python3 tools/infer_vqa_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/re_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/val.json Global.infer_mode=False -c_ser fapiao/ser_vi_layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy python3 tools/infer_kie_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/re_vi_layoutxlm_fapiao_trained/best_accuracy Global.infer_img=./train_data/zzsfp/val.json Global.infer_mode=False -c_ser fapiao/ser_vi_layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_trained/best_accuracy
``` ```
预测结果会保存在配置文件中的`Global.save_res_path`目录中。 预测结果会保存在配置文件中的`Global.save_res_path`目录中。
...@@ -333,11 +333,11 @@ python3 tools/infer_vqa_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Archite ...@@ -333,11 +333,11 @@ python3 tools/infer_vqa_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Archite
如果希望使用OCR引擎结果得到的结果进行推理,则可以使用下面的命令进行推理。 如果希望使用OCR引擎结果得到的结果进行推理,则可以使用下面的命令进行推理。
```bash ```bash
python3 tools/infer_vqa_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/re_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/val.json Global.infer_mode=True -c_ser fapiao/ser_vi_layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy python3 tools/infer_kie_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/re_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/val.json Global.infer_mode=True -c_ser fapiao/ser_vi_layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy
``` ```
如果希望构建基于你在垂类场景训练得到的OCR检测与识别模型,可以使用下面的方法传入,即可完成SER + RE的串联过程。 如果希望构建基于你在垂类场景训练得到的OCR检测与识别模型,可以使用下面的方法传入,即可完成SER + RE的串联过程。
```bash ```bash
python3 tools/infer_vqa_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/re_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/val.json Global.infer_mode=True -c_ser fapiao/ser_vi_layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.kie_rec_model_dir="your_rec_model" Global.kie_det_model_dir="your_det_model" python3 tools/infer_kie_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/re_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/val.json Global.infer_mode=True -c_ser fapiao/ser_vi_layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.kie_rec_model_dir="your_rec_model" Global.kie_det_model_dir="your_det_model"
``` ```
此差异已折叠。
# 金融智能核验:扫描合同关键信息抽取
本案例将使用OCR技术和通用信息抽取技术,实现合同关键信息审核和比对。通过本章的学习,你可以快速掌握:
1. 使用PaddleOCR提取扫描文本内容
2. 使用PaddleNLP抽取自定义信息
点击进入 [AI Studio 项目](https://aistudio.baidu.com/aistudio/projectdetail/4545772)
## 1. 项目背景
合同审核广泛应用于大中型企业、上市公司、证券、基金公司中,是规避风险的重要任务。
- 合同内容对比:合同审核场景中,快速找出不同版本合同修改区域、版本差异;如合同盖章归档场景中有效识别实际签署的纸质合同、电子版合同差异。
- 合规性检查:法务人员进行合同审核,如合同完备性检查、大小写金额检查、签约主体一致性检查、双方权利和义务对等性分析等。
- 风险点识别:通过合同审核可识别事实倾向型风险点和数值计算型风险点等,例如交付地点约定不明、合同总价款不一致、重要条款缺失等风险点。
![](https://ai-studio-static-online.cdn.bcebos.com/d5143df967fa4364a38868793fe7c57b0c0b1213930243babd6ae01423dcbc4d)
传统业务中大多使用人工进行纸质版合同审核,存在成本高,工作量大,效率低的问题,且一旦出错将造成巨额损失。
本项目针对以上场景,使用PaddleOCR+PaddleNLP快速提取文本内容,经过少量数据微调即可准确抽取关键信息,**高效完成合同内容对比、合规性检查、风险点识别等任务,提高效率,降低风险**
![](https://ai-studio-static-online.cdn.bcebos.com/54f3053e6e1b47a39b26e757006fe2c44910d60a3809422ab76c25396b92e69b)
## 2. 解决方案
### 2.1 扫描合同文本内容提取
使用PaddleOCR开源的模型可以快速完成扫描文档的文本内容提取,在清晰文档上识别准确率可达到95%+。下面来快速体验一下:
#### 2.1.1 环境准备
[PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR)提供了适用于通用场景的高精轻量模型,提供数据预处理-模型推理-后处理全流程,支持pip安装:
```
python -m pip install paddleocr
```
#### 2.1.2 效果测试
使用一张合同图片作为测试样本,感受ppocrv3模型效果:
<img src=https://ai-studio-static-online.cdn.bcebos.com/46258d0dc9dc40bab3ea0e70434e4a905646df8a647f4c49921e217de5142def width=300>
使用中文检测+识别模型提取文本,实例化PaddleOCR类:
```
from paddleocr import PaddleOCR, draw_ocr
# paddleocr目前支持中英文、英文、法语、德语、韩语、日语等80个语种,可以通过修改lang参数进行切换
ocr = PaddleOCR(use_angle_cls=False, lang="ch") # need to run only once to download and load model into memory
```
一行命令启动预测,预测结果包括`检测框`和`文本识别内容`:
```
img_path = "./test_img/hetong2.jpg"
result = ocr.ocr(img_path, cls=False)
for line in result:
print(line)
# 可视化结果
from PIL import Image
image = Image.open(img_path).convert('RGB')
boxes = [line[0] for line in result]
txts = [line[1][0] for line in result]
scores = [line[1][1] for line in result]
im_show = draw_ocr(image, boxes, txts, scores, font_path='./simfang.ttf')
im_show = Image.fromarray(im_show)
im_show.show()
```
#### 2.1.3 图片预处理
通过上图可视化结果可以看到,印章部分造成的文本遮盖,影响了文本识别结果,因此可以考虑通道提取,去除图片中的红色印章:
```
import cv2
import numpy as np
import matplotlib.pyplot as plt
#读入图像,三通道
image=cv2.imread("./test_img/hetong2.jpg",cv2.IMREAD_COLOR) #timg.jpeg
#获得三个通道
Bch,Gch,Rch=cv2.split(image)
#保存三通道图片
cv2.imwrite('blue_channel.jpg',Bch)
cv2.imwrite('green_channel.jpg',Gch)
cv2.imwrite('red_channel.jpg',Rch)
```
#### 2.1.4 合同文本信息提取
经过2.1.3的预处理后,合同照片的红色通道被分离,获得了一张相对更干净的图片,此时可以再次使用ppocr模型提取文本内容:
```
import numpy as np
import cv2
img_path = './red_channel.jpg'
result = ocr.ocr(img_path, cls=False)
# 可视化结果
from PIL import Image
image = Image.open(img_path).convert('RGB')
boxes = [line[0] for line in result]
txts = [line[1][0] for line in result]
scores = [line[1][1] for line in result]
im_show = draw_ocr(image, boxes, txts, scores, font_path='./simfang.ttf')
im_show = Image.fromarray(im_show)
vis = np.array(im_show)
im_show.show()
```
忽略检测框内容,提取完整的合同文本:
```
txts = [line[1][0] for line in result]
all_context = "\n".join(txts)
print(all_context)
```
通过以上环节就完成了扫描合同关键信息抽取的第一步:文本内容提取,接下来可以基于识别出的文本内容抽取关键信息
### 2.2 合同关键信息抽取
#### 2.2.1 环境准备
安装PaddleNLP
```
pip install --upgrade pip
pip install --upgrade paddlenlp
```
#### 2.2.2 合同关键信息抽取
PaddleNLP 使用 Taskflow 统一管理多场景任务的预测功能,其中`information_extraction` 通过大量的有标签样本进行训练,在通用的场景中一般可以直接使用,只需更换关键字即可。例如在合同信息抽取中,我们重新定义抽取关键字:
甲方、乙方、币种、金额、付款方式
将使用OCR提取好的文本作为输入,使用三行命令可以对上文中提取到的合同文本进行关键信息抽取:
```
from paddlenlp import Taskflow
schema = ["甲方","乙方","总价"]
ie = Taskflow('information_extraction', schema=schema)
ie.set_schema(schema)
ie(all_context)
```
可以看到UIE模型可以准确地提取出关键信息,用于后续的信息比对或审核。
## 3. 效果优化
### 3.1 文本识别后处理调优
实际图片采集过程中,可能出现部分图片弯曲等问题,导致使用默认参数识别文本时存在漏检,影响关键信息获取。
例如下图:
<img src="https://ai-studio-static-online.cdn.bcebos.com/fe350481be0241c58736d487d1bf06c2e65911bf01254a79944be629c4c10091" height="300" width="300">
直接进行预测:
```
img_path = "./test_img/hetong3.jpg"
# 预测结果
result = ocr.ocr(img_path, cls=False)
# 可视化结果
from PIL import Image
image = Image.open(img_path).convert('RGB')
boxes = [line[0] for line in result]
txts = [line[1][0] for line in result]
scores = [line[1][1] for line in result]
im_show = draw_ocr(image, boxes, txts, scores, font_path='./simfang.ttf')
im_show = Image.fromarray(im_show)
im_show.show()
```
可视化结果可以看到,弯曲图片存在漏检,一般来说可以通过调整后处理参数解决,无需重新训练模型。漏检问题往往是因为检测模型获得的分割图太小,生成框的得分过低被过滤掉了,通常有两种方式调整参数:
- 开启`use_dilation=True` 膨胀分割区域
- 调小`det_db_box_thresh`阈值
```
# 重新实例化 PaddleOCR
ocr = PaddleOCR(use_angle_cls=False, lang="ch", det_db_box_thresh=0.3, use_dilation=True)
# 预测并可视化
img_path = "./test_img/hetong3.jpg"
# 预测结果
result = ocr.ocr(img_path, cls=False)
# 可视化结果
image = Image.open(img_path).convert('RGB')
boxes = [line[0] for line in result]
txts = [line[1][0] for line in result]
scores = [line[1][1] for line in result]
im_show = draw_ocr(image, boxes, txts, scores, font_path='./simfang.ttf')
im_show = Image.fromarray(im_show)
im_show.show()
```
可以看到漏检问题被很好地解决,提取完整的文本内容:
```
txts = [line[1][0] for line in result]
context = "\n".join(txts)
print(context)
```
### 3.2 关键信息提取调优
UIE通过大量有标签样本进行训练,得到了一个开箱即用的高精模型。 然而针对不同场景,可能会出现部分实体无法被抽取的情况。通常来说有以下几个方法进行效果调优:
- 修改 schema
- 添加正则方法
- 标注小样本微调模型
**修改schema**
Prompt和原文描述越像,抽取效果越好,例如
```
三:合同价格:总价为人民币大写:参拾玖万捌仟伍佰
元,小写:398500.00元。总价中包括站房工程建设、安装
及相关避雷、消防、接地、电力、材料费、检验费、安全、
验收等所需费用及其他相关费用和税金。
```
schema = ["总金额"] 时无法准确抽取,与原文描述差异较大。 修改 schema = ["总价"] 再次尝试:
```
from paddlenlp import Taskflow
# schema = ["总金额"]
schema = ["总价"]
ie = Taskflow('information_extraction', schema=schema)
ie.set_schema(schema)
ie(all_context)
```
**模型微调**
UIE的建模方式主要是通过 `Prompt` 方式来建模, `Prompt` 在小样本上进行微调非常有效。详细的数据标注+模型微调步骤可以参考项目:
[PaddleNLP信息抽取技术重磅升级!](https://aistudio.baidu.com/aistudio/projectdetail/3914778?channelType=0&channel=0)
[工单信息抽取](https://aistudio.baidu.com/aistudio/projectdetail/3914778?contributionType=1)
[快递单信息抽取](https://aistudio.baidu.com/aistudio/projectdetail/4038499?contributionType=1)
## 总结
扫描合同的关键信息提取可以使用 PaddleOCR + PaddleNLP 组合实现,两个工具均有以下优势:
* 使用简单:whl包一键安装,3行命令调用
* 效果领先:优秀的模型效果可覆盖几乎全部的应用场景
* 调优成本低:OCR模型可通过后处理参数的调整适配略有偏差的扫描文本, UIE模型可以通过极少的标注样本微调,成本很低。
## 作业
尝试自己解析出 `test_img/homework.png` 扫描合同中的 [甲方、乙方] 关键词:
<img src=https://ai-studio-static-online.cdn.bcebos.com/50a49a3c9f8348bfa04e8c8b97d3cce0d0dd6b14040f43939268d120688ef7ca width=300 height=400>
更多场景下的垂类模型获取,请扫下图二维码填写问卷,加入PaddleOCR官方交流群获取模型下载链接、《动手学OCR》电子书等全套OCR学习资料🎁
<img src=https://ai-studio-static-online.cdn.bcebos.com/606538b59ea845cb99943b1dec6efe724e78f75c1e9c49228c7bf7da9f8837f5 width=300 height=300>
Global:
use_gpu: true
epoch_num: 600
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/det_ct/
save_epoch_step: 10
# evaluation is run every 1000 iterations, starting from iteration 0
eval_batch_step: [0,1000]
cal_metric_during_train: False
pretrained_model: ./pretrain_models/ResNet18_vd_pretrained.pdparams
checkpoints:
save_inference_dir:
use_visualdl: False
infer_img: doc/imgs_en/img623.jpg
save_res_path: ./output/det_ct/predicts_ct.txt
Architecture:
model_type: det
algorithm: CT
Transform:
Backbone:
name: ResNet_vd
layers: 18
Neck:
name: CTFPN
Head:
name: CT_Head
in_channels: 512
hidden_dim: 128
num_classes: 3
Loss:
name: CTLoss
Optimizer:
name: Adam
lr: #PolynomialDecay
name: Linear
learning_rate: 0.001
end_lr: 0.
epochs: 600
step_each_epoch: 1254
power: 0.9
PostProcess:
name: CTPostProcess
box_type: poly
Metric:
name: CTMetric
main_indicator: f_score
Train:
dataset:
name: SimpleDataSet
data_dir: ./train_data/total_text/train
label_file_list:
- ./train_data/total_text/train/train.txt
ratio_list: [1.0]
transforms:
- DecodeImage:
img_mode: RGB
channel_first: False
- CTLabelEncode: # Class handling label
- RandomScale:
- MakeShrink:
- GroupRandomHorizontalFlip:
- GroupRandomRotate:
- GroupRandomCropPadding:
- MakeCentripetalShift:
- ColorJitter:
brightness: 0.125
saturation: 0.5
- ToCHWImage:
- NormalizeImage:
- KeepKeys:
keep_keys: ['image', 'gt_kernel', 'training_mask', 'gt_instance', 'gt_kernel_instance', 'training_mask_distance', 'gt_distance'] # the order of the dataloader list
loader:
shuffle: True
drop_last: True
batch_size_per_card: 4
num_workers: 8
Eval:
dataset:
name: SimpleDataSet
data_dir: ./train_data/total_text/test
label_file_list:
- ./train_data/total_text/test/test.txt
ratio_list: [1.0]
transforms:
- DecodeImage:
img_mode: RGB
channel_first: False
- CTLabelEncode: # Class handling label
- ScaleAlignedShort:
- NormalizeImage:
order: 'hwc'
- ToCHWImage:
- KeepKeys:
keep_keys: ['image', 'shape', 'polys', 'texts'] # the order of the dataloader list
loader:
shuffle: False
drop_last: False
batch_size_per_card: 1
num_workers: 2
...@@ -13,6 +13,7 @@ Global: ...@@ -13,6 +13,7 @@ Global:
save_inference_dir: save_inference_dir:
use_visualdl: False use_visualdl: False
infer_img: infer_img:
infer_visual_type: EN # two mode: EN is for english datasets, CN is for chinese datasets
valid_set: totaltext # two mode: totaltext valid curved words, partvgg valid non-curved words valid_set: totaltext # two mode: totaltext valid curved words, partvgg valid non-curved words
save_res_path: ./output/pgnet_r50_vd_totaltext/predicts_pgnet.txt save_res_path: ./output/pgnet_r50_vd_totaltext/predicts_pgnet.txt
character_dict_path: ppocr/utils/ic15_dict.txt character_dict_path: ppocr/utils/ic15_dict.txt
...@@ -32,6 +33,7 @@ Architecture: ...@@ -32,6 +33,7 @@ Architecture:
name: PGFPN name: PGFPN
Head: Head:
name: PGHead name: PGHead
character_dict_path: ppocr/utils/ic15_dict.txt # the same as Global:character_dict_path
Loss: Loss:
name: PGLoss name: PGLoss
...@@ -45,16 +47,18 @@ Optimizer: ...@@ -45,16 +47,18 @@ Optimizer:
beta1: 0.9 beta1: 0.9
beta2: 0.999 beta2: 0.999
lr: lr:
name: Cosine
learning_rate: 0.001 learning_rate: 0.001
warmup_epoch: 50
regularizer: regularizer:
name: 'L2' name: 'L2'
factor: 0 factor: 0.0001
PostProcess: PostProcess:
name: PGPostProcess name: PGPostProcess
score_thresh: 0.5 score_thresh: 0.5
mode: fast # fast or slow two ways mode: fast # fast or slow two ways
point_gather_mode: align # same as PGProcessTrain: point_gather_mode
Metric: Metric:
name: E2EMetric name: E2EMetric
...@@ -76,9 +80,12 @@ Train: ...@@ -76,9 +80,12 @@ Train:
- E2ELabelEncodeTrain: - E2ELabelEncodeTrain:
- PGProcessTrain: - PGProcessTrain:
batch_size: 14 # same as loader: batch_size_per_card batch_size: 14 # same as loader: batch_size_per_card
use_resize: True
use_random_crop: False
min_crop_size: 24 min_crop_size: 24
min_text_size: 4 min_text_size: 4
max_text_size: 512 max_text_size: 512
point_gather_mode: align # two mode: align and none, align mode is better than none mode
- KeepKeys: - KeepKeys:
keep_keys: [ 'images', 'tcl_maps', 'tcl_label_maps', 'border_maps','direction_maps', 'training_masks', 'label_list', 'pos_list', 'pos_mask' ] # dataloader will return list in this order keep_keys: [ 'images', 'tcl_maps', 'tcl_label_maps', 'border_maps','direction_maps', 'training_masks', 'label_list', 'pos_list', 'pos_mask' ] # dataloader will return list in this order
loader: loader:
......
...@@ -68,6 +68,7 @@ Train: ...@@ -68,6 +68,7 @@ Train:
- VQAReTokenRelation: - VQAReTokenRelation:
- VQAReTokenChunk: - VQAReTokenChunk:
max_seq_len: *max_seq_len max_seq_len: *max_seq_len
- TensorizeEntitiesRelations:
- Resize: - Resize:
size: [224,224] size: [224,224]
- NormalizeImage: - NormalizeImage:
...@@ -83,7 +84,6 @@ Train: ...@@ -83,7 +84,6 @@ Train:
drop_last: False drop_last: False
batch_size_per_card: 2 batch_size_per_card: 2
num_workers: 8 num_workers: 8
collate_fn: ListCollator
Eval: Eval:
dataset: dataset:
...@@ -105,6 +105,7 @@ Eval: ...@@ -105,6 +105,7 @@ Eval:
- VQAReTokenRelation: - VQAReTokenRelation:
- VQAReTokenChunk: - VQAReTokenChunk:
max_seq_len: *max_seq_len max_seq_len: *max_seq_len
- TensorizeEntitiesRelations:
- Resize: - Resize:
size: [224,224] size: [224,224]
- NormalizeImage: - NormalizeImage:
...@@ -120,4 +121,3 @@ Eval: ...@@ -120,4 +121,3 @@ Eval:
drop_last: False drop_last: False
batch_size_per_card: 8 batch_size_per_card: 8
num_workers: 8 num_workers: 8
collate_fn: ListCollator
...@@ -73,6 +73,7 @@ Train: ...@@ -73,6 +73,7 @@ Train:
- VQAReTokenRelation: - VQAReTokenRelation:
- VQAReTokenChunk: - VQAReTokenChunk:
max_seq_len: *max_seq_len max_seq_len: *max_seq_len
- TensorizeEntitiesRelations:
- Resize: - Resize:
size: [224,224] size: [224,224]
- NormalizeImage: - NormalizeImage:
...@@ -82,13 +83,12 @@ Train: ...@@ -82,13 +83,12 @@ Train:
order: 'hwc' order: 'hwc'
- ToCHWImage: - ToCHWImage:
- KeepKeys: - KeepKeys:
keep_keys: [ 'input_ids', 'bbox','attention_mask', 'token_type_ids', 'image', 'entities', 'relations'] # dataloader will return list in this order keep_keys: [ 'input_ids', 'bbox','attention_mask', 'token_type_ids', 'entities', 'relations'] # dataloader will return list in this order
loader: loader:
shuffle: True shuffle: True
drop_last: False drop_last: False
batch_size_per_card: 2 batch_size_per_card: 2
num_workers: 4 num_workers: 4
collate_fn: ListCollator
Eval: Eval:
dataset: dataset:
...@@ -112,6 +112,7 @@ Eval: ...@@ -112,6 +112,7 @@ Eval:
- VQAReTokenRelation: - VQAReTokenRelation:
- VQAReTokenChunk: - VQAReTokenChunk:
max_seq_len: *max_seq_len max_seq_len: *max_seq_len
- TensorizeEntitiesRelations:
- Resize: - Resize:
size: [224,224] size: [224,224]
- NormalizeImage: - NormalizeImage:
...@@ -121,11 +122,9 @@ Eval: ...@@ -121,11 +122,9 @@ Eval:
order: 'hwc' order: 'hwc'
- ToCHWImage: - ToCHWImage:
- KeepKeys: - KeepKeys:
keep_keys: [ 'input_ids', 'bbox', 'attention_mask', 'token_type_ids', 'image', 'entities', 'relations'] # dataloader will return list in this order keep_keys: [ 'input_ids', 'bbox', 'attention_mask', 'token_type_ids', 'entities', 'relations'] # dataloader will return list in this order
loader: loader:
shuffle: False shuffle: False
drop_last: False drop_last: False
batch_size_per_card: 8 batch_size_per_card: 8
num_workers: 8 num_workers: 8
collate_fn: ListCollator
...@@ -57,14 +57,16 @@ Loss: ...@@ -57,14 +57,16 @@ Loss:
mode: "l2" mode: "l2"
model_name_pairs: model_name_pairs:
- ["Student", "Teacher"] - ["Student", "Teacher"]
key: hidden_states_5 key: hidden_states
index: 5
name: "loss_5" name: "loss_5"
- DistillationVQADistanceLoss: - DistillationVQADistanceLoss:
weight: 0.5 weight: 0.5
mode: "l2" mode: "l2"
model_name_pairs: model_name_pairs:
- ["Student", "Teacher"] - ["Student", "Teacher"]
key: hidden_states_8 key: hidden_states
index: 8
name: "loss_8" name: "loss_8"
...@@ -116,6 +118,7 @@ Train: ...@@ -116,6 +118,7 @@ Train:
- VQAReTokenRelation: - VQAReTokenRelation:
- VQAReTokenChunk: - VQAReTokenChunk:
max_seq_len: *max_seq_len max_seq_len: *max_seq_len
- TensorizeEntitiesRelations:
- Resize: - Resize:
size: [224,224] size: [224,224]
- NormalizeImage: - NormalizeImage:
...@@ -125,13 +128,12 @@ Train: ...@@ -125,13 +128,12 @@ Train:
order: 'hwc' order: 'hwc'
- ToCHWImage: - ToCHWImage:
- KeepKeys: - KeepKeys:
keep_keys: [ 'input_ids', 'bbox','attention_mask', 'token_type_ids', 'image', 'entities', 'relations'] # dataloader will return list in this order keep_keys: [ 'input_ids', 'bbox','attention_mask', 'token_type_ids', 'entities', 'relations'] # dataloader will return list in this order
loader: loader:
shuffle: True shuffle: True
drop_last: False drop_last: False
batch_size_per_card: 2 batch_size_per_card: 2
num_workers: 4 num_workers: 4
collate_fn: ListCollator
Eval: Eval:
dataset: dataset:
...@@ -155,6 +157,7 @@ Eval: ...@@ -155,6 +157,7 @@ Eval:
- VQAReTokenRelation: - VQAReTokenRelation:
- VQAReTokenChunk: - VQAReTokenChunk:
max_seq_len: *max_seq_len max_seq_len: *max_seq_len
- TensorizeEntitiesRelations:
- Resize: - Resize:
size: [224,224] size: [224,224]
- NormalizeImage: - NormalizeImage:
...@@ -164,12 +167,11 @@ Eval: ...@@ -164,12 +167,11 @@ Eval:
order: 'hwc' order: 'hwc'
- ToCHWImage: - ToCHWImage:
- KeepKeys: - KeepKeys:
keep_keys: [ 'input_ids', 'bbox', 'attention_mask', 'token_type_ids', 'image', 'entities', 'relations'] # dataloader will return list in this order keep_keys: [ 'input_ids', 'bbox', 'attention_mask', 'token_type_ids', 'entities', 'relations'] # dataloader will return list in this order
loader: loader:
shuffle: False shuffle: False
drop_last: False drop_last: False
batch_size_per_card: 8 batch_size_per_card: 8
num_workers: 8 num_workers: 8
collate_fn: ListCollator
...@@ -70,14 +70,16 @@ Loss: ...@@ -70,14 +70,16 @@ Loss:
mode: "l2" mode: "l2"
model_name_pairs: model_name_pairs:
- ["Student", "Teacher"] - ["Student", "Teacher"]
key: hidden_states_5 key: hidden_states
index: 5
name: "loss_5" name: "loss_5"
- DistillationVQADistanceLoss: - DistillationVQADistanceLoss:
weight: 0.5 weight: 0.5
mode: "l2" mode: "l2"
model_name_pairs: model_name_pairs:
- ["Student", "Teacher"] - ["Student", "Teacher"]
key: hidden_states_8 key: hidden_states
index: 8
name: "loss_8" name: "loss_8"
......
...@@ -88,6 +88,7 @@ Train: ...@@ -88,6 +88,7 @@ Train:
prob: 0.5 prob: 0.5
ext_data_num: 2 ext_data_num: 2
image_shape: [48, 320, 3] image_shape: [48, 320, 3]
max_text_length: *max_text_length
- RecAug: - RecAug:
- MultiLabelEncode: - MultiLabelEncode:
- RecResizeImg: - RecResizeImg:
......
...@@ -162,6 +162,7 @@ Train: ...@@ -162,6 +162,7 @@ Train:
prob: 0.5 prob: 0.5
ext_data_num: 2 ext_data_num: 2
image_shape: [48, 320, 3] image_shape: [48, 320, 3]
max_text_length: *max_text_length
- RecAug: - RecAug:
- MultiLabelEncode: - MultiLabelEncode:
- RecResizeImg: - RecResizeImg:
......
...@@ -88,6 +88,7 @@ Train: ...@@ -88,6 +88,7 @@ Train:
prob: 0.5 prob: 0.5
ext_data_num: 2 ext_data_num: 2
image_shape: [48, 320, 3] image_shape: [48, 320, 3]
max_text_length: *max_text_length
- RecAug: - RecAug:
- MultiLabelEncode: - MultiLabelEncode:
- RecResizeImg: - RecResizeImg:
......
...@@ -12,7 +12,7 @@ Global: ...@@ -12,7 +12,7 @@ Global:
checkpoints: checkpoints:
save_inference_dir: save_inference_dir:
use_visualdl: False use_visualdl: False
infer_img: ./inference/rec_inference infer_img: doc/imgs_words_en/word_10.png
# for data or label process # for data or label process
character_dict_path: ppocr/utils/dict90.txt character_dict_path: ppocr/utils/dict90.txt
max_text_length: &max_text_length 40 max_text_length: &max_text_length 40
......
...@@ -12,7 +12,7 @@ Global: ...@@ -12,7 +12,7 @@ Global:
checkpoints: checkpoints:
save_inference_dir: save_inference_dir:
use_visualdl: False use_visualdl: False
infer_img: doc/imgs_words/ch/word_1.jpg infer_img: doc/imgs_words_en/word_10.png
# for data or label process # for data or label process
character_dict_path: ./ppocr/utils/dict/spin_dict.txt character_dict_path: ./ppocr/utils/dict/spin_dict.txt
max_text_length: 25 max_text_length: 25
......
...@@ -12,7 +12,7 @@ Global: ...@@ -12,7 +12,7 @@ Global:
checkpoints: checkpoints:
save_inference_dir: ./output/SLANet/infer save_inference_dir: ./output/SLANet/infer
use_visualdl: False use_visualdl: False
infer_img: doc/table/table.jpg infer_img: ppstructure/docs/table/table.jpg
# for data or label process # for data or label process
character_dict_path: ppocr/utils/dict/table_structure_dict.txt character_dict_path: ppocr/utils/dict/table_structure_dict.txt
character_type: en character_type: en
......
...@@ -12,7 +12,7 @@ Global: ...@@ -12,7 +12,7 @@ Global:
checkpoints: checkpoints:
save_inference_dir: ./output/SLANet_ch/infer save_inference_dir: ./output/SLANet_ch/infer
use_visualdl: False use_visualdl: False
infer_img: doc/table/table.jpg infer_img: ppstructure/docs/table/table.jpg
# for data or label process # for data or label process
character_dict_path: ppocr/utils/dict/table_structure_dict_ch.txt character_dict_path: ppocr/utils/dict/table_structure_dict_ch.txt
character_type: en character_type: en
......
...@@ -43,7 +43,6 @@ Architecture: ...@@ -43,7 +43,6 @@ Architecture:
Head: Head:
name: TableAttentionHead name: TableAttentionHead
hidden_size: 256 hidden_size: 256
loc_type: 2
max_text_length: *max_text_length max_text_length: *max_text_length
loc_reg_num: &loc_reg_num 4 loc_reg_num: &loc_reg_num 4
......
...@@ -49,13 +49,20 @@ DECLARE_int32(rec_batch_num); ...@@ -49,13 +49,20 @@ DECLARE_int32(rec_batch_num);
DECLARE_string(rec_char_dict_path); DECLARE_string(rec_char_dict_path);
DECLARE_int32(rec_img_h); DECLARE_int32(rec_img_h);
DECLARE_int32(rec_img_w); DECLARE_int32(rec_img_w);
// layout model related
DECLARE_string(layout_model_dir);
DECLARE_string(layout_dict_path);
DECLARE_double(layout_score_threshold);
DECLARE_double(layout_nms_threshold);
// structure model related // structure model related
DECLARE_string(table_model_dir); DECLARE_string(table_model_dir);
DECLARE_int32(table_max_len); DECLARE_int32(table_max_len);
DECLARE_int32(table_batch_num); DECLARE_int32(table_batch_num);
DECLARE_string(table_char_dict_path); DECLARE_string(table_char_dict_path);
DECLARE_bool(merge_no_span_structure);
// forward related // forward related
DECLARE_bool(det); DECLARE_bool(det);
DECLARE_bool(rec); DECLARE_bool(rec);
DECLARE_bool(cls); DECLARE_bool(cls);
DECLARE_bool(table); DECLARE_bool(table);
DECLARE_bool(layout);
\ No newline at end of file
...@@ -14,26 +14,12 @@ ...@@ -14,26 +14,12 @@
#pragma once #pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h" #include "paddle_api.h"
#include "paddle_inference_api.h" #include "paddle_inference_api.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/preprocess_op.h> #include <include/preprocess_op.h>
#include <include/utility.h> #include <include/utility.h>
using namespace paddle_infer;
namespace PaddleOCR { namespace PaddleOCR {
class Classifier { class Classifier {
...@@ -66,7 +52,7 @@ public: ...@@ -66,7 +52,7 @@ public:
std::vector<float> &cls_scores, std::vector<double> &times); std::vector<float> &cls_scores, std::vector<double> &times);
private: private:
std::shared_ptr<Predictor> predictor_; std::shared_ptr<paddle_infer::Predictor> predictor_;
bool use_gpu_ = false; bool use_gpu_ = false;
int gpu_id_ = 0; int gpu_id_ = 0;
......
...@@ -14,26 +14,12 @@ ...@@ -14,26 +14,12 @@
#pragma once #pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h" #include "paddle_api.h"
#include "paddle_inference_api.h" #include "paddle_inference_api.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/postprocess_op.h> #include <include/postprocess_op.h>
#include <include/preprocess_op.h> #include <include/preprocess_op.h>
using namespace paddle_infer;
namespace PaddleOCR { namespace PaddleOCR {
class DBDetector { class DBDetector {
...@@ -41,7 +27,7 @@ public: ...@@ -41,7 +27,7 @@ public:
explicit DBDetector(const std::string &model_dir, const bool &use_gpu, explicit DBDetector(const std::string &model_dir, const bool &use_gpu,
const int &gpu_id, const int &gpu_mem, const int &gpu_id, const int &gpu_mem,
const int &cpu_math_library_num_threads, const int &cpu_math_library_num_threads,
const bool &use_mkldnn, const string &limit_type, const bool &use_mkldnn, const std::string &limit_type,
const int &limit_side_len, const double &det_db_thresh, const int &limit_side_len, const double &det_db_thresh,
const double &det_db_box_thresh, const double &det_db_box_thresh,
const double &det_db_unclip_ratio, const double &det_db_unclip_ratio,
...@@ -77,7 +63,7 @@ public: ...@@ -77,7 +63,7 @@ public:
std::vector<double> &times); std::vector<double> &times);
private: private:
std::shared_ptr<Predictor> predictor_; std::shared_ptr<paddle_infer::Predictor> predictor_;
bool use_gpu_ = false; bool use_gpu_ = false;
int gpu_id_ = 0; int gpu_id_ = 0;
...@@ -85,7 +71,7 @@ private: ...@@ -85,7 +71,7 @@ private:
int cpu_math_library_num_threads_ = 4; int cpu_math_library_num_threads_ = 4;
bool use_mkldnn_ = false; bool use_mkldnn_ = false;
string limit_type_ = "max"; std::string limit_type_ = "max";
int limit_side_len_ = 960; int limit_side_len_ = 960;
double det_db_thresh_ = 0.3; double det_db_thresh_ = 0.3;
......
...@@ -14,27 +14,12 @@ ...@@ -14,27 +14,12 @@
#pragma once #pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h" #include "paddle_api.h"
#include "paddle_inference_api.h" #include "paddle_inference_api.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/ocr_cls.h> #include <include/ocr_cls.h>
#include <include/preprocess_op.h>
#include <include/utility.h> #include <include/utility.h>
using namespace paddle_infer;
namespace PaddleOCR { namespace PaddleOCR {
class CRNNRecognizer { class CRNNRecognizer {
...@@ -42,7 +27,7 @@ public: ...@@ -42,7 +27,7 @@ public:
explicit CRNNRecognizer(const std::string &model_dir, const bool &use_gpu, explicit CRNNRecognizer(const std::string &model_dir, const bool &use_gpu,
const int &gpu_id, const int &gpu_mem, const int &gpu_id, const int &gpu_mem,
const int &cpu_math_library_num_threads, const int &cpu_math_library_num_threads,
const bool &use_mkldnn, const string &label_path, const bool &use_mkldnn, const std::string &label_path,
const bool &use_tensorrt, const bool &use_tensorrt,
const std::string &precision, const std::string &precision,
const int &rec_batch_num, const int &rec_img_h, const int &rec_batch_num, const int &rec_img_h,
...@@ -75,7 +60,7 @@ public: ...@@ -75,7 +60,7 @@ public:
std::vector<float> &rec_text_scores, std::vector<double> &times); std::vector<float> &rec_text_scores, std::vector<double> &times);
private: private:
std::shared_ptr<Predictor> predictor_; std::shared_ptr<paddle_infer::Predictor> predictor_;
bool use_gpu_ = false; bool use_gpu_ = false;
int gpu_id_ = 0; int gpu_id_ = 0;
......
...@@ -14,28 +14,9 @@ ...@@ -14,28 +14,9 @@
#pragma once #pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h"
#include "paddle_inference_api.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/ocr_cls.h> #include <include/ocr_cls.h>
#include <include/ocr_det.h> #include <include/ocr_det.h>
#include <include/ocr_rec.h> #include <include/ocr_rec.h>
#include <include/preprocess_op.h>
#include <include/utility.h>
using namespace paddle_infer;
namespace PaddleOCR { namespace PaddleOCR {
...@@ -43,21 +24,27 @@ class PPOCR { ...@@ -43,21 +24,27 @@ class PPOCR {
public: public:
explicit PPOCR(); explicit PPOCR();
~PPOCR(); ~PPOCR();
std::vector<std::vector<OCRPredictResult>>
ocr(std::vector<cv::String> cv_all_img_names, bool det = true, std::vector<std::vector<OCRPredictResult>> ocr(std::vector<cv::Mat> img_list,
bool det = true,
bool rec = true,
bool cls = true);
std::vector<OCRPredictResult> ocr(cv::Mat img, bool det = true,
bool rec = true, bool cls = true); bool rec = true, bool cls = true);
void reset_timer();
void benchmark_log(int img_num);
protected: protected:
void det(cv::Mat img, std::vector<OCRPredictResult> &ocr_results, std::vector<double> time_info_det = {0, 0, 0};
std::vector<double> &times); std::vector<double> time_info_rec = {0, 0, 0};
std::vector<double> time_info_cls = {0, 0, 0};
void det(cv::Mat img, std::vector<OCRPredictResult> &ocr_results);
void rec(std::vector<cv::Mat> img_list, void rec(std::vector<cv::Mat> img_list,
std::vector<OCRPredictResult> &ocr_results, std::vector<OCRPredictResult> &ocr_results);
std::vector<double> &times);
void cls(std::vector<cv::Mat> img_list, void cls(std::vector<cv::Mat> img_list,
std::vector<OCRPredictResult> &ocr_results, std::vector<OCRPredictResult> &ocr_results);
std::vector<double> &times);
void log(std::vector<double> &det_times, std::vector<double> &rec_times,
std::vector<double> &cls_times, int img_num);
private: private:
DBDetector *detector_ = nullptr; DBDetector *detector_ = nullptr;
......
...@@ -14,27 +14,9 @@ ...@@ -14,27 +14,9 @@
#pragma once #pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h"
#include "paddle_inference_api.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/paddleocr.h> #include <include/paddleocr.h>
#include <include/preprocess_op.h> #include <include/structure_layout.h>
#include <include/structure_table.h> #include <include/structure_table.h>
#include <include/utility.h>
using namespace paddle_infer;
namespace PaddleOCR { namespace PaddleOCR {
...@@ -42,27 +24,32 @@ class PaddleStructure : public PPOCR { ...@@ -42,27 +24,32 @@ class PaddleStructure : public PPOCR {
public: public:
explicit PaddleStructure(); explicit PaddleStructure();
~PaddleStructure(); ~PaddleStructure();
std::vector<std::vector<StructurePredictResult>>
structure(std::vector<cv::String> cv_all_img_names, bool layout = false, std::vector<StructurePredictResult> structure(cv::Mat img,
bool table = true); bool layout = false,
bool table = true,
bool ocr = false);
void reset_timer();
void benchmark_log(int img_num);
private: private:
StructureTableRecognizer *recognizer_ = nullptr; std::vector<double> time_info_table = {0, 0, 0};
std::vector<double> time_info_layout = {0, 0, 0};
StructureTableRecognizer *table_model_ = nullptr;
StructureLayoutRecognizer *layout_model_ = nullptr;
void layout(cv::Mat img,
std::vector<StructurePredictResult> &structure_result);
void table(cv::Mat img, StructurePredictResult &structure_result);
void table(cv::Mat img, StructurePredictResult &structure_result, std::string rebuild_table(std::vector<std::string> rec_html_tags,
std::vector<double> &time_info_table, std::vector<std::vector<int>> rec_boxes,
std::vector<double> &time_info_det,
std::vector<double> &time_info_rec,
std::vector<double> &time_info_cls);
std::string
rebuild_table(std::vector<std::string> rec_html_tags,
std::vector<std::vector<std::vector<int>>> rec_boxes,
std::vector<OCRPredictResult> &ocr_result); std::vector<OCRPredictResult> &ocr_result);
float iou(std::vector<std::vector<int>> &box1, float dis(std::vector<int> &box1, std::vector<int> &box2);
std::vector<std::vector<int>> &box2);
float dis(std::vector<std::vector<int>> &box1,
std::vector<std::vector<int>> &box2);
static bool comparison_dis(const std::vector<float> &dis1, static bool comparison_dis(const std::vector<float> &dis1,
const std::vector<float> &dis2) { const std::vector<float> &dis2) {
......
...@@ -14,24 +14,9 @@ ...@@ -14,24 +14,9 @@
#pragma once #pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include "include/clipper.h" #include "include/clipper.h"
#include "include/utility.h" #include "include/utility.h"
using namespace std;
namespace PaddleOCR { namespace PaddleOCR {
class DBPostProcessor { class DBPostProcessor {
...@@ -92,13 +77,12 @@ private: ...@@ -92,13 +77,12 @@ private:
class TablePostProcessor { class TablePostProcessor {
public: public:
void init(std::string label_path); void init(std::string label_path, bool merge_no_span_structure = true);
void void Run(std::vector<float> &loc_preds, std::vector<float> &structure_probs,
Run(std::vector<float> &loc_preds, std::vector<float> &structure_probs,
std::vector<float> &rec_scores, std::vector<int> &loc_preds_shape, std::vector<float> &rec_scores, std::vector<int> &loc_preds_shape,
std::vector<int> &structure_probs_shape, std::vector<int> &structure_probs_shape,
std::vector<std::vector<std::string>> &rec_html_tag_batch, std::vector<std::vector<std::string>> &rec_html_tag_batch,
std::vector<std::vector<std::vector<std::vector<int>>>> &rec_boxes_batch, std::vector<std::vector<std::vector<int>>> &rec_boxes_batch,
std::vector<int> &width_list, std::vector<int> &height_list); std::vector<int> &width_list, std::vector<int> &height_list);
private: private:
...@@ -107,4 +91,27 @@ private: ...@@ -107,4 +91,27 @@ private:
std::string beg = "sos"; std::string beg = "sos";
}; };
// Post-processor interface for the layout-detection model. It declares how
// raw network outputs are converted into StructurePredictResult entries.
// Only declarations are visible in this header; the behavior notes below that
// go beyond the signatures are marked as assumptions.
class PicodetPostProcessor {
public:
  // Configures the post-processor.
  //   label_path      - path to the label file
  //                     (presumably used to populate label_list_ -- confirm
  //                     against the implementation file)
  //   score_threshold - defaults to 0.4
  //   nms_threshold   - defaults to 0.5
  //   fpn_stride      - defaults to {8, 16, 32, 64}
  void init(std::string label_path, const double score_threshold = 0.4,
            const double nms_threshold = 0.5,
            const std::vector<int> &fpn_stride = {8, 16, 32, 64});

  // Fills `results` from the model outputs `outs`.
  //   results      - output container, passed by non-const reference
  //   outs         - one float vector per model output
  //   ori_shape    - shape of the original image
  //   resize_shape - shape of the resized network input
  //                  (NOTE(review): element order of both shape vectors is not
  //                  visible here -- confirm against the caller)
  //   reg_max      - same quantity that disPred2Bbox receives as reg_max.
  //                  The declaration previously spelled it `eg_max`; parameter
  //                  names in a declaration are not part of the signature, so
  //                  callers and the definition are unaffected by the rename.
  void Run(std::vector<StructurePredictResult> &results,
           std::vector<std::vector<float>> outs, std::vector<int> ori_shape,
           std::vector<int> resize_shape, int reg_max);

  // Public stride list; default-initialized to the same values as the
  // fpn_stride default of init().
  std::vector<int> fpn_stride_ = {8, 16, 32, 64};

private:
  // Builds a single StructurePredictResult from one prediction located at
  // (x, y) for the given stride.
  // NOTE(review): presumably decodes a distance-distribution prediction into
  // box coordinates clipped to im_shape -- confirm in the implementation.
  StructurePredictResult disPred2Bbox(std::vector<float> bbox_pred, int label,
                                      float score, int x, int y, int stride,
                                      std::vector<int> im_shape, int reg_max);

  // Applies non-maximum suppression to input_boxes, modifying the vector
  // through the reference, using nms_threshold as the cut-off.
  void nms(std::vector<StructurePredictResult> &input_boxes,
           float nms_threshold);

  std::vector<std::string> label_list_; // class labels
  double score_threshold_ = 0.4;        // mirrors the init() default
  double nms_threshold_ = 0.5;          // mirrors the init() default
  int num_class_ = 5;                   // default number of classes
};
} // namespace PaddleOCR } // namespace PaddleOCR
...@@ -14,21 +14,12 @@ ...@@ -14,21 +14,12 @@
#pragma once #pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include <chrono>
#include <iomanip>
#include <iostream> #include <iostream>
#include <ostream>
#include <vector> #include <vector>
#include <cstring> #include "opencv2/core.hpp"
#include <fstream> #include "opencv2/imgcodecs.hpp"
#include <numeric> #include "opencv2/imgproc.hpp"
using namespace std;
using namespace paddle;
namespace PaddleOCR { namespace PaddleOCR {
...@@ -51,9 +42,9 @@ public: ...@@ -51,9 +42,9 @@ public:
class ResizeImgType0 { class ResizeImgType0 {
public: public:
virtual void Run(const cv::Mat &img, cv::Mat &resize_img, string limit_type, virtual void Run(const cv::Mat &img, cv::Mat &resize_img,
int limit_side_len, float &ratio_h, float &ratio_w, std::string limit_type, int limit_side_len, float &ratio_h,
bool use_tensorrt); float &ratio_w, bool use_tensorrt);
}; };
class CrnnResizeImg { class CrnnResizeImg {
...@@ -82,4 +73,10 @@ public: ...@@ -82,4 +73,10 @@ public:
const int max_len = 488); const int max_len = 488);
}; };
// Image resize pre-processing operator. Only the interface is declared in
// this header; the definition of Run() lives in the corresponding source file.
class Resize {
public:
// Takes the source image `img` (read-only) and the output image `resize_img`
// (written through the non-const reference), plus two integer sizes.
// NOTE(review): `h` and `w` are presumably the target height and width in
// pixels; confirm against the definition.
virtual void Run(const cv::Mat &img, cv::Mat &resize_img, const int h,
const int w);
};
} // namespace PaddleOCR } // namespace PaddleOCR
\ No newline at end of file
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle_api.h"
#include "paddle_inference_api.h"
#include <include/postprocess_op.h>
#include <include/preprocess_op.h>
namespace PaddleOCR {
class StructureLayoutRecognizer {
public:
explicit StructureLayoutRecognizer(
const std::string &model_dir, const bool &use_gpu, const int &gpu_id,
const int &gpu_mem, const int &cpu_math_library_num_threads,
const bool &use_mkldnn, const std::string &label_path,
const bool &use_tensorrt, const std::string &precision,
const double &layout_score_threshold,
const double &layout_nms_threshold) {
this->use_gpu_ = use_gpu;
this->gpu_id_ = gpu_id;
this->gpu_mem_ = gpu_mem;
this->cpu_math_library_num_threads_ = cpu_math_library_num_threads;
this->use_mkldnn_ = use_mkldnn;
this->use_tensorrt_ = use_tensorrt;
this->precision_ = precision;
this->post_processor_.init(label_path, layout_score_threshold,
layout_nms_threshold);
LoadModel(model_dir);
}
// Load Paddle inference model
void LoadModel(const std::string &model_dir);
void Run(cv::Mat img, std::vector<StructurePredictResult> &result,
std::vector<double> &times);
private:
std::shared_ptr<paddle_infer::Predictor> predictor_;
bool use_gpu_ = false;
int gpu_id_ = 0;
int gpu_mem_ = 4000;
int cpu_math_library_num_threads_ = 4;
bool use_mkldnn_ = false;
std::vector<float> mean_ = {0.485f, 0.456f, 0.406f};
std::vector<float> scale_ = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};
bool is_scale_ = true;
bool use_tensorrt_ = false;
std::string precision_ = "fp32";
// pre-process
Resize resize_op_;
Normalize normalize_op_;
Permute permute_op_;
// post-process
PicodetPostProcessor post_processor_;
};
} // namespace PaddleOCR
\ No newline at end of file
...@@ -14,26 +14,11 @@ ...@@ -14,26 +14,11 @@
#pragma once #pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h" #include "paddle_api.h"
#include "paddle_inference_api.h" #include "paddle_inference_api.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/postprocess_op.h> #include <include/postprocess_op.h>
#include <include/preprocess_op.h> #include <include/preprocess_op.h>
#include <include/utility.h>
using namespace paddle_infer;
namespace PaddleOCR { namespace PaddleOCR {
...@@ -42,9 +27,10 @@ public: ...@@ -42,9 +27,10 @@ public:
explicit StructureTableRecognizer( explicit StructureTableRecognizer(
const std::string &model_dir, const bool &use_gpu, const int &gpu_id, const std::string &model_dir, const bool &use_gpu, const int &gpu_id,
const int &gpu_mem, const int &cpu_math_library_num_threads, const int &gpu_mem, const int &cpu_math_library_num_threads,
const bool &use_mkldnn, const string &label_path, const bool &use_mkldnn, const std::string &label_path,
const bool &use_tensorrt, const std::string &precision, const bool &use_tensorrt, const std::string &precision,
const int &table_batch_num, const int &table_max_len) { const int &table_batch_num, const int &table_max_len,
const bool &merge_no_span_structure) {
this->use_gpu_ = use_gpu; this->use_gpu_ = use_gpu;
this->gpu_id_ = gpu_id; this->gpu_id_ = gpu_id;
this->gpu_mem_ = gpu_mem; this->gpu_mem_ = gpu_mem;
...@@ -55,7 +41,7 @@ public: ...@@ -55,7 +41,7 @@ public:
this->table_batch_num_ = table_batch_num; this->table_batch_num_ = table_batch_num;
this->table_max_len_ = table_max_len; this->table_max_len_ = table_max_len;
this->post_processor_.init(label_path); this->post_processor_.init(label_path, merge_no_span_structure);
LoadModel(model_dir); LoadModel(model_dir);
} }
...@@ -65,11 +51,11 @@ public: ...@@ -65,11 +51,11 @@ public:
void Run(std::vector<cv::Mat> img_list, void Run(std::vector<cv::Mat> img_list,
std::vector<std::vector<std::string>> &rec_html_tags, std::vector<std::vector<std::string>> &rec_html_tags,
std::vector<float> &rec_scores, std::vector<float> &rec_scores,
std::vector<std::vector<std::vector<std::vector<int>>>> &rec_boxes, std::vector<std::vector<std::vector<int>>> &rec_boxes,
std::vector<double> &times); std::vector<double> &times);
private: private:
std::shared_ptr<Predictor> predictor_; std::shared_ptr<paddle_infer::Predictor> predictor_;
bool use_gpu_ = false; bool use_gpu_ = false;
int gpu_id_ = 0; int gpu_id_ = 0;
......
...@@ -41,11 +41,13 @@ struct OCRPredictResult { ...@@ -41,11 +41,13 @@ struct OCRPredictResult {
}; };
struct StructurePredictResult { struct StructurePredictResult {
std::vector<int> box; std::vector<float> box;
std::vector<std::vector<int>> cell_box;
std::string type; std::string type;
std::vector<OCRPredictResult> text_res; std::vector<OCRPredictResult> text_res;
std::string html; std::string html;
float html_score = -1; float html_score = -1;
float confidence;
}; };
class Utility { class Utility {
...@@ -56,6 +58,10 @@ public: ...@@ -56,6 +58,10 @@ public:
const std::vector<OCRPredictResult> &ocr_result, const std::vector<OCRPredictResult> &ocr_result,
const std::string &save_path); const std::string &save_path);
static void VisualizeBboxes(const cv::Mat &srcimg,
const StructurePredictResult &structure_result,
const std::string &save_path);
template <class ForwardIterator> template <class ForwardIterator>
inline static size_t argmax(ForwardIterator first, ForwardIterator last) { inline static size_t argmax(ForwardIterator first, ForwardIterator last) {
return std::distance(first, std::max_element(first, last)); return std::distance(first, std::max_element(first, last));
...@@ -77,10 +83,20 @@ public: ...@@ -77,10 +83,20 @@ public:
static void print_result(const std::vector<OCRPredictResult> &ocr_result); static void print_result(const std::vector<OCRPredictResult> &ocr_result);
static cv::Mat crop_image(cv::Mat &img, std::vector<int> &area); static cv::Mat crop_image(cv::Mat &img, const std::vector<int> &area);
static cv::Mat crop_image(cv::Mat &img, const std::vector<float> &area);
static void sorted_boxes(std::vector<OCRPredictResult> &ocr_result); static void sorted_boxes(std::vector<OCRPredictResult> &ocr_result);
static std::vector<int> xyxyxyxy2xyxy(std::vector<std::vector<int>> &box);
static std::vector<int> xyxyxyxy2xyxy(std::vector<int> &box);
static float fast_exp(float x);
static std::vector<float>
activation_function_softmax(std::vector<float> &src);
static float iou(std::vector<int> &box1, std::vector<int> &box2);
static float iou(std::vector<float> &box1, std::vector<float> &box2);
private: private:
static bool comparison_box(const OCRPredictResult &result1, static bool comparison_box(const OCRPredictResult &result1,
const OCRPredictResult &result2) { const OCRPredictResult &result2) {
......
...@@ -174,6 +174,9 @@ inference/ ...@@ -174,6 +174,9 @@ inference/
|-- table |-- table
| |--inference.pdiparams | |--inference.pdiparams
| |--inference.pdmodel | |--inference.pdmodel
|-- layout
| |--inference.pdiparams
| |--inference.pdmodel
``` ```
...@@ -278,8 +281,30 @@ Specifically, ...@@ -278,8 +281,30 @@ Specifically,
--cls=true \ --cls=true \
``` ```
##### 7. layout+table
```shell
./build/ppocr --det_model_dir=inference/det_db \
--rec_model_dir=inference/rec_rcnn \
--table_model_dir=inference/table \
--image_dir=../../ppstructure/docs/table/table.jpg \
--layout_model_dir=inference/layout \
--type=structure \
--table=true \
--layout=true
```
##### 8. layout
```shell
./build/ppocr --layout_model_dir=inference/layout \
--image_dir=../../ppstructure/docs/table/1.png \
--type=structure \
--table=false \
--layout=true \
--det=false \
--rec=false
```
##### 7. table ##### 9. table
```shell ```shell
./build/ppocr --det_model_dir=inference/det_db \ ./build/ppocr --det_model_dir=inference/det_db \
--rec_model_dir=inference/rec_rcnn \ --rec_model_dir=inference/rec_rcnn \
...@@ -343,6 +368,16 @@ More parameters are as follows, ...@@ -343,6 +368,16 @@ More parameters are as follows,
|rec_img_h|int|48|image height of recognition| |rec_img_h|int|48|image height of recognition|
|rec_img_w|int|320|image width of recognition| |rec_img_w|int|320|image width of recognition|
- Layout related parameters
|parameter|data type|default|meaning|
| :---: | :---: | :---: | :---: |
|layout_model_dir|string|-| Address of layout inference model|
|layout_dict_path|string|../../ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt|dictionary file|
|layout_score_threshold|float|0.5|Threshold of score.|
|layout_nms_threshold|float|0.5|Threshold of nms.|
- Table recognition related parameters - Table recognition related parameters
|parameter|data type|default|meaning| |parameter|data type|default|meaning|
...@@ -350,6 +385,7 @@ More parameters are as follows, ...@@ -350,6 +385,7 @@ More parameters are as follows,
|table_model_dir|string|-|Address of table recognition inference model| |table_model_dir|string|-|Address of table recognition inference model|
|table_char_dict_path|string|../../ppocr/utils/dict/table_structure_dict.txt|dictionary file| |table_char_dict_path|string|../../ppocr/utils/dict/table_structure_dict.txt|dictionary file|
|table_max_len|int|488|The size of the long side of the input image of the table recognition model, the final input image size of the network is(table_max_len,table_max_len)| |table_max_len|int|488|The size of the long side of the input image of the table recognition model, the final input image size of the network is(table_max_len,table_max_len)|
|merge_no_span_structure|bool|true|Whether to merge <td> and </td> to <td></td>|
* Multi-language inference is also supported in PaddleOCR, you can refer to [recognition tutorial](../../doc/doc_en/recognition_en.md) for more supported languages and models in PaddleOCR. Specifically, if you want to infer using multi-language models, you just need to modify values of `rec_char_dict_path` and `rec_model_dir`. * Multi-language inference is also supported in PaddleOCR, you can refer to [recognition tutorial](../../doc/doc_en/recognition_en.md) for more supported languages and models in PaddleOCR. Specifically, if you want to infer using multi-language models, you just need to modify values of `rec_char_dict_path` and `rec_model_dir`.
...@@ -367,11 +403,51 @@ predict img: ../../doc/imgs/12.jpg ...@@ -367,11 +403,51 @@ predict img: ../../doc/imgs/12.jpg
The detection visualized image saved in ./output//12.jpg The detection visualized image saved in ./output//12.jpg
``` ```
- table - layout+table
```bash ```bash
predict img: ../../ppstructure/docs/table/table.jpg predict img: ../../ppstructure/docs/table/1.png
0 type: table, region: [0,0,371,293], res: <html><body><table><thead><tr><td>Methods</td><td>R</td><td>P</td><td>F</td><td>FPS</td></tr></thead><tbody><tr><td>SegLink [26]</td><td>70.0</td><td>86.0</td><td>77.0</td><td>8.9</td></tr><tr><td>PixelLink [4]</td><td>73.2</td><td>83.0</td><td>77.8</td><td>-</td></tr><tr><td>TextSnake [18]</td><td>73.9</td><td>83.2</td><td>78.3</td><td>1.1</td></tr><tr><td>TextField [37]</td><td>75.9</td><td>87.4</td><td>81.3</td><td>5.2 </td></tr><tr><td>MSR[38]</td><td>76.7</td><td>87.4</td><td>81.7</td><td>-</td></tr><tr><td>FTSN [3]</td><td>77.1</td><td>87.6</td><td>82.0</td><td>-</td></tr><tr><td>LSE[30]</td><td>81.7</td><td>84.2</td><td>82.9</td><td>-</td></tr><tr><td>CRAFT [2]</td><td>78.2</td><td>88.2</td><td>82.9</td><td>8.6</td></tr><tr><td>MCN [16]</td><td>79</td><td>88</td><td>83</td><td>-</td></tr><tr><td>ATRR[35]</td><td>82.1</td><td>85.2</td><td>83.6</td><td>-</td></tr><tr><td>PAN [34]</td><td>83.8</td><td>84.4</td><td>84.1</td><td>30.2</td></tr><tr><td>DB[12]</td><td>79.2</td><td>91.5</td><td>84.9</td><td>32.0</td></tr><tr><td>DRRG [41]</td><td>82.30</td><td>88.05</td><td>85.08</td><td>-</td></tr><tr><td>Ours (SynText)</td><td>80.68</td><td>85.40</td><td>82.97</td><td>12.68</td></tr><tr><td>Ours (MLT-17)</td><td>84.54</td><td>86.62</td><td>85.57</td><td>12.31</td></tr></tbody></table></body></html> 0 type: text, region: [12,729,410,848], score: 0.781044, res: count of ocr result is : 7
********** print ocr result **********
0 det boxes: [[4,1],[79,1],[79,12],[4,12]] rec text: CTW1500. rec score: 0.769472
...
6 det boxes: [[4,99],[391,99],[391,112],[4,112]] rec text: sate-of-the-artmethods[12.34.36l.ourapproachachieves rec score: 0.90414
********** end print ocr result **********
1 type: text, region: [69,342,342,359], score: 0.703666, res: count of ocr result is : 1
********** print ocr result **********
0 det boxes: [[8,2],[269,2],[269,13],[8,13]] rec text: Table6.Experimentalresults on CTW-1500 rec score: 0.890454
********** end print ocr result **********
2 type: text, region: [70,316,706,332], score: 0.659738, res: count of ocr result is : 2
********** print ocr result **********
0 det boxes: [[373,2],[630,2],[630,11],[373,11]] rec text: oroposals.andthegreencontoursarefinal rec score: 0.919729
1 det boxes: [[8,3],[357,3],[357,11],[8,11]] rec text: Visualexperimentalresultshebluecontoursareboundar rec score: 0.915963
********** end print ocr result **********
3 type: text, region: [489,342,789,359], score: 0.630538, res: count of ocr result is : 1
********** print ocr result **********
0 det boxes: [[8,2],[294,2],[294,14],[8,14]] rec text: Table7.Experimentalresults onMSRA-TD500 rec score: 0.942251
********** end print ocr result **********
4 type: text, region: [444,751,841,848], score: 0.607345, res: count of ocr result is : 5
********** print ocr result **********
0 det boxes: [[19,3],[389,3],[389,17],[19,17]] rec text: Inthispaper,weproposeanovel adaptivebound rec score: 0.941031
1 det boxes: [[4,22],[390,22],[390,36],[4,36]] rec text: aryproposalnetworkforarbitraryshapetextdetection rec score: 0.960172
2 det boxes: [[4,42],[392,42],[392,56],[4,56]] rec text: whichadoptanboundaryproposalmodeltogeneratecoarse rec score: 0.934647
3 det boxes: [[4,61],[389,61],[389,75],[4,75]] rec text: ooundaryproposals,andthenadoptanadaptiveboundary rec score: 0.946296
4 det boxes: [[5,80],[387,80],[387,93],[5,93]] rec text: leformationmodelcombinedwithGCNandRNNtoper rec score: 0.952401
********** end print ocr result **********
5 type: title, region: [444,705,564,724], score: 0.785429, res: count of ocr result is : 1
********** print ocr result **********
0 det boxes: [[6,2],[113,2],[113,14],[6,14]] rec text: 5.Conclusion rec score: 0.856903
********** end print ocr result **********
6 type: table, region: [14,360,402,711], score: 0.963643, res: <html><body><table><thead><tr><td>Methods</td><td>Ext</td><td>R</td><td>P</td><td>F</td><td>FPS</td></tr></thead><tbody><tr><td>TextSnake [18]</td><td>Syn</td><td>85.3</td><td>67.9</td><td>75.6</td><td></td></tr><tr><td>CSE [17]</td><td>MiLT</td><td>76.1</td><td>78.7</td><td>77.4</td><td>0.38</td></tr><tr><td>LOMO[40]</td><td>Syn</td><td>76.5</td><td>85.7</td><td>80.8</td><td>4.4</td></tr><tr><td>ATRR[35]</td><td>Sy-</td><td>80.2</td><td>80.1</td><td>80.1</td><td>-</td></tr><tr><td>SegLink++ [28]</td><td>Syn</td><td>79.8</td><td>82.8</td><td>81.3</td><td>-</td></tr><tr><td>TextField [37]</td><td>Syn</td><td>79.8</td><td>83.0</td><td>81.4</td><td>6.0</td></tr><tr><td>MSR[38]</td><td>Syn</td><td>79.0</td><td>84.1</td><td>81.5</td><td>4.3</td></tr><tr><td>PSENet-1s [33]</td><td>MLT</td><td>79.7</td><td>84.8</td><td>82.2</td><td>3.9</td></tr><tr><td>DB [12]</td><td>Syn</td><td>80.2</td><td>86.9</td><td>83.4</td><td>22.0</td></tr><tr><td>CRAFT [2]</td><td>Syn</td><td>81.1</td><td>86.0</td><td>83.5</td><td>-</td></tr><tr><td>TextDragon [5]</td><td>MLT+</td><td>82.8</td><td>84.5</td><td>83.6</td><td></td></tr><tr><td>PAN [34]</td><td>Syn</td><td>81.2</td><td>86.4</td><td>83.7</td><td>39.8</td></tr><tr><td>ContourNet [36]</td><td></td><td>84.1</td><td>83.7</td><td>83.9</td><td>4.5</td></tr><tr><td>DRRG [41]</td><td>MLT</td><td>83.02</td><td>85.93</td><td>84.45</td><td>-</td></tr><tr><td>TextPerception[23]</td><td>Syn</td><td>81.9</td><td>87.5</td><td>84.6</td><td></td></tr><tr><td>Ours</td><td> Syn</td><td>80.57</td><td>87.66</td><td>83.97</td><td>12.08</td></tr><tr><td>Ours</td><td></td><td>81.45</td><td>87.81</td><td>84.51</td><td>12.15</td></tr><tr><td>Ours</td><td>MLT</td><td>83.60</td><td>86.45</td><td>85.00</td><td>12.21</td></tr></tbody></table></body></html>
The table visualized image saved in ./output//6_1.png
7 type: table, region: [462,359,820,657], score: 0.953917, res: <html><body><table><thead><tr><td>Methods</td><td>R</td><td>P</td><td>F</td><td>FPS</td></tr></thead><tbody><tr><td>SegLink [26]</td><td>70.0</td><td>86.0</td><td>77.0</td><td>8.9</td></tr><tr><td>PixelLink [4]</td><td>73.2</td><td>83.0</td><td>77.8</td><td>-</td></tr><tr><td>TextSnake [18]</td><td>73.9</td><td>83.2</td><td>78.3</td><td>1.1</td></tr><tr><td>TextField [37]</td><td>75.9</td><td>87.4</td><td>81.3</td><td>5.2 </td></tr><tr><td>MSR[38]</td><td>76.7</td><td>87.4</td><td>81.7</td><td>-</td></tr><tr><td>FTSN[3]</td><td>77.1</td><td>87.6</td><td>82.0</td><td>:</td></tr><tr><td>LSE[30]</td><td>81.7</td><td>84.2</td><td>82.9</td><td></td></tr><tr><td>CRAFT [2]</td><td>78.2</td><td>88.2</td><td>82.9</td><td>8.6</td></tr><tr><td>MCN [16]</td><td>79</td><td>88</td><td>83</td><td>-</td></tr><tr><td>ATRR[35]</td><td>82.1</td><td>85.2</td><td>83.6</td><td>-</td></tr><tr><td>PAN [34]</td><td>83.8</td><td>84.4</td><td>84.1</td><td>30.2</td></tr><tr><td>DB[12]</td><td>79.2</td><td>91.5</td><td>84.9</td><td>32.0</td></tr><tr><td>DRRG [41]</td><td>82.30</td><td>88.05</td><td>85.08</td><td>-</td></tr><tr><td>Ours (SynText)</td><td>80.68</td><td>85.40</td><td>82.97</td><td>12.68</td></tr><tr><td>Ours (MLT-17)</td><td>84.54</td><td>86.62</td><td>85.57</td><td>12.31</td></tr></tbody></table></body></html>
The table visualized image saved in ./output//7_1.png
8 type: figure, region: [14,3,836,310], score: 0.969443, res: count of ocr result is : 26
********** print ocr result **********
0 det boxes: [[506,14],[539,15],[539,22],[506,21]] rec text: E rec score: 0.318073
...
25 det boxes: [[680,290],[759,288],[759,303],[680,305]] rec text: (d) CTW1500 rec score: 0.95911
********** end print ocr result **********
``` ```
<a name="3"></a> <a name="3"></a>
......
...@@ -184,6 +184,9 @@ inference/ ...@@ -184,6 +184,9 @@ inference/
|-- table |-- table
| |--inference.pdiparams | |--inference.pdiparams
| |--inference.pdmodel | |--inference.pdmodel
|-- layout
| |--inference.pdiparams
| |--inference.pdmodel
``` ```
<a name="22"></a> <a name="22"></a>
...@@ -288,7 +291,30 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir ...@@ -288,7 +291,30 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
--cls=true \ --cls=true \
``` ```
##### 7. 表格识别 ##### 7. 版面分析+表格识别
```shell
./build/ppocr --det_model_dir=inference/det_db \
--rec_model_dir=inference/rec_rcnn \
--table_model_dir=inference/table \
--image_dir=../../ppstructure/docs/table/table.jpg \
--layout_model_dir=inference/layout \
--type=structure \
--table=true \
--layout=true
```
##### 8. 版面分析
```shell
./build/ppocr --layout_model_dir=inference/layout \
--image_dir=../../ppstructure/docs/table/1.png \
--type=structure \
--table=false \
--layout=true \
--det=false \
--rec=false
```
##### 9. 表格识别
```shell ```shell
./build/ppocr --det_model_dir=inference/det_db \ ./build/ppocr --det_model_dir=inference/det_db \
--rec_model_dir=inference/rec_rcnn \ --rec_model_dir=inference/rec_rcnn \
...@@ -352,13 +378,24 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir ...@@ -352,13 +378,24 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
|rec_img_w|int|320|文字识别模型输入图像宽度| |rec_img_w|int|320|文字识别模型输入图像宽度|
- 版面分析模型相关
|参数名称|类型|默认参数|意义|
| :---: | :---: | :---: | :---: |
|layout_model_dir|string|-|版面分析模型inference model地址|
|layout_dict_path|string|../../ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt|字典文件|
|layout_score_threshold|float|0.5|检测框的分数阈值|
|layout_nms_threshold|float|0.5|nms的阈值|
- 表格识别模型相关 - 表格识别模型相关
|参数名称|类型|默认参数|意义| |参数名称|类型|默认参数|意义|
| :---: | :---: | :---: | :---: | | :---: | :---: | :---: | :---: |
|table_model_dir|string|-|表格识别模型inference model地址| |table_model_dir|string|-|表格识别模型inference model地址|
|table_char_dict_path|string|../../ppocr/utils/dict/table_structure_dict.txt|字典文件| |table_char_dict_path|string|../../ppocr/utils/dict/table_structure_dict_ch.txt|字典文件|
|table_max_len|int|488|表格识别模型输入图像长边大小,最终网络输入图像大小为(table_max_len,table_max_len)| |table_max_len|int|488|表格识别模型输入图像长边大小,最终网络输入图像大小为(table_max_len,table_max_len)|
|merge_no_span_structure|bool|true|是否将<td>和</td>合并为<td></td>|
* PaddleOCR也支持多语言的预测,更多支持的语言和模型可以参考[识别文档](../../doc/doc_ch/recognition.md)中的多语言字典与模型部分,如果希望进行多语言预测,只需将修改`rec_char_dict_path`(字典文件路径)以及`rec_model_dir`(inference模型路径)字段即可。 * PaddleOCR也支持多语言的预测,更多支持的语言和模型可以参考[识别文档](../../doc/doc_ch/recognition.md)中的多语言字典与模型部分,如果希望进行多语言预测,只需将修改`rec_char_dict_path`(字典文件路径)以及`rec_model_dir`(inference模型路径)字段即可。
...@@ -377,11 +414,51 @@ predict img: ../../doc/imgs/12.jpg ...@@ -377,11 +414,51 @@ predict img: ../../doc/imgs/12.jpg
The detection visualized image saved in ./output//12.jpg The detection visualized image saved in ./output//12.jpg
``` ```
- table - layout+table
```bash ```bash
predict img: ../../ppstructure/docs/table/table.jpg predict img: ../../ppstructure/docs/table/1.png
0 type: table, region: [0,0,371,293], res: <html><body><table><thead><tr><td>Methods</td><td>R</td><td>P</td><td>F</td><td>FPS</td></tr></thead><tbody><tr><td>SegLink [26]</td><td>70.0</td><td>86.0</td><td>77.0</td><td>8.9</td></tr><tr><td>PixelLink [4]</td><td>73.2</td><td>83.0</td><td>77.8</td><td>-</td></tr><tr><td>TextSnake [18]</td><td>73.9</td><td>83.2</td><td>78.3</td><td>1.1</td></tr><tr><td>TextField [37]</td><td>75.9</td><td>87.4</td><td>81.3</td><td>5.2 </td></tr><tr><td>MSR[38]</td><td>76.7</td><td>87.4</td><td>81.7</td><td>-</td></tr><tr><td>FTSN [3]</td><td>77.1</td><td>87.6</td><td>82.0</td><td>-</td></tr><tr><td>LSE[30]</td><td>81.7</td><td>84.2</td><td>82.9</td><td>-</td></tr><tr><td>CRAFT [2]</td><td>78.2</td><td>88.2</td><td>82.9</td><td>8.6</td></tr><tr><td>MCN [16]</td><td>79</td><td>88</td><td>83</td><td>-</td></tr><tr><td>ATRR[35]</td><td>82.1</td><td>85.2</td><td>83.6</td><td>-</td></tr><tr><td>PAN [34]</td><td>83.8</td><td>84.4</td><td>84.1</td><td>30.2</td></tr><tr><td>DB[12]</td><td>79.2</td><td>91.5</td><td>84.9</td><td>32.0</td></tr><tr><td>DRRG [41]</td><td>82.30</td><td>88.05</td><td>85.08</td><td>-</td></tr><tr><td>Ours (SynText)</td><td>80.68</td><td>85.40</td><td>82.97</td><td>12.68</td></tr><tr><td>Ours (MLT-17)</td><td>84.54</td><td>86.62</td><td>85.57</td><td>12.31</td></tr></tbody></table></body></html> 0 type: text, region: [12,729,410,848], score: 0.781044, res: count of ocr result is : 7
********** print ocr result **********
0 det boxes: [[4,1],[79,1],[79,12],[4,12]] rec text: CTW1500. rec score: 0.769472
...
6 det boxes: [[4,99],[391,99],[391,112],[4,112]] rec text: sate-of-the-artmethods[12.34.36l.ourapproachachieves rec score: 0.90414
********** end print ocr result **********
1 type: text, region: [69,342,342,359], score: 0.703666, res: count of ocr result is : 1
********** print ocr result **********
0 det boxes: [[8,2],[269,2],[269,13],[8,13]] rec text: Table6.Experimentalresults on CTW-1500 rec score: 0.890454
********** end print ocr result **********
2 type: text, region: [70,316,706,332], score: 0.659738, res: count of ocr result is : 2
********** print ocr result **********
0 det boxes: [[373,2],[630,2],[630,11],[373,11]] rec text: oroposals.andthegreencontoursarefinal rec score: 0.919729
1 det boxes: [[8,3],[357,3],[357,11],[8,11]] rec text: Visualexperimentalresultshebluecontoursareboundar rec score: 0.915963
********** end print ocr result **********
3 type: text, region: [489,342,789,359], score: 0.630538, res: count of ocr result is : 1
********** print ocr result **********
0 det boxes: [[8,2],[294,2],[294,14],[8,14]] rec text: Table7.Experimentalresults onMSRA-TD500 rec score: 0.942251
********** end print ocr result **********
4 type: text, region: [444,751,841,848], score: 0.607345, res: count of ocr result is : 5
********** print ocr result **********
0 det boxes: [[19,3],[389,3],[389,17],[19,17]] rec text: Inthispaper,weproposeanovel adaptivebound rec score: 0.941031
1 det boxes: [[4,22],[390,22],[390,36],[4,36]] rec text: aryproposalnetworkforarbitraryshapetextdetection rec score: 0.960172
2 det boxes: [[4,42],[392,42],[392,56],[4,56]] rec text: whichadoptanboundaryproposalmodeltogeneratecoarse rec score: 0.934647
3 det boxes: [[4,61],[389,61],[389,75],[4,75]] rec text: ooundaryproposals,andthenadoptanadaptiveboundary rec score: 0.946296
4 det boxes: [[5,80],[387,80],[387,93],[5,93]] rec text: leformationmodelcombinedwithGCNandRNNtoper rec score: 0.952401
********** end print ocr result **********
5 type: title, region: [444,705,564,724], score: 0.785429, res: count of ocr result is : 1
********** print ocr result **********
0 det boxes: [[6,2],[113,2],[113,14],[6,14]] rec text: 5.Conclusion rec score: 0.856903
********** end print ocr result **********
6 type: table, region: [14,360,402,711], score: 0.963643, res: <html><body><table><thead><tr><td>Methods</td><td>Ext</td><td>R</td><td>P</td><td>F</td><td>FPS</td></tr></thead><tbody><tr><td>TextSnake [18]</td><td>Syn</td><td>85.3</td><td>67.9</td><td>75.6</td><td></td></tr><tr><td>CSE [17]</td><td>MiLT</td><td>76.1</td><td>78.7</td><td>77.4</td><td>0.38</td></tr><tr><td>LOMO[40]</td><td>Syn</td><td>76.5</td><td>85.7</td><td>80.8</td><td>4.4</td></tr><tr><td>ATRR[35]</td><td>Sy-</td><td>80.2</td><td>80.1</td><td>80.1</td><td>-</td></tr><tr><td>SegLink++ [28]</td><td>Syn</td><td>79.8</td><td>82.8</td><td>81.3</td><td>-</td></tr><tr><td>TextField [37]</td><td>Syn</td><td>79.8</td><td>83.0</td><td>81.4</td><td>6.0</td></tr><tr><td>MSR[38]</td><td>Syn</td><td>79.0</td><td>84.1</td><td>81.5</td><td>4.3</td></tr><tr><td>PSENet-1s [33]</td><td>MLT</td><td>79.7</td><td>84.8</td><td>82.2</td><td>3.9</td></tr><tr><td>DB [12]</td><td>Syn</td><td>80.2</td><td>86.9</td><td>83.4</td><td>22.0</td></tr><tr><td>CRAFT [2]</td><td>Syn</td><td>81.1</td><td>86.0</td><td>83.5</td><td>-</td></tr><tr><td>TextDragon [5]</td><td>MLT+</td><td>82.8</td><td>84.5</td><td>83.6</td><td></td></tr><tr><td>PAN [34]</td><td>Syn</td><td>81.2</td><td>86.4</td><td>83.7</td><td>39.8</td></tr><tr><td>ContourNet [36]</td><td></td><td>84.1</td><td>83.7</td><td>83.9</td><td>4.5</td></tr><tr><td>DRRG [41]</td><td>MLT</td><td>83.02</td><td>85.93</td><td>84.45</td><td>-</td></tr><tr><td>TextPerception[23]</td><td>Syn</td><td>81.9</td><td>87.5</td><td>84.6</td><td></td></tr><tr><td>Ours</td><td> Syn</td><td>80.57</td><td>87.66</td><td>83.97</td><td>12.08</td></tr><tr><td>Ours</td><td></td><td>81.45</td><td>87.81</td><td>84.51</td><td>12.15</td></tr><tr><td>Ours</td><td>MLT</td><td>83.60</td><td>86.45</td><td>85.00</td><td>12.21</td></tr></tbody></table></body></html>
The table visualized image saved in ./output//6_1.png
7 type: table, region: [462,359,820,657], score: 0.953917, res: <html><body><table><thead><tr><td>Methods</td><td>R</td><td>P</td><td>F</td><td>FPS</td></tr></thead><tbody><tr><td>SegLink [26]</td><td>70.0</td><td>86.0</td><td>77.0</td><td>8.9</td></tr><tr><td>PixelLink [4]</td><td>73.2</td><td>83.0</td><td>77.8</td><td>-</td></tr><tr><td>TextSnake [18]</td><td>73.9</td><td>83.2</td><td>78.3</td><td>1.1</td></tr><tr><td>TextField [37]</td><td>75.9</td><td>87.4</td><td>81.3</td><td>5.2 </td></tr><tr><td>MSR[38]</td><td>76.7</td><td>87.4</td><td>81.7</td><td>-</td></tr><tr><td>FTSN[3]</td><td>77.1</td><td>87.6</td><td>82.0</td><td>:</td></tr><tr><td>LSE[30]</td><td>81.7</td><td>84.2</td><td>82.9</td><td></td></tr><tr><td>CRAFT [2]</td><td>78.2</td><td>88.2</td><td>82.9</td><td>8.6</td></tr><tr><td>MCN [16]</td><td>79</td><td>88</td><td>83</td><td>-</td></tr><tr><td>ATRR[35]</td><td>82.1</td><td>85.2</td><td>83.6</td><td>-</td></tr><tr><td>PAN [34]</td><td>83.8</td><td>84.4</td><td>84.1</td><td>30.2</td></tr><tr><td>DB[12]</td><td>79.2</td><td>91.5</td><td>84.9</td><td>32.0</td></tr><tr><td>DRRG [41]</td><td>82.30</td><td>88.05</td><td>85.08</td><td>-</td></tr><tr><td>Ours (SynText)</td><td>80.68</td><td>85.40</td><td>82.97</td><td>12.68</td></tr><tr><td>Ours (MLT-17)</td><td>84.54</td><td>86.62</td><td>85.57</td><td>12.31</td></tr></tbody></table></body></html>
The table visualized image saved in ./output//7_1.png
8 type: figure, region: [14,3,836,310], score: 0.969443, res: count of ocr result is : 26
********** print ocr result **********
0 det boxes: [[506,14],[539,15],[539,22],[506,21]] rec text: E rec score: 0.318073
...
25 det boxes: [[680,290],[759,288],[759,303],[680,305]] rec text: (d) CTW1500 rec score: 0.95911
********** end print ocr result **********
``` ```
<a name="3"></a> <a name="3"></a>
......
...@@ -51,12 +51,21 @@ DEFINE_string(rec_char_dict_path, "../../ppocr/utils/ppocr_keys_v1.txt", ...@@ -51,12 +51,21 @@ DEFINE_string(rec_char_dict_path, "../../ppocr/utils/ppocr_keys_v1.txt",
DEFINE_int32(rec_img_h, 48, "rec image height"); DEFINE_int32(rec_img_h, 48, "rec image height");
DEFINE_int32(rec_img_w, 320, "rec image width"); DEFINE_int32(rec_img_w, 320, "rec image width");
// Layout-analysis model flags (gflags definitions; each is read elsewhere as
// FLAGS_<name>).
// Directory of the layout inference model; empty by default.
DEFINE_string(layout_model_dir, "", "Path of table layout inference model.");
// Dictionary (label) file for the layout model; defaults to the PubLayNet
// dictionary shipped under ppocr/utils.
DEFINE_string(layout_dict_path,
"../../ppocr/utils/dict/layout_dict/layout_publaynet_dict.txt",
"Path of dictionary.");
// Score threshold, default 0.5.
DEFINE_double(layout_score_threshold, 0.5, "Threshold of score.");
// NMS threshold, default 0.5.
DEFINE_double(layout_nms_threshold, 0.5, "Threshold of nms.");
// structure model related // structure model related
DEFINE_string(table_model_dir, "", "Path of table struture inference model."); DEFINE_string(table_model_dir, "", "Path of table struture inference model.");
DEFINE_int32(table_max_len, 488, "max len size of input image."); DEFINE_int32(table_max_len, 488, "max len size of input image.");
DEFINE_int32(table_batch_num, 1, "table_batch_num."); DEFINE_int32(table_batch_num, 1, "table_batch_num.");
DEFINE_bool(merge_no_span_structure, true,
"Whether merge <td> and </td> to <td></td>");
DEFINE_string(table_char_dict_path, DEFINE_string(table_char_dict_path,
"../../ppocr/utils/dict/table_structure_dict.txt", "../../ppocr/utils/dict/table_structure_dict_ch.txt",
"Path of dictionary."); "Path of dictionary.");
// ocr forward related // ocr forward related
...@@ -64,3 +73,4 @@ DEFINE_bool(det, true, "Whether use det in forward."); ...@@ -64,3 +73,4 @@ DEFINE_bool(det, true, "Whether use det in forward.");
DEFINE_bool(rec, true, "Whether use rec in forward."); DEFINE_bool(rec, true, "Whether use rec in forward.");
DEFINE_bool(cls, false, "Whether use cls in forward."); DEFINE_bool(cls, false, "Whether use cls in forward.");
DEFINE_bool(table, false, "Whether use table structure in forward."); DEFINE_bool(table, false, "Whether use table structure in forward.");
DEFINE_bool(layout, false, "Whether use layout analysis in forward.");
\ No newline at end of file
...@@ -65,9 +65,18 @@ void check_params() { ...@@ -65,9 +65,18 @@ void check_params() {
exit(1); exit(1);
} }
} }
if (FLAGS_layout) {
if (FLAGS_layout_model_dir.empty() || FLAGS_image_dir.empty()) {
std::cout << "Usage[layout]: ./ppocr "
<< "--layout_model_dir=/PATH/TO/LAYOUT_INFERENCE_MODEL/ "
<< "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl;
exit(1);
}
}
if (FLAGS_precision != "fp32" && FLAGS_precision != "fp16" && if (FLAGS_precision != "fp32" && FLAGS_precision != "fp16" &&
FLAGS_precision != "int8") { FLAGS_precision != "int8") {
cout << "precison should be 'fp32'(default), 'fp16' or 'int8'. " << endl; std::cout << "precison should be 'fp32'(default), 'fp16' or 'int8'. "
<< std::endl;
exit(1); exit(1);
} }
} }
...@@ -75,65 +84,94 @@ void check_params() { ...@@ -75,65 +84,94 @@ void check_params() {
void ocr(std::vector<cv::String> &cv_all_img_names) { void ocr(std::vector<cv::String> &cv_all_img_names) {
PPOCR ocr = PPOCR(); PPOCR ocr = PPOCR();
std::vector<std::vector<OCRPredictResult>> ocr_results =
ocr.ocr(cv_all_img_names, FLAGS_det, FLAGS_rec, FLAGS_cls);
for (int i = 0; i < cv_all_img_names.size(); ++i) {
if (FLAGS_benchmark) { if (FLAGS_benchmark) {
cout << cv_all_img_names[i] << '\t'; ocr.reset_timer();
if (FLAGS_rec && FLAGS_det) {
Utility::print_result(ocr_results[i]);
} else if (FLAGS_det) {
for (int n = 0; n < ocr_results[i].size(); n++) {
for (int m = 0; m < ocr_results[i][n].box.size(); m++) {
cout << ocr_results[i][n].box[m][0] << ' '
<< ocr_results[i][n].box[m][1] << ' ';
} }
std::vector<cv::Mat> img_list;
std::vector<cv::String> img_names;
for (int i = 0; i < cv_all_img_names.size(); ++i) {
cv::Mat img = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
if (!img.data) {
std::cerr << "[ERROR] image read failed! image path: "
<< cv_all_img_names[i] << std::endl;
continue;
} }
cout << endl; img_list.push_back(img);
} else { img_names.push_back(cv_all_img_names[i]);
Utility::print_result(ocr_results[i]);
} }
} else {
cout << cv_all_img_names[i] << "\n"; std::vector<std::vector<OCRPredictResult>> ocr_results =
ocr.ocr(img_list, FLAGS_det, FLAGS_rec, FLAGS_cls);
for (int i = 0; i < img_names.size(); ++i) {
std::cout << "predict img: " << cv_all_img_names[i] << std::endl;
Utility::print_result(ocr_results[i]); Utility::print_result(ocr_results[i]);
if (FLAGS_visualize && FLAGS_det) { if (FLAGS_visualize && FLAGS_det) {
cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR); std::string file_name = Utility::basename(img_names[i]);
if (!srcimg.data) { cv::Mat srcimg = img_list[i];
std::cerr << "[ERROR] image read failed! image path: "
<< cv_all_img_names[i] << endl;
exit(1);
}
std::string file_name = Utility::basename(cv_all_img_names[i]);
Utility::VisualizeBboxes(srcimg, ocr_results[i], Utility::VisualizeBboxes(srcimg, ocr_results[i],
FLAGS_output + "/" + file_name); FLAGS_output + "/" + file_name);
} }
cout << "***************************" << endl;
} }
if (FLAGS_benchmark) {
ocr.benchmark_log(cv_all_img_names.size());
} }
} }
void structure(std::vector<cv::String> &cv_all_img_names) { void structure(std::vector<cv::String> &cv_all_img_names) {
PaddleOCR::PaddleStructure engine = PaddleOCR::PaddleStructure(); PaddleOCR::PaddleStructure engine = PaddleOCR::PaddleStructure();
std::vector<std::vector<StructurePredictResult>> structure_results =
engine.structure(cv_all_img_names, false, FLAGS_table); if (FLAGS_benchmark) {
engine.reset_timer();
}
for (int i = 0; i < cv_all_img_names.size(); i++) { for (int i = 0; i < cv_all_img_names.size(); i++) {
cout << "predict img: " << cv_all_img_names[i] << endl; std::cout << "predict img: " << cv_all_img_names[i] << std::endl;
for (int j = 0; j < structure_results[i].size(); j++) { cv::Mat img = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
std::cout << j << "\ttype: " << structure_results[i][j].type if (!img.data) {
std::cerr << "[ERROR] image read failed! image path: "
<< cv_all_img_names[i] << std::endl;
continue;
}
std::vector<StructurePredictResult> structure_results = engine.structure(
img, FLAGS_layout, FLAGS_table, FLAGS_det && FLAGS_rec);
for (int j = 0; j < structure_results.size(); j++) {
std::cout << j << "\ttype: " << structure_results[j].type
<< ", region: ["; << ", region: [";
std::cout << structure_results[i][j].box[0] << "," std::cout << structure_results[j].box[0] << ","
<< structure_results[i][j].box[1] << "," << structure_results[j].box[1] << ","
<< structure_results[i][j].box[2] << "," << structure_results[j].box[2] << ","
<< structure_results[i][j].box[3] << "], res: "; << structure_results[j].box[3] << "], score: ";
if (structure_results[i][j].type == "table") { std::cout << structure_results[j].confidence << ", res: ";
std::cout << structure_results[i][j].html << std::endl;
if (structure_results[j].type == "table") {
std::cout << structure_results[j].html << std::endl;
if (structure_results[j].cell_box.size() > 0 && FLAGS_visualize) {
std::string file_name = Utility::basename(cv_all_img_names[i]);
Utility::VisualizeBboxes(img, structure_results[j],
FLAGS_output + "/" + std::to_string(j) +
"_" + file_name);
}
} else { } else {
Utility::print_result(structure_results[i][j].text_res); std::cout << "count of ocr result is : "
<< structure_results[j].text_res.size() << std::endl;
if (structure_results[j].text_res.size() > 0) {
std::cout << "********** print ocr result "
<< "**********" << std::endl;
Utility::print_result(structure_results[j].text_res);
std::cout << "********** end print ocr result "
<< "**********" << std::endl;
} }
} }
} }
}
if (FLAGS_benchmark) {
engine.benchmark_log(cv_all_img_names.size());
}
} }
int main(int argc, char **argv) { int main(int argc, char **argv) {
...@@ -143,19 +181,22 @@ int main(int argc, char **argv) { ...@@ -143,19 +181,22 @@ int main(int argc, char **argv) {
if (!Utility::PathExists(FLAGS_image_dir)) { if (!Utility::PathExists(FLAGS_image_dir)) {
std::cerr << "[ERROR] image path not exist! image_dir: " << FLAGS_image_dir std::cerr << "[ERROR] image path not exist! image_dir: " << FLAGS_image_dir
<< endl; << std::endl;
exit(1); exit(1);
} }
std::vector<cv::String> cv_all_img_names; std::vector<cv::String> cv_all_img_names;
cv::glob(FLAGS_image_dir, cv_all_img_names); cv::glob(FLAGS_image_dir, cv_all_img_names);
std::cout << "total images num: " << cv_all_img_names.size() << endl; std::cout << "total images num: " << cv_all_img_names.size() << std::endl;
if (!Utility::PathExists(FLAGS_output)) {
Utility::CreateDir(FLAGS_output);
}
if (FLAGS_type == "ocr") { if (FLAGS_type == "ocr") {
ocr(cv_all_img_names); ocr(cv_all_img_names);
} else if (FLAGS_type == "structure") { } else if (FLAGS_type == "structure") {
structure(cv_all_img_names); structure(cv_all_img_names);
} else { } else {
std::cout << "only value in ['ocr','structure'] is supported" << endl; std::cout << "only value in ['ocr','structure'] is supported" << std::endl;
} }
} }
...@@ -32,7 +32,7 @@ void Classifier::Run(std::vector<cv::Mat> img_list, ...@@ -32,7 +32,7 @@ void Classifier::Run(std::vector<cv::Mat> img_list,
for (int beg_img_no = 0; beg_img_no < img_num; for (int beg_img_no = 0; beg_img_no < img_num;
beg_img_no += this->cls_batch_num_) { beg_img_no += this->cls_batch_num_) {
auto preprocess_start = std::chrono::steady_clock::now(); auto preprocess_start = std::chrono::steady_clock::now();
int end_img_no = min(img_num, beg_img_no + this->cls_batch_num_); int end_img_no = std::min(img_num, beg_img_no + this->cls_batch_num_);
int batch_num = end_img_no - beg_img_no; int batch_num = end_img_no - beg_img_no;
// preprocess // preprocess
std::vector<cv::Mat> norm_img_batch; std::vector<cv::Mat> norm_img_batch;
...@@ -97,7 +97,7 @@ void Classifier::Run(std::vector<cv::Mat> img_list, ...@@ -97,7 +97,7 @@ void Classifier::Run(std::vector<cv::Mat> img_list,
} }
void Classifier::LoadModel(const std::string &model_dir) { void Classifier::LoadModel(const std::string &model_dir) {
AnalysisConfig config; paddle_infer::Config config;
config.SetModel(model_dir + "/inference.pdmodel", config.SetModel(model_dir + "/inference.pdmodel",
model_dir + "/inference.pdiparams"); model_dir + "/inference.pdiparams");
...@@ -112,6 +112,11 @@ void Classifier::LoadModel(const std::string &model_dir) { ...@@ -112,6 +112,11 @@ void Classifier::LoadModel(const std::string &model_dir) {
precision = paddle_infer::Config::Precision::kInt8; precision = paddle_infer::Config::Precision::kInt8;
} }
config.EnableTensorRtEngine(1 << 20, 10, 3, precision, false, false); config.EnableTensorRtEngine(1 << 20, 10, 3, precision, false, false);
if (!Utility::PathExists("./trt_cls_shape.txt")) {
config.CollectShapeRangeInfo("./trt_cls_shape.txt");
} else {
config.EnableTunedTensorRtDynamicShape("./trt_cls_shape.txt", true);
}
} }
} else { } else {
config.DisableGpu(); config.DisableGpu();
...@@ -131,6 +136,6 @@ void Classifier::LoadModel(const std::string &model_dir) { ...@@ -131,6 +136,6 @@ void Classifier::LoadModel(const std::string &model_dir) {
config.EnableMemoryOptim(); config.EnableMemoryOptim();
config.DisableGlogInfo(); config.DisableGlogInfo();
this->predictor_ = CreatePredictor(config); this->predictor_ = paddle_infer::CreatePredictor(config);
} }
} // namespace PaddleOCR } // namespace PaddleOCR
...@@ -32,49 +32,12 @@ void DBDetector::LoadModel(const std::string &model_dir) { ...@@ -32,49 +32,12 @@ void DBDetector::LoadModel(const std::string &model_dir) {
if (this->precision_ == "int8") { if (this->precision_ == "int8") {
precision = paddle_infer::Config::Precision::kInt8; precision = paddle_infer::Config::Precision::kInt8;
} }
config.EnableTensorRtEngine(1 << 20, 1, 20, precision, false, false); config.EnableTensorRtEngine(1 << 30, 1, 20, precision, false, false);
std::map<std::string, std::vector<int>> min_input_shape = { if (!Utility::PathExists("./trt_det_shape.txt")) {
{"x", {1, 3, 50, 50}}, config.CollectShapeRangeInfo("./trt_det_shape.txt");
{"conv2d_92.tmp_0", {1, 120, 20, 20}}, } else {
{"conv2d_91.tmp_0", {1, 24, 10, 10}}, config.EnableTunedTensorRtDynamicShape("./trt_det_shape.txt", true);
{"conv2d_59.tmp_0", {1, 96, 20, 20}}, }
{"nearest_interp_v2_1.tmp_0", {1, 256, 10, 10}},
{"nearest_interp_v2_2.tmp_0", {1, 256, 20, 20}},
{"conv2d_124.tmp_0", {1, 256, 20, 20}},
{"nearest_interp_v2_3.tmp_0", {1, 64, 20, 20}},
{"nearest_interp_v2_4.tmp_0", {1, 64, 20, 20}},
{"nearest_interp_v2_5.tmp_0", {1, 64, 20, 20}},
{"elementwise_add_7", {1, 56, 2, 2}},
{"nearest_interp_v2_0.tmp_0", {1, 256, 2, 2}}};
std::map<std::string, std::vector<int>> max_input_shape = {
{"x", {1, 3, 1536, 1536}},
{"conv2d_92.tmp_0", {1, 120, 400, 400}},
{"conv2d_91.tmp_0", {1, 24, 200, 200}},
{"conv2d_59.tmp_0", {1, 96, 400, 400}},
{"nearest_interp_v2_1.tmp_0", {1, 256, 200, 200}},
{"nearest_interp_v2_2.tmp_0", {1, 256, 400, 400}},
{"conv2d_124.tmp_0", {1, 256, 400, 400}},
{"nearest_interp_v2_3.tmp_0", {1, 64, 400, 400}},
{"nearest_interp_v2_4.tmp_0", {1, 64, 400, 400}},
{"nearest_interp_v2_5.tmp_0", {1, 64, 400, 400}},
{"elementwise_add_7", {1, 56, 400, 400}},
{"nearest_interp_v2_0.tmp_0", {1, 256, 400, 400}}};
std::map<std::string, std::vector<int>> opt_input_shape = {
{"x", {1, 3, 640, 640}},
{"conv2d_92.tmp_0", {1, 120, 160, 160}},
{"conv2d_91.tmp_0", {1, 24, 80, 80}},
{"conv2d_59.tmp_0", {1, 96, 160, 160}},
{"nearest_interp_v2_1.tmp_0", {1, 256, 80, 80}},
{"nearest_interp_v2_2.tmp_0", {1, 256, 160, 160}},
{"conv2d_124.tmp_0", {1, 256, 160, 160}},
{"nearest_interp_v2_3.tmp_0", {1, 64, 160, 160}},
{"nearest_interp_v2_4.tmp_0", {1, 64, 160, 160}},
{"nearest_interp_v2_5.tmp_0", {1, 64, 160, 160}},
{"elementwise_add_7", {1, 56, 40, 40}},
{"nearest_interp_v2_0.tmp_0", {1, 256, 40, 40}}};
config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
opt_input_shape);
} }
} else { } else {
config.DisableGpu(); config.DisableGpu();
...@@ -95,7 +58,7 @@ void DBDetector::LoadModel(const std::string &model_dir) { ...@@ -95,7 +58,7 @@ void DBDetector::LoadModel(const std::string &model_dir) {
config.EnableMemoryOptim(); config.EnableMemoryOptim();
// config.DisableGlogInfo(); // config.DisableGlogInfo();
this->predictor_ = CreatePredictor(config); this->predictor_ = paddle_infer::CreatePredictor(config);
} }
void DBDetector::Run(cv::Mat &img, void DBDetector::Run(cv::Mat &img,
......
...@@ -37,7 +37,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list, ...@@ -37,7 +37,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
for (int beg_img_no = 0; beg_img_no < img_num; for (int beg_img_no = 0; beg_img_no < img_num;
beg_img_no += this->rec_batch_num_) { beg_img_no += this->rec_batch_num_) {
auto preprocess_start = std::chrono::steady_clock::now(); auto preprocess_start = std::chrono::steady_clock::now();
int end_img_no = min(img_num, beg_img_no + this->rec_batch_num_); int end_img_no = std::min(img_num, beg_img_no + this->rec_batch_num_);
int batch_num = end_img_no - beg_img_no; int batch_num = end_img_no - beg_img_no;
int imgH = this->rec_image_shape_[1]; int imgH = this->rec_image_shape_[1];
int imgW = this->rec_image_shape_[2]; int imgW = this->rec_image_shape_[2];
...@@ -46,7 +46,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list, ...@@ -46,7 +46,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
int h = img_list[indices[ino]].rows; int h = img_list[indices[ino]].rows;
int w = img_list[indices[ino]].cols; int w = img_list[indices[ino]].cols;
float wh_ratio = w * 1.0 / h; float wh_ratio = w * 1.0 / h;
max_wh_ratio = max(max_wh_ratio, wh_ratio); max_wh_ratio = std::max(max_wh_ratio, wh_ratio);
} }
int batch_width = imgW; int batch_width = imgW;
...@@ -60,7 +60,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list, ...@@ -60,7 +60,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
this->normalize_op_.Run(&resize_img, this->mean_, this->scale_, this->normalize_op_.Run(&resize_img, this->mean_, this->scale_,
this->is_scale_); this->is_scale_);
norm_img_batch.push_back(resize_img); norm_img_batch.push_back(resize_img);
batch_width = max(resize_img.cols, batch_width); batch_width = std::max(resize_img.cols, batch_width);
} }
std::vector<float> input(batch_num * 3 * imgH * batch_width, 0.0f); std::vector<float> input(batch_num * 3 * imgH * batch_width, 0.0f);
...@@ -115,7 +115,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list, ...@@ -115,7 +115,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
last_index = argmax_idx; last_index = argmax_idx;
} }
score /= count; score /= count;
if (isnan(score)) { if (std::isnan(score)) {
continue; continue;
} }
rec_texts[indices[beg_img_no + m]] = str_res; rec_texts[indices[beg_img_no + m]] = str_res;
...@@ -130,7 +130,6 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list, ...@@ -130,7 +130,6 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
} }
void CRNNRecognizer::LoadModel(const std::string &model_dir) { void CRNNRecognizer::LoadModel(const std::string &model_dir) {
// AnalysisConfig config;
paddle_infer::Config config; paddle_infer::Config config;
config.SetModel(model_dir + "/inference.pdmodel", config.SetModel(model_dir + "/inference.pdmodel",
model_dir + "/inference.pdiparams"); model_dir + "/inference.pdiparams");
...@@ -147,20 +146,11 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) { ...@@ -147,20 +146,11 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
if (this->precision_ == "int8") { if (this->precision_ == "int8") {
precision = paddle_infer::Config::Precision::kInt8; precision = paddle_infer::Config::Precision::kInt8;
} }
config.EnableTensorRtEngine(1 << 20, 10, 15, precision, false, false); if (!Utility::PathExists("./trt_rec_shape.txt")) {
int imgH = this->rec_image_shape_[1]; config.CollectShapeRangeInfo("./trt_rec_shape.txt");
int imgW = this->rec_image_shape_[2]; } else {
std::map<std::string, std::vector<int>> min_input_shape = { config.EnableTunedTensorRtDynamicShape("./trt_rec_shape.txt", true);
{"x", {1, 3, imgH, 10}}, {"lstm_0.tmp_0", {10, 1, 96}}}; }
std::map<std::string, std::vector<int>> max_input_shape = {
{"x", {this->rec_batch_num_, 3, imgH, 2500}},
{"lstm_0.tmp_0", {1000, 1, 96}}};
std::map<std::string, std::vector<int>> opt_input_shape = {
{"x", {this->rec_batch_num_, 3, imgH, imgW}},
{"lstm_0.tmp_0", {25, 1, 96}}};
config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
opt_input_shape);
} }
} else { } else {
config.DisableGpu(); config.DisableGpu();
...@@ -185,7 +175,7 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) { ...@@ -185,7 +175,7 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
config.EnableMemoryOptim(); config.EnableMemoryOptim();
// config.DisableGlogInfo(); // config.DisableGlogInfo();
this->predictor_ = CreatePredictor(config); this->predictor_ = paddle_infer::CreatePredictor(config);
} }
} // namespace PaddleOCR } // namespace PaddleOCR
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
#include <include/paddleocr.h> #include <include/paddleocr.h>
#include "auto_log/autolog.h" #include "auto_log/autolog.h"
#include <numeric>
namespace PaddleOCR { namespace PaddleOCR {
PPOCR::PPOCR() { PPOCR::PPOCR() {
...@@ -44,84 +44,15 @@ PPOCR::PPOCR() { ...@@ -44,84 +44,15 @@ PPOCR::PPOCR() {
} }
}; };
void PPOCR::det(cv::Mat img, std::vector<OCRPredictResult> &ocr_results,
std::vector<double> &times) {
std::vector<std::vector<std::vector<int>>> boxes;
std::vector<double> det_times;
this->detector_->Run(img, boxes, det_times);
for (int i = 0; i < boxes.size(); i++) {
OCRPredictResult res;
res.box = boxes[i];
ocr_results.push_back(res);
}
// sort boex from top to bottom, from left to right
Utility::sorted_boxes(ocr_results);
times[0] += det_times[0];
times[1] += det_times[1];
times[2] += det_times[2];
}
void PPOCR::rec(std::vector<cv::Mat> img_list,
std::vector<OCRPredictResult> &ocr_results,
std::vector<double> &times) {
std::vector<std::string> rec_texts(img_list.size(), "");
std::vector<float> rec_text_scores(img_list.size(), 0);
std::vector<double> rec_times;
this->recognizer_->Run(img_list, rec_texts, rec_text_scores, rec_times);
// output rec results
for (int i = 0; i < rec_texts.size(); i++) {
ocr_results[i].text = rec_texts[i];
ocr_results[i].score = rec_text_scores[i];
}
times[0] += rec_times[0];
times[1] += rec_times[1];
times[2] += rec_times[2];
}
void PPOCR::cls(std::vector<cv::Mat> img_list,
std::vector<OCRPredictResult> &ocr_results,
std::vector<double> &times) {
std::vector<int> cls_labels(img_list.size(), 0);
std::vector<float> cls_scores(img_list.size(), 0);
std::vector<double> cls_times;
this->classifier_->Run(img_list, cls_labels, cls_scores, cls_times);
// output cls results
for (int i = 0; i < cls_labels.size(); i++) {
ocr_results[i].cls_label = cls_labels[i];
ocr_results[i].cls_score = cls_scores[i];
}
times[0] += cls_times[0];
times[1] += cls_times[1];
times[2] += cls_times[2];
}
std::vector<std::vector<OCRPredictResult>> std::vector<std::vector<OCRPredictResult>>
PPOCR::ocr(std::vector<cv::String> cv_all_img_names, bool det, bool rec, PPOCR::ocr(std::vector<cv::Mat> img_list, bool det, bool rec, bool cls) {
bool cls) {
std::vector<double> time_info_det = {0, 0, 0};
std::vector<double> time_info_rec = {0, 0, 0};
std::vector<double> time_info_cls = {0, 0, 0};
std::vector<std::vector<OCRPredictResult>> ocr_results; std::vector<std::vector<OCRPredictResult>> ocr_results;
if (!det) { if (!det) {
std::vector<OCRPredictResult> ocr_result; std::vector<OCRPredictResult> ocr_result;
// read image ocr_result.resize(img_list.size());
std::vector<cv::Mat> img_list;
for (int i = 0; i < cv_all_img_names.size(); ++i) {
cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
if (!srcimg.data) {
std::cerr << "[ERROR] image read failed! image path: "
<< cv_all_img_names[i] << endl;
exit(1);
}
img_list.push_back(srcimg);
OCRPredictResult res;
ocr_result.push_back(res);
}
if (cls && this->classifier_ != nullptr) { if (cls && this->classifier_ != nullptr) {
this->cls(img_list, ocr_result, time_info_cls); this->cls(img_list, ocr_result);
for (int i = 0; i < img_list.size(); i++) { for (int i = 0; i < img_list.size(); i++) {
if (ocr_result[i].cls_label % 2 == 1 && if (ocr_result[i].cls_label % 2 == 1 &&
ocr_result[i].cls_score > this->classifier_->cls_thresh) { ocr_result[i].cls_score > this->classifier_->cls_thresh) {
...@@ -130,43 +61,39 @@ PPOCR::ocr(std::vector<cv::String> cv_all_img_names, bool det, bool rec, ...@@ -130,43 +61,39 @@ PPOCR::ocr(std::vector<cv::String> cv_all_img_names, bool det, bool rec,
} }
} }
if (rec) { if (rec) {
this->rec(img_list, ocr_result, time_info_rec); this->rec(img_list, ocr_result);
} }
for (int i = 0; i < cv_all_img_names.size(); ++i) { for (int i = 0; i < ocr_result.size(); ++i) {
std::vector<OCRPredictResult> ocr_result_tmp; std::vector<OCRPredictResult> ocr_result_tmp;
ocr_result_tmp.push_back(ocr_result[i]); ocr_result_tmp.push_back(ocr_result[i]);
ocr_results.push_back(ocr_result_tmp); ocr_results.push_back(ocr_result_tmp);
} }
} else { } else {
if (!Utility::PathExists(FLAGS_output) && FLAGS_det) { for (int i = 0; i < img_list.size(); ++i) {
Utility::CreateDir(FLAGS_output); std::vector<OCRPredictResult> ocr_result =
this->ocr(img_list[i], true, rec, cls);
ocr_results.push_back(ocr_result);
} }
for (int i = 0; i < cv_all_img_names.size(); ++i) {
std::vector<OCRPredictResult> ocr_result;
if (!FLAGS_benchmark) {
cout << "predict img: " << cv_all_img_names[i] << endl;
} }
return ocr_results;
}
cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR); std::vector<OCRPredictResult> PPOCR::ocr(cv::Mat img, bool det, bool rec,
if (!srcimg.data) { bool cls) {
std::cerr << "[ERROR] image read failed! image path: "
<< cv_all_img_names[i] << endl; std::vector<OCRPredictResult> ocr_result;
exit(1);
}
// det // det
this->det(srcimg, ocr_result, time_info_det); this->det(img, ocr_result);
// crop image // crop image
std::vector<cv::Mat> img_list; std::vector<cv::Mat> img_list;
for (int j = 0; j < ocr_result.size(); j++) { for (int j = 0; j < ocr_result.size(); j++) {
cv::Mat crop_img; cv::Mat crop_img;
crop_img = Utility::GetRotateCropImage(srcimg, ocr_result[j].box); crop_img = Utility::GetRotateCropImage(img, ocr_result[j].box);
img_list.push_back(crop_img); img_list.push_back(crop_img);
} }
// cls // cls
if (cls && this->classifier_ != nullptr) { if (cls && this->classifier_ != nullptr) {
this->cls(img_list, ocr_result, time_info_cls); this->cls(img_list, ocr_result);
for (int i = 0; i < img_list.size(); i++) { for (int i = 0; i < img_list.size(); i++) {
if (ocr_result[i].cls_label % 2 == 1 && if (ocr_result[i].cls_label % 2 == 1 &&
ocr_result[i].cls_score > this->classifier_->cls_thresh) { ocr_result[i].cls_score > this->classifier_->cls_thresh) {
...@@ -176,41 +103,93 @@ PPOCR::ocr(std::vector<cv::String> cv_all_img_names, bool det, bool rec, ...@@ -176,41 +103,93 @@ PPOCR::ocr(std::vector<cv::String> cv_all_img_names, bool det, bool rec,
} }
// rec // rec
if (rec) { if (rec) {
this->rec(img_list, ocr_result, time_info_rec); this->rec(img_list, ocr_result);
} }
ocr_results.push_back(ocr_result); return ocr_result;
}
void PPOCR::det(cv::Mat img, std::vector<OCRPredictResult> &ocr_results) {
std::vector<std::vector<std::vector<int>>> boxes;
std::vector<double> det_times;
this->detector_->Run(img, boxes, det_times);
for (int i = 0; i < boxes.size(); i++) {
OCRPredictResult res;
res.box = boxes[i];
ocr_results.push_back(res);
} }
// sort boex from top to bottom, from left to right
Utility::sorted_boxes(ocr_results);
this->time_info_det[0] += det_times[0];
this->time_info_det[1] += det_times[1];
this->time_info_det[2] += det_times[2];
}
void PPOCR::rec(std::vector<cv::Mat> img_list,
std::vector<OCRPredictResult> &ocr_results) {
std::vector<std::string> rec_texts(img_list.size(), "");
std::vector<float> rec_text_scores(img_list.size(), 0);
std::vector<double> rec_times;
this->recognizer_->Run(img_list, rec_texts, rec_text_scores, rec_times);
// output rec results
for (int i = 0; i < rec_texts.size(); i++) {
ocr_results[i].text = rec_texts[i];
ocr_results[i].score = rec_text_scores[i];
} }
if (FLAGS_benchmark) { this->time_info_rec[0] += rec_times[0];
this->log(time_info_det, time_info_rec, time_info_cls, this->time_info_rec[1] += rec_times[1];
cv_all_img_names.size()); this->time_info_rec[2] += rec_times[2];
}
void PPOCR::cls(std::vector<cv::Mat> img_list,
std::vector<OCRPredictResult> &ocr_results) {
std::vector<int> cls_labels(img_list.size(), 0);
std::vector<float> cls_scores(img_list.size(), 0);
std::vector<double> cls_times;
this->classifier_->Run(img_list, cls_labels, cls_scores, cls_times);
// output cls results
for (int i = 0; i < cls_labels.size(); i++) {
ocr_results[i].cls_label = cls_labels[i];
ocr_results[i].cls_score = cls_scores[i];
} }
return ocr_results; this->time_info_cls[0] += cls_times[0];
} // namespace PaddleOCR this->time_info_cls[1] += cls_times[1];
this->time_info_cls[2] += cls_times[2];
}
void PPOCR::log(std::vector<double> &det_times, std::vector<double> &rec_times, void PPOCR::reset_timer() {
std::vector<double> &cls_times, int img_num) { this->time_info_det = {0, 0, 0};
if (det_times[0] + det_times[1] + det_times[2] > 0) { this->time_info_rec = {0, 0, 0};
this->time_info_cls = {0, 0, 0};
}
void PPOCR::benchmark_log(int img_num) {
if (this->time_info_det[0] + this->time_info_det[1] + this->time_info_det[2] >
0) {
AutoLogger autolog_det("ocr_det", FLAGS_use_gpu, FLAGS_use_tensorrt, AutoLogger autolog_det("ocr_det", FLAGS_use_gpu, FLAGS_use_tensorrt,
FLAGS_enable_mkldnn, FLAGS_cpu_threads, 1, "dynamic", FLAGS_enable_mkldnn, FLAGS_cpu_threads, 1, "dynamic",
FLAGS_precision, det_times, img_num); FLAGS_precision, this->time_info_det, img_num);
autolog_det.report(); autolog_det.report();
} }
if (rec_times[0] + rec_times[1] + rec_times[2] > 0) { if (this->time_info_rec[0] + this->time_info_rec[1] + this->time_info_rec[2] >
0) {
AutoLogger autolog_rec("ocr_rec", FLAGS_use_gpu, FLAGS_use_tensorrt, AutoLogger autolog_rec("ocr_rec", FLAGS_use_gpu, FLAGS_use_tensorrt,
FLAGS_enable_mkldnn, FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_cpu_threads,
FLAGS_rec_batch_num, "dynamic", FLAGS_precision, FLAGS_rec_batch_num, "dynamic", FLAGS_precision,
rec_times, img_num); this->time_info_rec, img_num);
autolog_rec.report(); autolog_rec.report();
} }
if (cls_times[0] + cls_times[1] + cls_times[2] > 0) { if (this->time_info_cls[0] + this->time_info_cls[1] + this->time_info_cls[2] >
0) {
AutoLogger autolog_cls("ocr_cls", FLAGS_use_gpu, FLAGS_use_tensorrt, AutoLogger autolog_cls("ocr_cls", FLAGS_use_gpu, FLAGS_use_tensorrt,
FLAGS_enable_mkldnn, FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_cpu_threads,
FLAGS_cls_batch_num, "dynamic", FLAGS_precision, FLAGS_cls_batch_num, "dynamic", FLAGS_precision,
cls_times, img_num); this->time_info_cls, img_num);
autolog_cls.report(); autolog_cls.report();
} }
} }
PPOCR::~PPOCR() { PPOCR::~PPOCR() {
if (this->detector_ != nullptr) { if (this->detector_ != nullptr) {
delete this->detector_; delete this->detector_;
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include <include/clipper.h>
#include <include/postprocess_op.h> #include <include/postprocess_op.h>
namespace PaddleOCR { namespace PaddleOCR {
...@@ -352,8 +351,21 @@ std::vector<std::vector<std::vector<int>>> DBPostProcessor::FilterTagDetRes( ...@@ -352,8 +351,21 @@ std::vector<std::vector<std::vector<int>>> DBPostProcessor::FilterTagDetRes(
return root_points; return root_points;
} }
void TablePostProcessor::init(std::string label_path) { void TablePostProcessor::init(std::string label_path,
bool merge_no_span_structure) {
this->label_list_ = Utility::ReadDict(label_path); this->label_list_ = Utility::ReadDict(label_path);
if (merge_no_span_structure) {
this->label_list_.push_back("<td></td>");
std::vector<std::string>::iterator it;
for (it = this->label_list_.begin(); it != this->label_list_.end();) {
if (*it == "<td>") {
it = this->label_list_.erase(it);
} else {
++it;
}
}
}
// add_special_char
this->label_list_.insert(this->label_list_.begin(), this->beg); this->label_list_.insert(this->label_list_.begin(), this->beg);
this->label_list_.push_back(this->end); this->label_list_.push_back(this->end);
} }
...@@ -363,12 +375,12 @@ void TablePostProcessor::Run( ...@@ -363,12 +375,12 @@ void TablePostProcessor::Run(
std::vector<float> &rec_scores, std::vector<int> &loc_preds_shape, std::vector<float> &rec_scores, std::vector<int> &loc_preds_shape,
std::vector<int> &structure_probs_shape, std::vector<int> &structure_probs_shape,
std::vector<std::vector<std::string>> &rec_html_tag_batch, std::vector<std::vector<std::string>> &rec_html_tag_batch,
std::vector<std::vector<std::vector<std::vector<int>>>> &rec_boxes_batch, std::vector<std::vector<std::vector<int>>> &rec_boxes_batch,
std::vector<int> &width_list, std::vector<int> &height_list) { std::vector<int> &width_list, std::vector<int> &height_list) {
for (int batch_idx = 0; batch_idx < structure_probs_shape[0]; batch_idx++) { for (int batch_idx = 0; batch_idx < structure_probs_shape[0]; batch_idx++) {
// image tags and boxs // image tags and boxs
std::vector<std::string> rec_html_tags; std::vector<std::string> rec_html_tags;
std::vector<std::vector<std::vector<int>>> rec_boxes; std::vector<std::vector<int>> rec_boxes;
float score = 0.f; float score = 0.f;
int count = 0; int count = 0;
...@@ -378,7 +390,7 @@ void TablePostProcessor::Run( ...@@ -378,7 +390,7 @@ void TablePostProcessor::Run(
// step // step
for (int step_idx = 0; step_idx < structure_probs_shape[1]; step_idx++) { for (int step_idx = 0; step_idx < structure_probs_shape[1]; step_idx++) {
std::string html_tag; std::string html_tag;
std::vector<std::vector<int>> rec_box; std::vector<int> rec_box;
// html tag // html tag
int step_start_idx = (batch_idx * structure_probs_shape[1] + step_idx) * int step_start_idx = (batch_idx * structure_probs_shape[1] + step_idx) *
structure_probs_shape[2]; structure_probs_shape[2];
...@@ -399,24 +411,26 @@ void TablePostProcessor::Run( ...@@ -399,24 +411,26 @@ void TablePostProcessor::Run(
count += 1; count += 1;
score += char_score; score += char_score;
rec_html_tags.push_back(html_tag); rec_html_tags.push_back(html_tag);
// box // box
if (html_tag == "<td>" || html_tag == "<td" || html_tag == "<td></td>") { if (html_tag == "<td>" || html_tag == "<td" || html_tag == "<td></td>") {
for (int point_idx = 0; point_idx < loc_preds_shape[2]; for (int point_idx = 0; point_idx < loc_preds_shape[2]; point_idx++) {
point_idx += 2) {
std::vector<int> point(2, 0);
step_start_idx = (batch_idx * structure_probs_shape[1] + step_idx) * step_start_idx = (batch_idx * structure_probs_shape[1] + step_idx) *
loc_preds_shape[2] + loc_preds_shape[2] +
point_idx; point_idx;
point[0] = int(loc_preds[step_start_idx] * width_list[batch_idx]); float point = loc_preds[step_start_idx];
point[1] = if (point_idx % 2 == 0) {
int(loc_preds[step_start_idx + 1] * height_list[batch_idx]); point = int(point * width_list[batch_idx]);
} else {
point = int(point * height_list[batch_idx]);
}
rec_box.push_back(point); rec_box.push_back(point);
} }
rec_boxes.push_back(rec_box); rec_boxes.push_back(rec_box);
} }
} }
score /= count; score /= count;
if (isnan(score) || rec_boxes.size() == 0) { if (std::isnan(score) || rec_boxes.size() == 0) {
score = -1; score = -1;
} }
rec_scores.push_back(score); rec_scores.push_back(score);
...@@ -425,4 +439,137 @@ void TablePostProcessor::Run( ...@@ -425,4 +439,137 @@ void TablePostProcessor::Run(
} }
} }
void PicodetPostProcessor::init(std::string label_path,
const double score_threshold,
const double nms_threshold,
const std::vector<int> &fpn_stride) {
this->label_list_ = Utility::ReadDict(label_path);
this->score_threshold_ = score_threshold;
this->nms_threshold_ = nms_threshold;
this->num_class_ = label_list_.size();
this->fpn_stride_ = fpn_stride;
}
void PicodetPostProcessor::Run(std::vector<StructurePredictResult> &results,
std::vector<std::vector<float>> outs,
std::vector<int> ori_shape,
std::vector<int> resize_shape, int reg_max) {
int in_h = resize_shape[0];
int in_w = resize_shape[1];
float scale_factor_h = resize_shape[0] / float(ori_shape[0]);
float scale_factor_w = resize_shape[1] / float(ori_shape[1]);
std::vector<std::vector<StructurePredictResult>> bbox_results;
bbox_results.resize(this->num_class_);
for (int i = 0; i < this->fpn_stride_.size(); ++i) {
int feature_h = std::ceil((float)in_h / this->fpn_stride_[i]);
int feature_w = std::ceil((float)in_w / this->fpn_stride_[i]);
for (int idx = 0; idx < feature_h * feature_w; idx++) {
// score and label
float score = 0;
int cur_label = 0;
for (int label = 0; label < this->num_class_; label++) {
if (outs[i][idx * this->num_class_ + label] > score) {
score = outs[i][idx * this->num_class_ + label];
cur_label = label;
}
}
// bbox
if (score > this->score_threshold_) {
int row = idx / feature_w;
int col = idx % feature_w;
std::vector<float> bbox_pred(
outs[i + this->fpn_stride_.size()].begin() + idx * 4 * reg_max,
outs[i + this->fpn_stride_.size()].begin() +
(idx + 1) * 4 * reg_max);
bbox_results[cur_label].push_back(
this->disPred2Bbox(bbox_pred, cur_label, score, col, row,
this->fpn_stride_[i], resize_shape, reg_max));
}
}
}
for (int i = 0; i < bbox_results.size(); i++) {
bool flag = bbox_results[i].size() <= 0;
}
for (int i = 0; i < bbox_results.size(); i++) {
bool flag = bbox_results[i].size() <= 0;
if (bbox_results[i].size() <= 0) {
continue;
}
this->nms(bbox_results[i], this->nms_threshold_);
for (auto box : bbox_results[i]) {
box.box[0] = box.box[0] / scale_factor_w;
box.box[2] = box.box[2] / scale_factor_w;
box.box[1] = box.box[1] / scale_factor_h;
box.box[3] = box.box[3] / scale_factor_h;
results.push_back(box);
}
}
}
// Turns the box logits of a single feature-map location into one box.
//
// bbox_pred: 4 * reg_max logits, reg_max consecutive values per box side.
// label:     index into label_list_ used for the result's type.
// score:     stored as the result's confidence.
// x, y:      column and row of the location on the feature map.
// stride:    stride of that feature level; scales the location and the
//            decoded distances.
// im_shape:  index 0 caps ymax, index 1 caps xmax.
// reg_max:   number of logits per box side.
//
// Returns a StructurePredictResult whose box is {xmin, ymin, xmax, ymax},
// with the minima clamped at 0 and the maxima clamped to im_shape.
StructurePredictResult
PicodetPostProcessor::disPred2Bbox(std::vector<float> bbox_pred, int label,
                                   float score, int x, int y, int stride,
                                   std::vector<int> im_shape, int reg_max) {
  // Centre of the cell, scaled by the stride.
  const float center_x = (x + 0.5) * stride;
  const float center_y = (y + 0.5) * stride;

  // For each of the four sides: softmax over its reg_max logits, then the
  // probability-weighted sum of the bin indices, scaled by the stride.
  std::vector<float> side_dist(4);
  for (int side = 0; side < 4; ++side) {
    std::vector<float> side_logits(bbox_pred.begin() + side * reg_max,
                                   bbox_pred.begin() + (side + 1) * reg_max);
    std::vector<float> side_probs =
        Utility::activation_function_softmax(side_logits);
    float expected_bin = 0;
    for (int bin = 0; bin < reg_max; ++bin) {
      expected_bin += bin * side_probs[bin];
    }
    expected_bin *= stride;
    side_dist[side] = expected_bin;
  }

  // Parenthesised (std::max)/(std::min) keep function-like macros of the
  // same name from being expanded here.
  const float left = (std::max)(center_x - side_dist[0], .0f);
  const float top = (std::max)(center_y - side_dist[1], .0f);
  const float right = (std::min)(center_x + side_dist[2], (float)im_shape[1]);
  const float bottom = (std::min)(center_y + side_dist[3], (float)im_shape[0]);

  StructurePredictResult decoded;
  decoded.box = {left, top, right, bottom};
  decoded.type = this->label_list_[label];
  decoded.confidence = score;
  return decoded;
}
void PicodetPostProcessor::nms(std::vector<StructurePredictResult> &input_boxes,
float nms_threshold) {
std::sort(input_boxes.begin(), input_boxes.end(),
[](StructurePredictResult a, StructurePredictResult b) {
return a.confidence > b.confidence;
});
std::vector<int> picked(input_boxes.size(), 1);
for (int i = 0; i < input_boxes.size(); ++i) {
if (picked[i] == 0) {
continue;
}
for (int j = i + 1; j < input_boxes.size(); ++j) {
if (picked[j] == 0) {
continue;
}
float iou = Utility::iou(input_boxes[i].box, input_boxes[j].box);
if (iou > nms_threshold) {
picked[j] = 0;
}
}
}
std::vector<StructurePredictResult> input_boxes_nms;
for (int i = 0; i < input_boxes.size(); ++i) {
if (picked[i] == 1) {
input_boxes_nms.push_back(input_boxes[i]);
}
}
input_boxes = input_boxes_nms;
}
} // namespace PaddleOCR } // namespace PaddleOCR
此差异已折叠。
...@@ -20,7 +20,7 @@ void StructureTableRecognizer::Run( ...@@ -20,7 +20,7 @@ void StructureTableRecognizer::Run(
std::vector<cv::Mat> img_list, std::vector<cv::Mat> img_list,
std::vector<std::vector<std::string>> &structure_html_tags, std::vector<std::vector<std::string>> &structure_html_tags,
std::vector<float> &structure_scores, std::vector<float> &structure_scores,
std::vector<std::vector<std::vector<std::vector<int>>>> &structure_boxes, std::vector<std::vector<std::vector<int>>> &structure_boxes,
std::vector<double> &times) { std::vector<double> &times) {
std::chrono::duration<float> preprocess_diff = std::chrono::duration<float> preprocess_diff =
std::chrono::steady_clock::now() - std::chrono::steady_clock::now(); std::chrono::steady_clock::now() - std::chrono::steady_clock::now();
...@@ -34,7 +34,7 @@ void StructureTableRecognizer::Run( ...@@ -34,7 +34,7 @@ void StructureTableRecognizer::Run(
beg_img_no += this->table_batch_num_) { beg_img_no += this->table_batch_num_) {
// preprocess // preprocess
auto preprocess_start = std::chrono::steady_clock::now(); auto preprocess_start = std::chrono::steady_clock::now();
int end_img_no = min(img_num, beg_img_no + this->table_batch_num_); int end_img_no = std::min(img_num, beg_img_no + this->table_batch_num_);
int batch_num = end_img_no - beg_img_no; int batch_num = end_img_no - beg_img_no;
std::vector<cv::Mat> norm_img_batch; std::vector<cv::Mat> norm_img_batch;
std::vector<int> width_list; std::vector<int> width_list;
...@@ -89,8 +89,7 @@ void StructureTableRecognizer::Run( ...@@ -89,8 +89,7 @@ void StructureTableRecognizer::Run(
auto postprocess_start = std::chrono::steady_clock::now(); auto postprocess_start = std::chrono::steady_clock::now();
std::vector<std::vector<std::string>> structure_html_tag_batch; std::vector<std::vector<std::string>> structure_html_tag_batch;
std::vector<float> structure_score_batch; std::vector<float> structure_score_batch;
std::vector<std::vector<std::vector<std::vector<int>>>> std::vector<std::vector<std::vector<int>>> structure_boxes_batch;
structure_boxes_batch;
this->post_processor_.Run(loc_preds, structure_probs, structure_score_batch, this->post_processor_.Run(loc_preds, structure_probs, structure_score_batch,
predict_shape0, predict_shape1, predict_shape0, predict_shape1,
structure_html_tag_batch, structure_boxes_batch, structure_html_tag_batch, structure_boxes_batch,
...@@ -119,7 +118,7 @@ void StructureTableRecognizer::Run( ...@@ -119,7 +118,7 @@ void StructureTableRecognizer::Run(
} }
void StructureTableRecognizer::LoadModel(const std::string &model_dir) { void StructureTableRecognizer::LoadModel(const std::string &model_dir) {
AnalysisConfig config; paddle_infer::Config config;
config.SetModel(model_dir + "/inference.pdmodel", config.SetModel(model_dir + "/inference.pdmodel",
model_dir + "/inference.pdiparams"); model_dir + "/inference.pdiparams");
...@@ -134,6 +133,11 @@ void StructureTableRecognizer::LoadModel(const std::string &model_dir) { ...@@ -134,6 +133,11 @@ void StructureTableRecognizer::LoadModel(const std::string &model_dir) {
precision = paddle_infer::Config::Precision::kInt8; precision = paddle_infer::Config::Precision::kInt8;
} }
config.EnableTensorRtEngine(1 << 20, 10, 3, precision, false, false); config.EnableTensorRtEngine(1 << 20, 10, 3, precision, false, false);
if (!Utility::PathExists("./trt_table_shape.txt")) {
config.CollectShapeRangeInfo("./trt_table_shape.txt");
} else {
config.EnableTunedTensorRtDynamicShape("./trt_table_shape.txt", true);
}
} }
} else { } else {
config.DisableGpu(); config.DisableGpu();
...@@ -153,6 +157,6 @@ void StructureTableRecognizer::LoadModel(const std::string &model_dir) { ...@@ -153,6 +157,6 @@ void StructureTableRecognizer::LoadModel(const std::string &model_dir) {
config.EnableMemoryOptim(); config.EnableMemoryOptim();
config.DisableGlogInfo(); config.DisableGlogInfo();
this->predictor_ = CreatePredictor(config); this->predictor_ = paddle_infer::CreatePredictor(config);
} }
} // namespace PaddleOCR } // namespace PaddleOCR
此差异已折叠。
...@@ -5,4 +5,4 @@ det_db_unclip_ratio 1.6 ...@@ -5,4 +5,4 @@ det_db_unclip_ratio 1.6
det_db_use_dilate 0 det_db_use_dilate 0
det_use_polygon_score 1 det_use_polygon_score 1
use_direction_classify 1 use_direction_classify 1
rec_image_height 32 rec_image_height 48
\ No newline at end of file \ No newline at end of file
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册