提交 bb0955dd 编写于 作者: 文幕地方's avatar 文幕地方

Merge branch 'dygraph' of https://github.com/PaddlePaddle/PaddleOCR into cpp_infer

...@@ -2449,13 +2449,6 @@ class MainWindow(QMainWindow): ...@@ -2449,13 +2449,6 @@ class MainWindow(QMainWindow):
export PPLabel and CSV to JSON (PubTabNet) export PPLabel and CSV to JSON (PubTabNet)
''' '''
import pandas as pd import pandas as pd
from libs.dataPartitionDialog import DataPartitionDialog
# data partition user input
partitionDialog = DataPartitionDialog(parent=self)
partitionDialog.exec()
if partitionDialog.getStatus() == False:
return
# automatically save annotations # automatically save annotations
self.saveFilestate() self.saveFilestate()
...@@ -2479,27 +2472,18 @@ class MainWindow(QMainWindow): ...@@ -2479,27 +2472,18 @@ class MainWindow(QMainWindow):
else: else:
labeldict[file] = [] labeldict[file] = []
train_split, val_split, test_split = partitionDialog.getDataPartition() # read table recognition output
# check validate TableRec_excel_dir = os.path.join(
if train_split + val_split + test_split > 100: self.lastOpenDir, 'tableRec_excel_output')
msg = "The sum of training, validation and testing data should be less than 100%"
QMessageBox.information(self, "Information", msg) # save txt
return fid = open(
print(train_split, val_split, test_split) "{}/gt.txt".format(self.lastOpenDir), "w", encoding='utf-8')
train_split, val_split, test_split = float(train_split) / 100., float(val_split) / 100., float(test_split) / 100.
train_id = int(len(labeldict) * train_split)
val_id = int(len(labeldict) * (train_split + val_split))
print('Data partition: train:', train_id,
'validation:', val_id - train_id,
'test:', len(labeldict) - val_id)
TableRec_excel_dir = os.path.join(self.lastOpenDir, 'tableRec_excel_output')
json_results = []
imgid = 0
for image_path in labeldict.keys(): for image_path in labeldict.keys():
# load csv annotations # load csv annotations
filename, _ = os.path.splitext(os.path.basename(image_path)) filename, _ = os.path.splitext(os.path.basename(image_path))
csv_path = os.path.join(TableRec_excel_dir, filename + '.xlsx') csv_path = os.path.join(
TableRec_excel_dir, filename + '.xlsx')
if not os.path.exists(csv_path): if not os.path.exists(csv_path):
continue continue
...@@ -2518,28 +2502,31 @@ class MainWindow(QMainWindow): ...@@ -2518,28 +2502,31 @@ class MainWindow(QMainWindow):
cells = [] cells = []
for anno in labeldict[image_path]: for anno in labeldict[image_path]:
tokens = list(anno['transcription']) tokens = list(anno['transcription'])
obb = anno['points'] cells.append({
hbb = OBB2HBB(np.array(obb)).tolist() 'tokens': tokens,
cells.append({'tokens': tokens, 'bbox': hbb}) 'bbox': anno['points']
})
# data split
if imgid < train_id: # 构造标注信息
split = 'train' html = {
elif imgid < val_id: 'structure': {
split = 'val' 'tokens': token_list
else: },
split = 'test' 'cells': cells
}
# save dict d = {
html = {'structure': {'tokens': token_list}, 'cell': cells} 'filename': os.path.basename(image_path),
json_results.append({'filename': os.path.basename(image_path), 'split': split, 'imgid': imgid, 'html': html}) 'html': html
imgid += 1 }
# 重构HTML
# save json d['gt'] = rebuild_html_from_ppstructure_label(d)
with open("{}/annotation.json".format(self.lastOpenDir), "w", encoding='utf-8') as fid: fid.write('{}\n'.format(
fid.write(json.dumps(json_results, ensure_ascii=False)) json.dumps(
d, ensure_ascii=False)))
msg = 'JSON sucessfully saved in {}/annotation.json'.format(self.lastOpenDir)
# convert to PP-Structure label format
fid.close()
msg = 'JSON sucessfully saved in {}/gt.txt'.format(self.lastOpenDir)
QMessageBox.information(self, "Information", msg) QMessageBox.information(self, "Information", msg)
def autolcm(self): def autolcm(self):
......
English | [简体中文](README_ch.md) English | [简体中文](README_ch.md)
# PPOCRLabel # PPOCRLabelv2
PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field, with built-in PP-OCR model to automatically detect and re-recognize data. It is written in python3 and pyqt5, supporting rectangular box, table and multi-point annotation modes. Annotations can be directly used for the training of PP-OCR detection and recognition models. PPOCRLabelv2 is a semi-automatic graphic annotation tool suitable for OCR field, with built-in PP-OCR model to automatically detect and re-recognize data. It is written in Python3 and PyQT5, supporting rectangular box, table, irregular text and key information annotation modes. Annotations can be directly used for the training of PP-OCR detection and recognition models.
<img src="./data/gif/steps_en.gif" width="100%"/> | regular text annotation | table annotation |
| :-------------------------------------------------: | :--------------------------------------------: |
| <img src="./data/gif/steps_en.gif" width="80%"/> | <img src="./data/gif/table.gif" width="100%"/> |
| **irregular text annotation** | **key information annotation** |
| <img src="./data/gif/multi-point.gif" width="80%"/> | <img src="./data/gif/kie.gif" width="100%"/> |
### Recent Update ### Recent Update
......
[English](README.md) | 简体中文 [English](README.md) | 简体中文
# PPOCRLabel # PPOCRLabelv2
PPOCRLabel是一款适用于OCR领域的半自动化图形标注工具,内置PP-OCR模型对数据自动标注和重新识别。使用Python3和PyQT5编写,支持矩形框标注和四点标注模式,导出格式可直接用于PaddleOCR检测和识别模型的训练。 PPOCRLabel是一款适用于OCR领域的半自动化图形标注工具,内置PP-OCR模型对数据自动标注和重新识别。使用Python3和PyQT5编写,支持矩形框标注和四点标注模式,导出格式可直接用于PaddleOCR检测和识别模型的训练。
<img src="./data/gif/steps.gif" width="100%"/> | 常规标注 | 表格标注 |
| :-------------------------------------------------: | :--------------------------------------------: |
| <img src="./data/gif/steps_en.gif" width="80%"/> | <img src="./data/gif/table.gif" width="100%"/> |
| **不规则文本标注** | **关键信息标注** |
| <img src="./data/gif/multi-point.gif" width="80%"/> | <img src="./data/gif/kie.gif" width="100%"/> |
#### 近期更新 #### 近期更新
- 2022.05:**新增表格标注**,使用方法见下方`2.2 表格标注`(by [whjdark](https://github.com/peterh0323); [Evezerest](https://github.com/Evezerest)) - 2022.05:**新增表格标注**,使用方法见下方`2.2 表格标注`(by [whjdark](https://github.com/peterh0323); [Evezerest](https://github.com/Evezerest))
......
try:
from PyQt5.QtGui import *
from PyQt5.QtCore import *
from PyQt5.QtWidgets import *
except ImportError:
from PyQt4.QtGui import *
from PyQt4.QtCore import *
from libs.utils import newIcon
import time
import datetime
import json
import cv2
import numpy as np
BB = QDialogButtonBox


class DataPartitionDialog(QDialog):
    """Modal dialog asking for train/validation/test percentage splits.

    Usage: construct with the main window as ``parent``, call ``exec()``,
    then check ``getStatus()`` and read ``getDataPartition()``.  The caller
    is responsible for validating that the three percentages sum to <= 100.
    """

    def __init__(self, parent=None):
        # Pass the parent through to Qt so the dialog is properly owned
        # (the original called super().__init__() without it).
        super().__init__(parent)
        self._parent = parent  # fixes the original misspelling `self.parnet`
        self.title = 'DATA PARTITION'
        # Default split percentages.
        self.train_ratio = 70
        self.val_ratio = 15
        self.test_ratio = 15
        self.initUI()

    def initUI(self):
        """Build the widgets and show the dialog."""
        self.setWindowTitle(self.title)
        self.setWindowModality(Qt.ApplicationModal)
        self.flag_accept = True

        # Localized warning; guard against a missing parent instead of
        # crashing with AttributeError as the original did.
        if self._parent is not None and getattr(self._parent, 'lang', None) == 'ch':
            msg = "导出JSON前请保存所有图像的标注且关闭EXCEL!"
        else:
            msg = "Please save all the annotations and close the EXCEL before exporting JSON!"
        info_msg = QLabel(msg, self)
        info_msg.setWordWrap(True)
        info_msg.setStyleSheet("color: red")
        info_msg.setFont(QFont('Arial', 12))

        gridlayout = QGridLayout()
        gridlayout.addWidget(info_msg, 0, 0, 1, 2)

        # One shared validator keeps each field within [0, 100].
        validator = QIntValidator(0, 100)
        self.train_input = self._add_ratio_row(
            gridlayout, 1, 'Train split: ', self.train_ratio, validator)
        self.val_input = self._add_ratio_row(
            gridlayout, 2, 'Valid split: ', self.val_ratio, validator)
        self.test_input = self._add_ratio_row(
            gridlayout, 3, 'Test split: ', self.test_ratio, validator)

        bb = BB(BB.Ok | BB.Cancel, Qt.Horizontal, self)
        bb.button(BB.Ok).setIcon(newIcon('done'))
        bb.button(BB.Cancel).setIcon(newIcon('undo'))
        bb.accepted.connect(self.validate)
        bb.rejected.connect(self.cancel)
        gridlayout.addWidget(bb, 4, 0, 1, 2)

        self.setLayout(gridlayout)
        self.show()

    def _add_ratio_row(self, layout, row, caption, default, validator):
        """Add one 'label + percentage line-edit' row to *layout*; return the edit."""
        lbl = QLabel(caption, self)
        lbl.setFont(QFont('Arial', 15))
        edit = QLineEdit(self)
        edit.setFont(QFont('Arial', 15))
        edit.setText(str(default))
        edit.setValidator(validator)
        layout.addWidget(lbl, row, 0)
        layout.addWidget(edit, row, 1)
        return edit

    def validate(self):
        """OK pressed: mark the dialog as accepted and close it."""
        self.flag_accept = True
        self.accept()

    def cancel(self):
        """Cancel pressed: mark the dialog as rejected and close it."""
        self.flag_accept = False
        self.reject()

    def getStatus(self):
        """Return True if the user confirmed the dialog with OK."""
        return self.flag_accept

    def getDataPartition(self):
        """Read the three percentage fields and return them as ints.

        Raises ValueError if a field was left empty (the validator only
        restricts characters, not emptiness).
        """
        self.train_ratio = int(self.train_input.text())
        self.val_ratio = int(self.val_input.text())
        self.test_ratio = int(self.test_input.text())
        return self.train_ratio, self.val_ratio, self.test_ratio

    def closeEvent(self, event):
        """Treat closing via the window manager as a cancel."""
        self.flag_accept = False
        self.reject()
...@@ -176,18 +176,6 @@ def boxPad(box, imgShape, pad : int) -> np.array: ...@@ -176,18 +176,6 @@ def boxPad(box, imgShape, pad : int) -> np.array:
return box return box
def OBB2HBB(obb) -> np.array:
    """Convert an Oriented Bounding Box (N x 2 corner points) to a
    Horizontal Bounding Box [xmin, ymin, xmax, ymax] (int32)."""
    xmin, ymin = obb[:, 0].min(), obb[:, 1].min()
    xmax, ymax = obb[:, 0].max(), obb[:, 1].max()
    return np.array([xmin, ymin, xmax, ymax], dtype=np.int32)
def expand_list(merged, html_list): def expand_list(merged, html_list):
''' '''
Fill blanks according to merged cells Fill blanks according to merged cells
...@@ -232,6 +220,26 @@ def convert_token(html_list): ...@@ -232,6 +220,26 @@ def convert_token(html_list):
return token_list return token_list
def rebuild_html_from_ppstructure_label(label_info):
    """Reassemble a complete HTML table string from a PP-Structure label dict.

    Cell contents are spliced back into the structure token stream right
    after each cell-opening token ('<td>' or the bare '>' that closes an
    attributed '<td ...>'), walking back-to-front so earlier insertion
    indices remain valid.
    """
    from html import escape

    tokens = list(label_info['html']['structure']['tokens'])
    # Positions of cell-opening tokens in the structure stream.
    openings = [idx for idx, tok in enumerate(tokens) if tok in ('<td>', '>')]
    cells = label_info['html']['cells']
    for idx, cell in zip(reversed(openings), reversed(cells)):
        pieces = cell['tokens']
        if not pieces:
            continue
        # Escape bare single characters; longer entries are markup tokens.
        text = ''.join(
            escape(piece) if len(piece) == 1 else piece for piece in pieces
        )
        tokens.insert(idx + 1, text)
    return '<html><body><table>{}</table></body></html>'.format(''.join(tokens))
def stepsInfo(lang='en'): def stepsInfo(lang='en'):
if lang == 'ch': if lang == 'ch':
msg = "1. 安装与运行:使用上述命令安装与运行程序。\n" \ msg = "1. 安装与运行:使用上述命令安装与运行程序。\n" \
......
...@@ -33,7 +33,7 @@ setup( ...@@ -33,7 +33,7 @@ setup(
package_dir={'PPOCRLabel': ''}, package_dir={'PPOCRLabel': ''},
include_package_data=True, include_package_data=True,
entry_points={"console_scripts": ["PPOCRLabel= PPOCRLabel.PPOCRLabel:main"]}, entry_points={"console_scripts": ["PPOCRLabel= PPOCRLabel.PPOCRLabel:main"]},
version='1.0.2', version='2.1.1',
install_requires=requirements, install_requires=requirements,
license='Apache License 2.0', license='Apache License 2.0',
description='PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field, with built-in PPOCR model to automatically detect and re-recognize data. It is written in python3 and pyqt5, supporting rectangular box annotation and four-point annotation modes. Annotations can be directly used for the training of PPOCR detection and recognition models', description='PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field, with built-in PPOCR model to automatically detect and re-recognize data. It is written in python3 and pyqt5, supporting rectangular box annotation and four-point annotation modes. Annotations can be directly used for the training of PPOCR detection and recognition models',
......
...@@ -123,7 +123,7 @@ PaddleOCR support a variety of cutting-edge algorithms related to OCR, and devel ...@@ -123,7 +123,7 @@ PaddleOCR support a variety of cutting-edge algorithms related to OCR, and devel
- [Inference and Deployment](./deploy/README.md) - [Inference and Deployment](./deploy/README.md)
- [Python Inference](./ppstructure/docs/inference_en.md) - [Python Inference](./ppstructure/docs/inference_en.md)
- [C++ Inference](./deploy/cpp_infer/readme.md) - [C++ Inference](./deploy/cpp_infer/readme.md)
- [Serving](./deploy/pdserving/README.md) - [Serving](./deploy/hubserving/readme_en.md)
- [Academic Algorithms](./doc/doc_en/algorithm_overview_en.md) - [Academic Algorithms](./doc/doc_en/algorithm_overview_en.md)
- [Text detection](./doc/doc_en/algorithm_overview_en.md) - [Text detection](./doc/doc_en/algorithm_overview_en.md)
- [Text recognition](./doc/doc_en/algorithm_overview_en.md) - [Text recognition](./doc/doc_en/algorithm_overview_en.md)
......
...@@ -135,7 +135,7 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力 ...@@ -135,7 +135,7 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力
- [推理部署](./deploy/README_ch.md) - [推理部署](./deploy/README_ch.md)
- [基于Python预测引擎推理](./ppstructure/docs/inference.md) - [基于Python预测引擎推理](./ppstructure/docs/inference.md)
- [基于C++预测引擎推理](./deploy/cpp_infer/readme_ch.md) - [基于C++预测引擎推理](./deploy/cpp_infer/readme_ch.md)
- [服务化部署](./deploy/pdserving/README_CN.md) - [服务化部署](./deploy/hubserving/readme.md)
- [前沿算法与模型🚀](./doc/doc_ch/algorithm_overview.md) - [前沿算法与模型🚀](./doc/doc_ch/algorithm_overview.md)
- [文本检测算法](./doc/doc_ch/algorithm_overview.md) - [文本检测算法](./doc/doc_ch/algorithm_overview.md)
- [文本识别算法](./doc/doc_ch/algorithm_overview.md) - [文本识别算法](./doc/doc_ch/algorithm_overview.md)
......
...@@ -30,7 +30,7 @@ cd PaddleOCR ...@@ -30,7 +30,7 @@ cd PaddleOCR
# 安装PaddleOCR的依赖 # 安装PaddleOCR的依赖
pip install -r requirements.txt pip install -r requirements.txt
# 安装关键信息抽取任务的依赖 # 安装关键信息抽取任务的依赖
pip install -r ./ppstructure/vqa/requirements.txt pip install -r ./ppstructure/kie/requirements.txt
``` ```
## 4. 关键信息抽取 ## 4. 关键信息抽取
...@@ -94,7 +94,7 @@ VI-LayoutXLM的配置为[ser_vi_layoutxlm_xfund_zh_udml.yml](../configs/kie/vi_l ...@@ -94,7 +94,7 @@ VI-LayoutXLM的配置为[ser_vi_layoutxlm_xfund_zh_udml.yml](../configs/kie/vi_l
```yml ```yml
Architecture: Architecture:
model_type: &model_type "vqa" model_type: &model_type "kie"
name: DistillationModel name: DistillationModel
algorithm: Distillation algorithm: Distillation
Models: Models:
...@@ -177,7 +177,7 @@ python3 tools/eval.py -c ./fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone. ...@@ -177,7 +177,7 @@ python3 tools/eval.py -c ./fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.
使用下面的命令进行预测。 使用下面的命令进行预测。
```bash ```bash
python3 tools/infer_vqa_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/XFUND/zh_val/val.json Global.infer_mode=False python3 tools/infer_kie_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/XFUND/zh_val/val.json Global.infer_mode=False
``` ```
预测结果会保存在配置文件中的`Global.save_res_path`目录中。 预测结果会保存在配置文件中的`Global.save_res_path`目录中。
...@@ -195,7 +195,7 @@ python3 tools/infer_vqa_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architect ...@@ -195,7 +195,7 @@ python3 tools/infer_vqa_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architect
```bash ```bash
python3 tools/infer_vqa_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/imgs/b25.jpg Global.infer_mode=True python3 tools/infer_kie_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/imgs/b25.jpg Global.infer_mode=True
``` ```
结果如下所示。 结果如下所示。
...@@ -211,7 +211,7 @@ python3 tools/infer_vqa_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architect ...@@ -211,7 +211,7 @@ python3 tools/infer_vqa_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architect
如果希望构建基于你在垂类场景训练得到的OCR检测与识别模型,可以使用下面的方法传入检测与识别的inference 模型路径,即可完成OCR文本检测与识别以及SER的串联过程。 如果希望构建基于你在垂类场景训练得到的OCR检测与识别模型,可以使用下面的方法传入检测与识别的inference 模型路径,即可完成OCR文本检测与识别以及SER的串联过程。
```bash ```bash
python3 tools/infer_vqa_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/imgs/b25.jpg Global.infer_mode=True Global.kie_rec_model_dir="your_rec_model" Global.kie_det_model_dir="your_det_model" python3 tools/infer_kie_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/imgs/b25.jpg Global.infer_mode=True Global.kie_rec_model_dir="your_rec_model" Global.kie_det_model_dir="your_det_model"
``` ```
### 4.4 关系抽取(Relation Extraction) ### 4.4 关系抽取(Relation Extraction)
...@@ -316,7 +316,7 @@ python3 tools/eval.py -c ./fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.c ...@@ -316,7 +316,7 @@ python3 tools/eval.py -c ./fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.c
# -o 后面的字段是RE任务的配置 # -o 后面的字段是RE任务的配置
# -c_ser 后面的是SER任务的配置文件 # -c_ser 后面的是SER任务的配置文件
# -c_ser 后面的字段是SER任务的配置 # -c_ser 后面的字段是SER任务的配置
python3 tools/infer_vqa_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/re_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/val.json Global.infer_mode=False -c_ser fapiao/ser_vi_layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy python3 tools/infer_kie_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/re_vi_layoutxlm_fapiao_trained/best_accuracy Global.infer_img=./train_data/zzsfp/val.json Global.infer_mode=False -c_ser fapiao/ser_vi_layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_trained/best_accuracy
``` ```
预测结果会保存在配置文件中的`Global.save_res_path`目录中。 预测结果会保存在配置文件中的`Global.save_res_path`目录中。
...@@ -333,11 +333,11 @@ python3 tools/infer_vqa_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Archite ...@@ -333,11 +333,11 @@ python3 tools/infer_vqa_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Archite
如果希望使用OCR引擎结果得到的结果进行推理,则可以使用下面的命令进行推理。 如果希望使用OCR引擎结果得到的结果进行推理,则可以使用下面的命令进行推理。
```bash ```bash
python3 tools/infer_vqa_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/re_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/val.json Global.infer_mode=True -c_ser fapiao/ser_vi_layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy python3 tools/infer_kie_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/re_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/val.json Global.infer_mode=True -c_ser fapiao/ser_vi_layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy
``` ```
如果希望构建基于你在垂类场景训练得到的OCR检测与识别模型,可以使用下面的方法传入,即可完成SER + RE的串联过程。 如果希望构建基于你在垂类场景训练得到的OCR检测与识别模型,可以使用下面的方法传入,即可完成SER + RE的串联过程。
```bash ```bash
python3 tools/infer_vqa_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/re_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/val.json Global.infer_mode=True -c_ser fapiao/ser_vi_layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.kie_rec_model_dir="your_rec_model" Global.kie_det_model_dir="your_det_model" python3 tools/infer_kie_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/re_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/val.json Global.infer_mode=True -c_ser fapiao/ser_vi_layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.kie_rec_model_dir="your_rec_model" Global.kie_det_model_dir="your_det_model"
``` ```
...@@ -191,7 +191,6 @@ Eval: ...@@ -191,7 +191,6 @@ Eval:
channel_first: False channel_first: False
- DetLabelEncode: # Class handling label - DetLabelEncode: # Class handling label
- DetResizeForTest: - DetResizeForTest:
# image_shape: [736, 1280]
- NormalizeImage: - NormalizeImage:
scale: 1./255. scale: 1./255.
mean: [0.485, 0.456, 0.406] mean: [0.485, 0.456, 0.406]
......
...@@ -24,6 +24,7 @@ Architecture: ...@@ -24,6 +24,7 @@ Architecture:
model_type: det model_type: det
Models: Models:
Student: Student:
pretrained:
model_type: det model_type: det
algorithm: DB algorithm: DB
Transform: null Transform: null
...@@ -40,6 +41,7 @@ Architecture: ...@@ -40,6 +41,7 @@ Architecture:
name: DBHead name: DBHead
k: 50 k: 50
Student2: Student2:
pretrained:
model_type: det model_type: det
algorithm: DB algorithm: DB
Transform: null Transform: null
...@@ -91,14 +93,11 @@ Loss: ...@@ -91,14 +93,11 @@ Loss:
- ["Student", "Student2"] - ["Student", "Student2"]
maps_name: "thrink_maps" maps_name: "thrink_maps"
weight: 1.0 weight: 1.0
# act: None
model_name_pairs: ["Student", "Student2"] model_name_pairs: ["Student", "Student2"]
key: maps key: maps
- DistillationDBLoss: - DistillationDBLoss:
weight: 1.0 weight: 1.0
model_name_list: ["Student", "Student2"] model_name_list: ["Student", "Student2"]
# key: maps
# name: DBLoss
balance_loss: true balance_loss: true
main_loss_type: DiceLoss main_loss_type: DiceLoss
alpha: 5 alpha: 5
...@@ -197,6 +196,7 @@ Train: ...@@ -197,6 +196,7 @@ Train:
drop_last: false drop_last: false
batch_size_per_card: 8 batch_size_per_card: 8
num_workers: 4 num_workers: 4
Eval: Eval:
dataset: dataset:
name: SimpleDataSet name: SimpleDataSet
...@@ -204,31 +204,21 @@ Eval: ...@@ -204,31 +204,21 @@ Eval:
label_file_list: label_file_list:
- ./train_data/icdar2015/text_localization/test_icdar2015_label.txt - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
transforms: transforms:
- DecodeImage: - DecodeImage: # load image
img_mode: BGR img_mode: BGR
channel_first: false channel_first: False
- DetLabelEncode: null - DetLabelEncode: # Class handling label
- DetResizeForTest: null - DetResizeForTest:
- NormalizeImage: - NormalizeImage:
scale: 1./255. scale: 1./255.
mean: mean: [0.485, 0.456, 0.406]
- 0.485 std: [0.229, 0.224, 0.225]
- 0.456 order: 'hwc'
- 0.406 - ToCHWImage:
std:
- 0.229
- 0.224
- 0.225
order: hwc
- ToCHWImage: null
- KeepKeys: - KeepKeys:
keep_keys: keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
- image
- shape
- polys
- ignore_tags
loader: loader:
shuffle: false shuffle: False
drop_last: false drop_last: False
batch_size_per_card: 1 batch_size_per_card: 1 # must be 1
num_workers: 2 num_workers: 2
\ No newline at end of file
# PaddleOCR training configuration for the CT (CentripetalText) text detector:
# ResNet18_vd backbone + CTFPN neck + CT head, trained/evaluated on Total-Text.
Global:
  use_gpu: true
  epoch_num: 600
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: ./output/det_ct/
  save_epoch_step: 10
  # evaluation is run every 2000 iterations
  eval_batch_step: [0,1000]
  cal_metric_during_train: False
  # ImageNet-pretrained backbone weights; download separately before training
  pretrained_model: ./pretrain_models/ResNet18_vd_pretrained.pdparams
  checkpoints:
  save_inference_dir:
  use_visualdl: False
  infer_img: doc/imgs_en/img623.jpg
  save_res_path: ./output/det_ct/predicts_ct.txt
Architecture:
  model_type: det
  algorithm: CT
  Transform:
  Backbone:
    name: ResNet_vd
    layers: 18
  Neck:
    name: CTFPN
  Head:
    name: CT_Head
    in_channels: 512
    hidden_dim: 128
    num_classes: 3
Loss:
  name: CTLoss
Optimizer:
  name: Adam
  lr: #PolynomialDecay
    name: Linear
    learning_rate: 0.001
    end_lr: 0.
    epochs: 600
    step_each_epoch: 1254
    power: 0.9
PostProcess:
  name: CTPostProcess
  # polygon boxes suit the curved-text annotations of Total-Text
  box_type: poly
Metric:
  name: CTMetric
  main_indicator: f_score
Train:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/total_text/train
    label_file_list:
      - ./train_data/total_text/train/train.txt
    ratio_list: [1.0]
    transforms:
      - DecodeImage:
          img_mode: RGB
          channel_first: False
      - CTLabelEncode: # Class handling label
      - RandomScale:
      - MakeShrink:
      - GroupRandomHorizontalFlip:
      - GroupRandomRotate:
      - GroupRandomCropPadding:
      - MakeCentripetalShift:
      - ColorJitter:
          brightness: 0.125
          saturation: 0.5
      - ToCHWImage:
      - NormalizeImage:
      - KeepKeys:
          keep_keys: ['image', 'gt_kernel', 'training_mask', 'gt_instance', 'gt_kernel_instance', 'training_mask_distance', 'gt_distance'] # the order of the dataloader list
  loader:
    shuffle: True
    drop_last: True
    batch_size_per_card: 4
    num_workers: 8
Eval:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/total_text/test
    label_file_list:
      - ./train_data/total_text/test/test.txt
    ratio_list: [1.0]
    transforms:
      - DecodeImage:
          img_mode: RGB
          channel_first: False
      - CTLabelEncode: # Class handling label
      - ScaleAlignedShort:
      - NormalizeImage:
          order: 'hwc'
      - ToCHWImage:
      - KeepKeys:
          keep_keys: ['image', 'shape', 'polys', 'texts'] # the order of the dataloader list
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 1
    num_workers: 2
...@@ -13,6 +13,7 @@ Global: ...@@ -13,6 +13,7 @@ Global:
save_inference_dir: save_inference_dir:
use_visualdl: False use_visualdl: False
infer_img: infer_img:
infer_visual_type: EN # two mode: EN is for english datasets, CN is for chinese datasets
valid_set: totaltext # two mode: totaltext valid curved words, partvgg valid non-curved words valid_set: totaltext # two mode: totaltext valid curved words, partvgg valid non-curved words
save_res_path: ./output/pgnet_r50_vd_totaltext/predicts_pgnet.txt save_res_path: ./output/pgnet_r50_vd_totaltext/predicts_pgnet.txt
character_dict_path: ppocr/utils/ic15_dict.txt character_dict_path: ppocr/utils/ic15_dict.txt
...@@ -32,6 +33,7 @@ Architecture: ...@@ -32,6 +33,7 @@ Architecture:
name: PGFPN name: PGFPN
Head: Head:
name: PGHead name: PGHead
character_dict_path: ppocr/utils/ic15_dict.txt # the same as Global:character_dict_path
Loss: Loss:
name: PGLoss name: PGLoss
...@@ -45,16 +47,18 @@ Optimizer: ...@@ -45,16 +47,18 @@ Optimizer:
beta1: 0.9 beta1: 0.9
beta2: 0.999 beta2: 0.999
lr: lr:
name: Cosine
learning_rate: 0.001 learning_rate: 0.001
warmup_epoch: 50
regularizer: regularizer:
name: 'L2' name: 'L2'
factor: 0 factor: 0.0001
PostProcess: PostProcess:
name: PGPostProcess name: PGPostProcess
score_thresh: 0.5 score_thresh: 0.5
mode: fast # fast or slow two ways mode: fast # fast or slow two ways
point_gather_mode: align # same as PGProcessTrain: point_gather_mode
Metric: Metric:
name: E2EMetric name: E2EMetric
...@@ -76,9 +80,12 @@ Train: ...@@ -76,9 +80,12 @@ Train:
- E2ELabelEncodeTrain: - E2ELabelEncodeTrain:
- PGProcessTrain: - PGProcessTrain:
batch_size: 14 # same as loader: batch_size_per_card batch_size: 14 # same as loader: batch_size_per_card
use_resize: True
use_random_crop: False
min_crop_size: 24 min_crop_size: 24
min_text_size: 4 min_text_size: 4
max_text_size: 512 max_text_size: 512
point_gather_mode: align # two mode: align and none, align mode is better than none mode
- KeepKeys: - KeepKeys:
keep_keys: [ 'images', 'tcl_maps', 'tcl_label_maps', 'border_maps','direction_maps', 'training_masks', 'label_list', 'pos_list', 'pos_mask' ] # dataloader will return list in this order keep_keys: [ 'images', 'tcl_maps', 'tcl_label_maps', 'border_maps','direction_maps', 'training_masks', 'label_list', 'pos_list', 'pos_mask' ] # dataloader will return list in this order
loader: loader:
......
...@@ -12,7 +12,7 @@ Global: ...@@ -12,7 +12,7 @@ Global:
checkpoints: checkpoints:
save_inference_dir: ./output/SLANet/infer save_inference_dir: ./output/SLANet/infer
use_visualdl: False use_visualdl: False
infer_img: doc/table/table.jpg infer_img: ppstructure/docs/table/table.jpg
# for data or label process # for data or label process
character_dict_path: ppocr/utils/dict/table_structure_dict.txt character_dict_path: ppocr/utils/dict/table_structure_dict.txt
character_type: en character_type: en
......
...@@ -12,7 +12,7 @@ Global: ...@@ -12,7 +12,7 @@ Global:
checkpoints: checkpoints:
save_inference_dir: ./output/SLANet_ch/infer save_inference_dir: ./output/SLANet_ch/infer
use_visualdl: False use_visualdl: False
infer_img: doc/table/table.jpg infer_img: ppstructure/docs/table/table.jpg
# for data or label process # for data or label process
character_dict_path: ppocr/utils/dict/table_structure_dict_ch.txt character_dict_path: ppocr/utils/dict/table_structure_dict_ch.txt
character_type: en character_type: en
......
...@@ -112,6 +112,11 @@ void Classifier::LoadModel(const std::string &model_dir) { ...@@ -112,6 +112,11 @@ void Classifier::LoadModel(const std::string &model_dir) {
precision = paddle_infer::Config::Precision::kInt8; precision = paddle_infer::Config::Precision::kInt8;
} }
config.EnableTensorRtEngine(1 << 20, 10, 3, precision, false, false); config.EnableTensorRtEngine(1 << 20, 10, 3, precision, false, false);
if (!Utility::PathExists("./trt_cls_shape.txt")){
config.CollectShapeRangeInfo("./trt_cls_shape.txt");
} else {
config.EnableTunedTensorRtDynamicShape("./trt_cls_shape.txt", true);
}
} }
} else { } else {
config.DisableGpu(); config.DisableGpu();
......
...@@ -32,49 +32,13 @@ void DBDetector::LoadModel(const std::string &model_dir) { ...@@ -32,49 +32,13 @@ void DBDetector::LoadModel(const std::string &model_dir) {
if (this->precision_ == "int8") { if (this->precision_ == "int8") {
precision = paddle_infer::Config::Precision::kInt8; precision = paddle_infer::Config::Precision::kInt8;
} }
config.EnableTensorRtEngine(1 << 20, 1, 20, precision, false, false); config.EnableTensorRtEngine(1 << 30, 1, 20, precision, false, false);
std::map<std::string, std::vector<int>> min_input_shape = { if (!Utility::PathExists("./trt_det_shape.txt")){
{"x", {1, 3, 50, 50}}, config.CollectShapeRangeInfo("./trt_det_shape.txt");
{"conv2d_92.tmp_0", {1, 120, 20, 20}}, } else {
{"conv2d_91.tmp_0", {1, 24, 10, 10}}, config.EnableTunedTensorRtDynamicShape("./trt_det_shape.txt", true);
{"conv2d_59.tmp_0", {1, 96, 20, 20}}, }
{"nearest_interp_v2_1.tmp_0", {1, 256, 10, 10}},
{"nearest_interp_v2_2.tmp_0", {1, 256, 20, 20}},
{"conv2d_124.tmp_0", {1, 256, 20, 20}},
{"nearest_interp_v2_3.tmp_0", {1, 64, 20, 20}},
{"nearest_interp_v2_4.tmp_0", {1, 64, 20, 20}},
{"nearest_interp_v2_5.tmp_0", {1, 64, 20, 20}},
{"elementwise_add_7", {1, 56, 2, 2}},
{"nearest_interp_v2_0.tmp_0", {1, 256, 2, 2}}};
std::map<std::string, std::vector<int>> max_input_shape = {
{"x", {1, 3, 1536, 1536}},
{"conv2d_92.tmp_0", {1, 120, 400, 400}},
{"conv2d_91.tmp_0", {1, 24, 200, 200}},
{"conv2d_59.tmp_0", {1, 96, 400, 400}},
{"nearest_interp_v2_1.tmp_0", {1, 256, 200, 200}},
{"nearest_interp_v2_2.tmp_0", {1, 256, 400, 400}},
{"conv2d_124.tmp_0", {1, 256, 400, 400}},
{"nearest_interp_v2_3.tmp_0", {1, 64, 400, 400}},
{"nearest_interp_v2_4.tmp_0", {1, 64, 400, 400}},
{"nearest_interp_v2_5.tmp_0", {1, 64, 400, 400}},
{"elementwise_add_7", {1, 56, 400, 400}},
{"nearest_interp_v2_0.tmp_0", {1, 256, 400, 400}}};
std::map<std::string, std::vector<int>> opt_input_shape = {
{"x", {1, 3, 640, 640}},
{"conv2d_92.tmp_0", {1, 120, 160, 160}},
{"conv2d_91.tmp_0", {1, 24, 80, 80}},
{"conv2d_59.tmp_0", {1, 96, 160, 160}},
{"nearest_interp_v2_1.tmp_0", {1, 256, 80, 80}},
{"nearest_interp_v2_2.tmp_0", {1, 256, 160, 160}},
{"conv2d_124.tmp_0", {1, 256, 160, 160}},
{"nearest_interp_v2_3.tmp_0", {1, 64, 160, 160}},
{"nearest_interp_v2_4.tmp_0", {1, 64, 160, 160}},
{"nearest_interp_v2_5.tmp_0", {1, 64, 160, 160}},
{"elementwise_add_7", {1, 56, 40, 40}},
{"nearest_interp_v2_0.tmp_0", {1, 256, 40, 40}}};
config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
opt_input_shape);
} }
} else { } else {
config.DisableGpu(); config.DisableGpu();
......
...@@ -147,20 +147,12 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) { ...@@ -147,20 +147,12 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
if (this->precision_ == "int8") { if (this->precision_ == "int8") {
precision = paddle_infer::Config::Precision::kInt8; precision = paddle_infer::Config::Precision::kInt8;
} }
config.EnableTensorRtEngine(1 << 20, 10, 15, precision, false, false); if (!Utility::PathExists("./trt_rec_shape.txt")){
int imgH = this->rec_image_shape_[1]; config.CollectShapeRangeInfo("./trt_rec_shape.txt");
int imgW = this->rec_image_shape_[2]; } else {
std::map<std::string, std::vector<int>> min_input_shape = { config.EnableTunedTensorRtDynamicShape("./trt_rec_shape.txt", true);
{"x", {1, 3, imgH, 10}}, {"lstm_0.tmp_0", {10, 1, 96}}}; }
std::map<std::string, std::vector<int>> max_input_shape = {
{"x", {this->rec_batch_num_, 3, imgH, 2500}},
{"lstm_0.tmp_0", {1000, 1, 96}}};
std::map<std::string, std::vector<int>> opt_input_shape = {
{"x", {this->rec_batch_num_, 3, imgH, imgW}},
{"lstm_0.tmp_0", {25, 1, 96}}};
config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
opt_input_shape);
} }
} else { } else {
config.DisableGpu(); config.DisableGpu();
......
...@@ -5,4 +5,4 @@ det_db_unclip_ratio 1.6 ...@@ -5,4 +5,4 @@ det_db_unclip_ratio 1.6
det_db_use_dilate 0 det_db_use_dilate 0
det_use_polygon_score 1 det_use_polygon_score 1
use_direction_classify 1 use_direction_classify 1
rec_image_height 32 rec_image_height 48
\ No newline at end of file \ No newline at end of file
...@@ -99,6 +99,8 @@ The following table also provides a series of models that can be deployed on mob ...@@ -99,6 +99,8 @@ The following table also provides a series of models that can be deployed on mob
|Version|Introduction|Model size|Detection model|Text Direction model|Recognition model|Paddle-Lite branch| |Version|Introduction|Model size|Detection model|Text Direction model|Recognition model|Paddle-Lite branch|
|---|---|---|---|---|---|---| |---|---|---|---|---|---|---|
|PP-OCRv3|extra-lightweight chinese OCR optimized model|16.2M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.nb)|v2.10|
|PP-OCRv3(slim)|extra-lightweight chinese OCR optimized model|5.9M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.nb)|v2.10|
|PP-OCRv2|extra-lightweight chinese OCR optimized model|11M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_infer_opt.nb)|v2.10| |PP-OCRv2|extra-lightweight chinese OCR optimized model|11M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_infer_opt.nb)|v2.10|
|PP-OCRv2(slim)|extra-lightweight chinese OCR optimized model|4.6M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_slim_opt.nb)|v2.10| |PP-OCRv2(slim)|extra-lightweight chinese OCR optimized model|4.6M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_slim_opt.nb)|v2.10|
...@@ -134,17 +136,16 @@ Introduction to paddle_lite_opt parameters: ...@@ -134,17 +136,16 @@ Introduction to paddle_lite_opt parameters:
The following takes the ultra-lightweight Chinese model of PaddleOCR as an example to introduce the use of the compiled opt file to complete the conversion of the inference model to the Paddle-Lite optimized model The following takes the ultra-lightweight Chinese model of PaddleOCR as an example to introduce the use of the compiled opt file to complete the conversion of the inference model to the Paddle-Lite optimized model
``` ```
# 【[Recommendation] Download the Chinese and English inference model of PP-OCRv2 # 【[Recommendation] Download the Chinese and English inference model of PP-OCRv3
wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_quant_infer.tar && tar xf ch_PP-OCRv2_det_slim_quant_infer.tar wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar && tar xf ch_PP-OCRv3_det_slim_infer.tar
wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_quant_infer.tar && tar xf ch_PP-OCRv2_rec_slim_quant_infer.tar wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar && tar xf ch_PP-OCRv3_rec_slim_infer.tar
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_cls_slim_infer.tar && tar xf ch_ppocr_mobile_v2.0_cls_slim_infer.tar wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_cls_slim_infer.tar && tar xf ch_ppocr_mobile_v2.0_cls_slim_infer.tar
# Convert detection model # Convert detection model
./opt --model_file=./ch_PP-OCRv2_det_slim_quant_infer/inference.pdmodel --param_file=./ch_PP-OCRv2_det_slim_quant_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv2_det_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer paddle_lite_opt --model_file=./ch_PP-OCRv3_det_slim_infer/inference.pdmodel --param_file=./ch_PP-OCRv3_det_slim_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv3_det_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
# Convert recognition model # Convert recognition model
./opt --model_file=./ch_PP-OCRv2_rec_slim_quant_infer/inference.pdmodel --param_file=./ch_PP-OCRv2_rec_slim_quant_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv2_rec_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer paddle_lite_opt --model_file=./ch_PP-OCRv3_rec_slim_infer/inference.pdmodel --param_file=./ch_PP-OCRv3_rec_slim_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv3_rec_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
# Convert angle classifier model # Convert angle classifier model
./opt --model_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdmodel --param_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdiparams --optimize_out=./ch_ppocr_mobile_v2.0_cls_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer paddle_lite_opt --model_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdmodel --param_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdiparams --optimize_out=./ch_ppocr_mobile_v2.0_cls_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
``` ```
After the conversion is successful, there will be more files ending with `.nb` in the inference model directory, which is the successfully converted model file. After the conversion is successful, there will be more files ending with `.nb` in the inference model directory, which is the successfully converted model file.
...@@ -197,15 +198,15 @@ Some preparatory work is required first. ...@@ -197,15 +198,15 @@ Some preparatory work is required first.
cp ../../../cxx/lib/libpaddle_light_api_shared.so ./debug/ cp ../../../cxx/lib/libpaddle_light_api_shared.so ./debug/
``` ```
Prepare the test image, taking PaddleOCR/doc/imgs/11.jpg as an example, copy the image file to the demo/cxx/ocr/debug/ folder. Prepare the model files optimized by the lite opt tool, ch_det_mv3_db_opt.nb, ch_rec_mv3_crnn_opt.nb, and place them under the demo/cxx/ocr/debug/ folder. Prepare the test image, taking PaddleOCR/doc/imgs/11.jpg as an example, copy the image file to the demo/cxx/ocr/debug/ folder. Prepare the model files optimized by the lite opt tool, ch_PP-OCRv3_det_slim_opt.nb, ch_PP-OCRv3_rec_slim_opt.nb, and place them under the demo/cxx/ocr/debug/ folder.
The structure of the OCR demo is as follows after the above command is executed: The structure of the OCR demo is as follows after the above command is executed:
``` ```
demo/cxx/ocr/ demo/cxx/ocr/
|-- debug/ |-- debug/
| |--ch_PP-OCRv2_det_slim_opt.nb Detection model | |--ch_PP-OCRv3_det_slim_opt.nb Detection model
| |--ch_PP-OCRv2_rec_slim_opt.nb Recognition model | |--ch_PP-OCRv3_rec_slim_opt.nb Recognition model
| |--ch_ppocr_mobile_v2.0_cls_slim_opt.nb Text direction classification model | |--ch_ppocr_mobile_v2.0_cls_slim_opt.nb Text direction classification model
| |--11.jpg Image for OCR | |--11.jpg Image for OCR
| |--ppocr_keys_v1.txt Dictionary file | |--ppocr_keys_v1.txt Dictionary file
...@@ -240,7 +241,7 @@ det_db_thresh 0.3 # Used to filter the binarized image of DB prediction, ...@@ -240,7 +241,7 @@ det_db_thresh 0.3 # Used to filter the binarized image of DB prediction,
det_db_box_thresh 0.5 # DDB post-processing filter box threshold, if there is a missing box detected, it can be reduced as appropriate det_db_box_thresh 0.5 # DDB post-processing filter box threshold, if there is a missing box detected, it can be reduced as appropriate
det_db_unclip_ratio 1.6 # Indicates the compactness of the text box, the smaller the value, the closer the text box to the text det_db_unclip_ratio 1.6 # Indicates the compactness of the text box, the smaller the value, the closer the text box to the text
use_direction_classify 0 # Whether to use the direction classifier, 0 means not to use, 1 means to use use_direction_classify 0 # Whether to use the direction classifier, 0 means not to use, 1 means to use
rec_image_height 32 # The height of the input image of the recognition model, the PP-OCRv3 model needs to be set to 48, and the PP-OCRv2 model needs to be set to 32 rec_image_height 48 # The height of the input image of the recognition model, the PP-OCRv3 model needs to be set to 48, and the PP-OCRv2 model needs to be set to 32
``` ```
5. Run Model on phone 5. Run Model on phone
...@@ -260,14 +261,14 @@ After the above steps are completed, you can use adb to push the file to the pho ...@@ -260,14 +261,14 @@ After the above steps are completed, you can use adb to push the file to the pho
export LD_LIBRARY_PATH=${PWD}:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=${PWD}:$LD_LIBRARY_PATH
# The use of ocr_db_crnn is: # The use of ocr_db_crnn is:
# ./ocr_db_crnn Mode Detection model file Orientation classifier model file Recognition model file Hardware Precision Threads Batchsize Test image path Dictionary file path # ./ocr_db_crnn Mode Detection model file Orientation classifier model file Recognition model file Hardware Precision Threads Batchsize Test image path Dictionary file path
./ocr_db_crnn system ch_PP-OCRv2_det_slim_opt.nb ch_PP-OCRv2_rec_slim_opt.nb ch_ppocr_mobile_v2.0_cls_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt ppocr_keys_v1.txt True ./ocr_db_crnn system ch_PP-OCRv3_det_slim_opt.nb ch_PP-OCRv3_rec_slim_opt.nb ch_ppocr_mobile_v2.0_cls_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt ppocr_keys_v1.txt True
# precision can be INT8 for quantitative model or FP32 for normal model. # precision can be INT8 for quantitative model or FP32 for normal model.
# Only using detection model # Only using detection model
./ocr_db_crnn det ch_PP-OCRv2_det_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt ./ocr_db_crnn det ch_PP-OCRv3_det_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt
# Only using recognition model # Only using recognition model
./ocr_db_crnn rec ch_PP-OCRv2_rec_slim_opt.nb arm8 INT8 10 1 word_1.jpg ppocr_keys_v1.txt config.txt ./ocr_db_crnn rec ch_PP-OCRv3_rec_slim_opt.nb arm8 INT8 10 1 word_1.jpg ppocr_keys_v1.txt config.txt
``` ```
If you modify the code, you need to recompile and push to the phone. If you modify the code, you need to recompile and push to the phone.
......
...@@ -97,6 +97,8 @@ Paddle-Lite 提供了多种策略来自动优化原始的模型,其中包括 ...@@ -97,6 +97,8 @@ Paddle-Lite 提供了多种策略来自动优化原始的模型,其中包括
|模型版本|模型简介|模型大小|检测模型|文本方向分类模型|识别模型|Paddle-Lite版本| |模型版本|模型简介|模型大小|检测模型|文本方向分类模型|识别模型|Paddle-Lite版本|
|---|---|---|---|---|---|---| |---|---|---|---|---|---|---|
|PP-OCRv3|蒸馏版超轻量中文OCR移动端模型|16.2M|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_infer_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.nb)|v2.10|
|PP-OCRv3(slim)|蒸馏版超轻量中文OCR移动端模型|5.9M|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.nb)|v2.10|
|PP-OCRv2|蒸馏版超轻量中文OCR移动端模型|11M|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_infer_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_infer_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_infer_opt.nb)|v2.10| |PP-OCRv2|蒸馏版超轻量中文OCR移动端模型|11M|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_infer_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_infer_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_infer_opt.nb)|v2.10|
|PP-OCRv2(slim)|蒸馏版超轻量中文OCR移动端模型|4.6M|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_slim_opt.nb)|v2.10| |PP-OCRv2(slim)|蒸馏版超轻量中文OCR移动端模型|4.6M|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_slim_opt.nb)|v2.10|
...@@ -131,16 +133,16 @@ paddle_lite_opt 参数介绍: ...@@ -131,16 +133,16 @@ paddle_lite_opt 参数介绍:
下面以PaddleOCR的超轻量中文模型为例,介绍使用编译好的opt文件完成inference模型到Paddle-Lite优化模型的转换。 下面以PaddleOCR的超轻量中文模型为例,介绍使用编译好的opt文件完成inference模型到Paddle-Lite优化模型的转换。
``` ```
# 【推荐】 下载 PP-OCRv2版本的中英文 inference模型 # 【推荐】 下载 PP-OCRv3版本的中英文 inference模型
wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_quant_infer.tar && tar xf ch_PP-OCRv2_det_slim_quant_infer.tar wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar && tar xf ch_PP-OCRv3_det_slim_infer.tar
wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_quant_infer.tar && tar xf ch_PP-OCRv2_rec_slim_quant_infer.tar wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar && tar xf ch_PP-OCRv3_rec_slim_infer.tar
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_cls_slim_infer.tar && tar xf ch_ppocr_mobile_v2.0_cls_slim_infer.tar wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_cls_slim_infer.tar && tar xf ch_ppocr_mobile_v2.0_cls_slim_infer.tar
# 转换检测模型 # 转换检测模型
./opt --model_file=./ch_PP-OCRv2_det_slim_quant_infer/inference.pdmodel --param_file=./ch_PP-OCRv2_det_slim_quant_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv2_det_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer paddle_lite_opt --model_file=./ch_PP-OCRv3_det_slim_infer/inference.pdmodel --param_file=./ch_PP-OCRv3_det_slim_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv3_det_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
# 转换识别模型 # 转换识别模型
./opt --model_file=./ch_PP-OCRv2_rec_slim_quant_infer/inference.pdmodel --param_file=./ch_PP-OCRv2_rec_slim_quant_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv2_rec_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer paddle_lite_opt --model_file=./ch_PP-OCRv3_rec_slim_infer/inference.pdmodel --param_file=./ch_PP-OCRv3_rec_slim_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv3_rec_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
# 转换方向分类器模型 # 转换方向分类器模型
./opt --model_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdmodel --param_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdiparams --optimize_out=./ch_ppocr_mobile_v2.0_cls_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer paddle_lite_opt --model_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdmodel --param_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdiparams --optimize_out=./ch_ppocr_mobile_v2.0_cls_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
``` ```
...@@ -194,15 +196,15 @@ wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_cls ...@@ -194,15 +196,15 @@ wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_cls
``` ```
准备测试图像,以`PaddleOCR/doc/imgs/11.jpg`为例,将测试的图像复制到`demo/cxx/ocr/debug/`文件夹下。 准备测试图像,以`PaddleOCR/doc/imgs/11.jpg`为例,将测试的图像复制到`demo/cxx/ocr/debug/`文件夹下。
准备lite opt工具优化后的模型文件,比如使用`ch_PP-OCRv2_det_slim_opt.ch_PP-OCRv2_rec_slim_rec.nb, ch_ppocr_mobile_v2.0_cls_slim_opt.nb`,模型文件放置在`demo/cxx/ocr/debug/`文件夹下。 准备lite opt工具优化后的模型文件,比如使用`ch_PP-OCRv3_det_slim_opt.nb, ch_PP-OCRv3_rec_slim_opt.nb, ch_ppocr_mobile_v2.0_cls_slim_opt.nb`,模型文件放置在`demo/cxx/ocr/debug/`文件夹下。
执行完成后,ocr文件夹下将有如下文件格式: 执行完成后,ocr文件夹下将有如下文件格式:
``` ```
demo/cxx/ocr/ demo/cxx/ocr/
|-- debug/ |-- debug/
| |--ch_PP-OCRv2_det_slim_opt.nb 优化后的检测模型文件 | |--ch_PP-OCRv3_det_slim_opt.nb 优化后的检测模型文件
| |--ch_PP-OCRv2_rec_slim_opt.nb 优化后的识别模型文件 | |--ch_PP-OCRv3_rec_slim_opt.nb 优化后的识别模型文件
| |--ch_ppocr_mobile_v2.0_cls_slim_opt.nb 优化后的文字方向分类器模型文件 | |--ch_ppocr_mobile_v2.0_cls_slim_opt.nb 优化后的文字方向分类器模型文件
| |--11.jpg 待测试图像 | |--11.jpg 待测试图像
| |--ppocr_keys_v1.txt 中文字典文件 | |--ppocr_keys_v1.txt 中文字典文件
...@@ -239,7 +241,7 @@ det_db_thresh 0.3 # 用于过滤DB预测的二值化图像,设置为0. ...@@ -239,7 +241,7 @@ det_db_thresh 0.3 # 用于过滤DB预测的二值化图像,设置为0.
det_db_box_thresh 0.5 # 检测器后处理过滤box的阈值,如果检测存在漏框情况,可酌情减小 det_db_box_thresh 0.5 # 检测器后处理过滤box的阈值,如果检测存在漏框情况,可酌情减小
det_db_unclip_ratio 1.6 # 表示文本框的紧致程度,越小则文本框更靠近文本 det_db_unclip_ratio 1.6 # 表示文本框的紧致程度,越小则文本框更靠近文本
use_direction_classify 0 # 是否使用方向分类器,0表示不使用,1表示使用 use_direction_classify 0 # 是否使用方向分类器,0表示不使用,1表示使用
rec_image_height 32 # 识别模型输入图像的高度,PP-OCRv3模型设置为48,PP-OCRv2模型需要设置为32 rec_image_height 48 # 识别模型输入图像的高度,PP-OCRv3模型设置为48,PP-OCRv2模型需要设置为32
``` ```
5. 启动调试 5. 启动调试
...@@ -259,13 +261,13 @@ rec_image_height 32 # 识别模型输入图像的高度,PP-OCRv3模型 ...@@ -259,13 +261,13 @@ rec_image_height 32 # 识别模型输入图像的高度,PP-OCRv3模型
export LD_LIBRARY_PATH=${PWD}:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=${PWD}:$LD_LIBRARY_PATH
# 开始使用,ocr_db_crnn可执行文件的使用方式为: # 开始使用,ocr_db_crnn可执行文件的使用方式为:
# ./ocr_db_crnn 预测模式 检测模型文件 方向分类器模型文件 识别模型文件 运行硬件 运行精度 线程数 batchsize 测试图像路径 参数配置路径 字典文件路径 是否使用benchmark参数 # ./ocr_db_crnn 预测模式 检测模型文件 方向分类器模型文件 识别模型文件 运行硬件 运行精度 线程数 batchsize 测试图像路径 参数配置路径 字典文件路径 是否使用benchmark参数
./ocr_db_crnn system ch_PP-OCRv2_det_slim_opt.nb ch_PP-OCRv2_rec_slim_opt.nb ch_ppocr_mobile_v2.0_cls_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt ppocr_keys_v1.txt True ./ocr_db_crnn system ch_PP-OCRv3_det_slim_opt.nb ch_PP-OCRv3_rec_slim_opt.nb ch_ppocr_mobile_v2.0_cls_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt ppocr_keys_v1.txt True
# 仅使用文本检测模型,使用方式如下: # 仅使用文本检测模型,使用方式如下:
./ocr_db_crnn det ch_PP-OCRv2_det_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt ./ocr_db_crnn det ch_PP-OCRv3_det_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt
# 仅使用文本识别模型,使用方式如下: # 仅使用文本识别模型,使用方式如下:
./ocr_db_crnn rec ch_PP-OCRv2_rec_slim_opt.nb arm8 INT8 10 1 word_1.jpg ppocr_keys_v1.txt config.txt ./ocr_db_crnn rec ch_PP-OCRv3_rec_slim_opt.nb arm8 INT8 10 1 word_1.jpg ppocr_keys_v1.txt config.txt
``` ```
如果对代码做了修改,则需要重新编译并push到手机上。 如果对代码做了修改,则需要重新编译并push到手机上。
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
### 1. 安装PaddleSlim ### 1. 安装PaddleSlim
```bash ```bash
pip3 install paddleslim==2.2.2 pip3 install paddleslim==2.3.2
``` ```
### 2. 准备训练好的模型 ### 2. 准备训练好的模型
...@@ -33,17 +33,7 @@ PaddleOCR提供了一系列训练好的[模型](../../../doc/doc_ch/models_list. ...@@ -33,17 +33,7 @@ PaddleOCR提供了一系列训练好的[模型](../../../doc/doc_ch/models_list.
量化训练包括离线量化训练和在线量化训练,在线量化训练效果更好,需加载预训练模型,在定义好量化策略后即可对模型进行量化。 量化训练包括离线量化训练和在线量化训练,在线量化训练效果更好,需加载预训练模型,在定义好量化策略后即可对模型进行量化。
量化训练的代码位于slim/quantization/quant.py 中,比如训练检测模型,训练指令如下: 量化训练的代码位于slim/quantization/quant.py 中,比如训练检测模型,以PPOCRv3检测模型为例,训练指令如下:
```bash
python deploy/slim/quantization/quant.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model='your trained model' Global.save_model_dir=./output/quant_model
# 比如下载提供的训练模型
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar
tar -xf ch_ppocr_mobile_v2.0_det_train.tar
python deploy/slim/quantization/quant.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./ch_ppocr_mobile_v2.0_det_train/best_accuracy Global.save_model_dir=./output/quant_model
```
模型蒸馏和模型量化可以同时使用,以PPOCRv3检测模型为例:
``` ```
# 下载检测预训练模型: # 下载检测预训练模型:
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar
...@@ -58,7 +48,7 @@ python deploy/slim/quantization/quant.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_ ...@@ -58,7 +48,7 @@ python deploy/slim/quantization/quant.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_
在得到量化训练保存的模型后,我们可以将其导出为inference_model,用于预测部署: 在得到量化训练保存的模型后,我们可以将其导出为inference_model,用于预测部署:
```bash ```bash
python deploy/slim/quantization/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_inference_dir=./output/quant_inference_model python deploy/slim/quantization/export_model.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_inference_dir=./output/quant_inference_model
``` ```
### 5. 量化模型部署 ### 5. 量化模型部署
......
...@@ -25,7 +25,7 @@ After training, if you want to further compress the model size and accelerate th ...@@ -25,7 +25,7 @@ After training, if you want to further compress the model size and accelerate th
### 1. Install PaddleSlim ### 1. Install PaddleSlim
```bash ```bash
pip3 install paddleslim==2.2.2 pip3 install paddleslim==2.3.2
``` ```
...@@ -39,18 +39,7 @@ Quantization training includes offline quantization training and online quantiza ...@@ -39,18 +39,7 @@ Quantization training includes offline quantization training and online quantiza
Online quantization training is more effective. It is necessary to load the pre-trained model. Online quantization training is more effective. It is necessary to load the pre-trained model.
After the quantization strategy is defined, the model can be quantified. After the quantization strategy is defined, the model can be quantified.
The code for quantization training is located in `slim/quantization/quant.py`. For example, to train a detection model, the training instructions are as follows: The code for quantization training is located in `slim/quantization/quant.py`. For example, the training instructions of slim PPOCRv3 detection model are as follows:
```bash
python deploy/slim/quantization/quant.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model='your trained model' Global.save_model_dir=./output/quant_model
# download provided model
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar
tar -xf ch_ppocr_mobile_v2.0_det_train.tar
python deploy/slim/quantization/quant.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./ch_ppocr_mobile_v2.0_det_train/best_accuracy Global.save_model_dir=./output/quant_model
```
Model distillation and model quantization can be used at the same time, taking the PPOCRv3 detection model as an example:
``` ```
# download provided model # download provided model
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar
...@@ -66,7 +55,7 @@ If you want to quantify the text recognition model, you can modify the configura ...@@ -66,7 +55,7 @@ If you want to quantify the text recognition model, you can modify the configura
Once we got the model after pruning and fine-tuning, we can export it as an inference model for the deployment of predictive tasks: Once we got the model after pruning and fine-tuning, we can export it as an inference model for the deployment of predictive tasks:
```bash ```bash
python deploy/slim/quantization/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_inference_dir=./output/quant_inference_model python deploy/slim/quantization/export_model.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_inference_dir=./output/quant_inference_model
``` ```
### 5. Deploy ### 5. Deploy
......
...@@ -151,17 +151,24 @@ def main(): ...@@ -151,17 +151,24 @@ def main():
arch_config = config["Architecture"] arch_config = config["Architecture"]
arch_config = config["Architecture"] if arch_config["algorithm"] == "SVTR" and arch_config["Head"][
"name"] != 'MultiHead':
input_shape = config["Eval"]["dataset"]["transforms"][-2][
'SVTRRecResizeImg']['image_shape']
else:
input_shape = None
if arch_config["algorithm"] in ["Distillation", ]: # distillation model if arch_config["algorithm"] in ["Distillation", ]: # distillation model
archs = list(arch_config["Models"].values()) archs = list(arch_config["Models"].values())
for idx, name in enumerate(model.model_name_list): for idx, name in enumerate(model.model_name_list):
sub_model_save_path = os.path.join(save_path, name, "inference") sub_model_save_path = os.path.join(save_path, name, "inference")
export_single_model(model.model_list[idx], archs[idx], export_single_model(model.model_list[idx], archs[idx],
sub_model_save_path, logger, quanter) sub_model_save_path, logger, input_shape,
quanter)
else: else:
save_path = os.path.join(save_path, "inference") save_path = os.path.join(save_path, "inference")
export_single_model(model, arch_config, save_path, logger, quanter) export_single_model(model, arch_config, save_path, logger, input_shape,
quanter)
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -158,8 +158,7 @@ def main(config, device, logger, vdl_writer): ...@@ -158,8 +158,7 @@ def main(config, device, logger, vdl_writer):
pre_best_model_dict = dict() pre_best_model_dict = dict()
# load fp32 model to begin quantization # load fp32 model to begin quantization
if config["Global"]["pretrained_model"] is not None: pre_best_model_dict = load_model(config, model, None, config['Architecture']["model_type"])
pre_best_model_dict = load_model(config, model)
freeze_params = False freeze_params = False
if config['Architecture']["algorithm"] in ["Distillation"]: if config['Architecture']["algorithm"] in ["Distillation"]:
...@@ -184,8 +183,7 @@ def main(config, device, logger, vdl_writer): ...@@ -184,8 +183,7 @@ def main(config, device, logger, vdl_writer):
model=model) model=model)
# resume PACT training process # resume PACT training process
if config["Global"]["checkpoints"] is not None: pre_best_model_dict = load_model(config, model, optimizer, config['Architecture']["model_type"])
pre_best_model_dict = load_model(config, model, optimizer)
# build metric # build metric
eval_class = build_metric(config['Metric']) eval_class = build_metric(config['Metric'])
......
...@@ -97,6 +97,17 @@ def sample_generator(loader): ...@@ -97,6 +97,17 @@ def sample_generator(loader):
return __reader__ return __reader__
def sample_generator_layoutxlm_ser(loader):
def __reader__():
for indx, data in enumerate(loader):
input_ids = np.array(data[0])
bbox = np.array(data[1])
attention_mask = np.array(data[2])
token_type_ids = np.array(data[3])
images = np.array(data[4])
yield [input_ids, bbox, attention_mask, token_type_ids, images]
return __reader__
def main(config, device, logger, vdl_writer): def main(config, device, logger, vdl_writer):
# init dist environment # init dist environment
...@@ -107,16 +118,18 @@ def main(config, device, logger, vdl_writer): ...@@ -107,16 +118,18 @@ def main(config, device, logger, vdl_writer):
# build dataloader # build dataloader
config['Train']['loader']['num_workers'] = 0 config['Train']['loader']['num_workers'] = 0
is_layoutxlm_ser = config['Architecture']['model_type'] =='kie' and config['Architecture']['Backbone']['name'] == 'LayoutXLMForSer'
train_dataloader = build_dataloader(config, 'Train', device, logger) train_dataloader = build_dataloader(config, 'Train', device, logger)
if config['Eval']: if config['Eval']:
config['Eval']['loader']['num_workers'] = 0 config['Eval']['loader']['num_workers'] = 0
valid_dataloader = build_dataloader(config, 'Eval', device, logger) valid_dataloader = build_dataloader(config, 'Eval', device, logger)
if is_layoutxlm_ser:
train_dataloader = valid_dataloader
else: else:
valid_dataloader = None valid_dataloader = None
paddle.enable_static() paddle.enable_static()
place = paddle.CPUPlace() exe = paddle.static.Executor(device)
exe = paddle.static.Executor(place)
if 'inference_model' in global_config.keys(): # , 'inference_model'): if 'inference_model' in global_config.keys(): # , 'inference_model'):
inference_model_dir = global_config['inference_model'] inference_model_dir = global_config['inference_model']
...@@ -128,13 +141,18 @@ def main(config, device, logger, vdl_writer): ...@@ -128,13 +141,18 @@ def main(config, device, logger, vdl_writer):
"Please set inference model dir in Global.inference_model or Global.pretrained_model for post-quantazition" "Please set inference model dir in Global.inference_model or Global.pretrained_model for post-quantazition"
) )
if is_layoutxlm_ser:
generator = sample_generator_layoutxlm_ser(train_dataloader)
else:
generator = sample_generator(train_dataloader)
paddleslim.quant.quant_post_static( paddleslim.quant.quant_post_static(
executor=exe, executor=exe,
model_dir=inference_model_dir, model_dir=inference_model_dir,
model_filename='inference.pdmodel', model_filename='inference.pdmodel',
params_filename='inference.pdiparams', params_filename='inference.pdiparams',
quantize_model_path=global_config['save_inference_dir'], quantize_model_path=global_config['save_inference_dir'],
sample_generator=sample_generator(train_dataloader), sample_generator=generator,
save_model_filename='inference.pdmodel', save_model_filename='inference.pdmodel',
save_params_filename='inference.pdiparams', save_params_filename='inference.pdiparams',
batch_size=1, batch_size=1,
......
# CT
- [1. 算法简介](#1)
- [2. 环境配置](#2)
- [3. 模型训练、评估、预测](#3)
- [3.1 训练](#3-1)
- [3.2 评估](#3-2)
- [3.3 预测](#3-3)
- [4. 推理部署](#4)
- [4.1 Python推理](#4-1)
- [4.2 C++推理](#4-2)
- [4.3 Serving服务化部署](#4-3)
- [4.4 更多推理部署](#4-4)
- [5. FAQ](#5)
<a name="1"></a>
## 1. 算法简介
论文信息:
> [CentripetalText: An Efficient Text Instance Representation for Scene Text Detection](https://arxiv.org/abs/2107.05945)
> Tao Sheng, Jie Chen, Zhouhui Lian
> NeurIPS, 2021
在Total-Text文本检测公开数据集上,算法复现效果如下:
|模型|骨干网络|配置文件|precision|recall|Hmean|下载链接|
| --- | --- | --- | --- | --- | --- | --- |
|CT|ResNet18_vd|[configs/det/det_r18_vd_ct.yml](../../configs/det/det_r18_vd_ct.yml)|88.68%|81.70%|85.05%|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r18_ct_train.tar)|
<a name="2"></a>
## 2. 环境配置
请先参考[《运行环境准备》](./environment.md)配置PaddleOCR运行环境,参考[《项目克隆》](./clone.md)克隆项目代码。
<a name="3"></a>
## 3. 模型训练、评估、预测
CT模型使用Total-Text文本检测公开数据集训练得到,数据集下载可参考 [Total-Text-Dataset](https://github.com/cs-chan/Total-Text-Dataset/tree/master/Dataset), 我们将标签文件转成了paddleocr格式,转换好的标签文件下载参考[train.txt](https://paddleocr.bj.bcebos.com/dataset/ct_tipc/train.txt), [test.txt](https://paddleocr.bj.bcebos.com/dataset/ct_tipc/test.txt)
请参考[文本检测训练教程](./detection.md)。PaddleOCR对代码进行了模块化,训练不同的检测模型只需要**更换配置文件**即可。
<a name="4"></a>
## 4. 推理部署
<a name="4-1"></a>
### 4.1 Python推理
首先将CT文本检测训练过程中保存的模型,转换成inference model。以基于Resnet18_vd骨干网络,在Total-Text英文数据集训练的模型为例( [模型下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r18_ct_train.tar) ),可以使用如下命令进行转换:
```shell
python3 tools/export_model.py -c configs/det/det_r18_vd_ct.yml -o Global.pretrained_model=./det_r18_ct_train/best_accuracy Global.save_inference_dir=./inference/det_ct
```
CT文本检测模型推理,可以执行如下命令:
```shell
python3 tools/infer/predict_det.py --image_dir="./doc/imgs_en/img623.jpg" --det_model_dir="./inference/det_ct/" --det_algorithm="CT"
```
可视化文本检测结果默认保存到`./inference_results`文件夹里面,结果文件的名称前缀为'det_res'。结果示例如下:
![](../imgs_results/det_res_img623_ct.jpg)
<a name="4-2"></a>
### 4.2 C++推理
暂不支持
<a name="4-3"></a>
### 4.3 Serving服务化部署
暂不支持
<a name="4-4"></a>
### 4.4 更多推理部署
暂不支持
<a name="5"></a>
## 5. FAQ
## 引用
```bibtex
@inproceedings{sheng2021centripetaltext,
title={CentripetalText: An Efficient Text Instance Representation for Scene Text Detection},
author={Tao Sheng and Jie Chen and Zhouhui Lian},
booktitle={Thirty-Fifth Conference on Neural Information Processing Systems},
year={2021}
}
```
...@@ -390,6 +390,7 @@ im_show.save('result.jpg') ...@@ -390,6 +390,7 @@ im_show.save('result.jpg')
| det_db_thresh | DB模型输出预测图的二值化阈值 | 0.3 | | det_db_thresh | DB模型输出预测图的二值化阈值 | 0.3 |
| det_db_box_thresh | DB模型输出框的阈值,低于此值的预测框会被丢弃 | 0.5 | | det_db_box_thresh | DB模型输出框的阈值,低于此值的预测框会被丢弃 | 0.5 |
| det_db_unclip_ratio | DB模型输出框扩大的比例 | 2 | | det_db_unclip_ratio | DB模型输出框扩大的比例 | 2 |
| det_db_score_mode | 计算检测框score的方式,有'fast'和'slow',如果要检测的文字有弯曲,建议用'slow','slow'模式计算的box的score偏大,box不容易被过滤掉 | 'fast' |
| det_east_score_thresh | EAST模型输出预测图的二值化阈值 | 0.8 | | det_east_score_thresh | EAST模型输出预测图的二值化阈值 | 0.8 |
| det_east_cover_thresh | EAST模型输出框的阈值,低于此值的预测框会被丢弃 | 0.1 | | det_east_cover_thresh | EAST模型输出框的阈值,低于此值的预测框会被丢弃 | 0.1 |
| det_east_nms_thresh | EAST模型输出框NMS的阈值 | 0.2 | | det_east_nms_thresh | EAST模型输出框NMS的阈值 | 0.2 |
......
# CT
- [1. Introduction](#1)
- [2. Environment](#2)
- [3. Model Training / Evaluation / Prediction](#3)
- [3.1 Training](#3-1)
- [3.2 Evaluation](#3-2)
- [3.3 Prediction](#3-3)
- [4. Inference and Deployment](#4)
- [4.1 Python Inference](#4-1)
- [4.2 C++ Inference](#4-2)
- [4.3 Serving](#4-3)
- [4.4 More](#4-4)
- [5. FAQ](#5)
<a name="1"></a>
## 1. Introduction
Paper:
> [CentripetalText: An Efficient Text Instance Representation for Scene Text Detection](https://arxiv.org/abs/2107.05945)
> Tao Sheng, Jie Chen, Zhouhui Lian
> NeurIPS, 2021
On the Total-Text dataset, the text detection result is as follows:
|Model|Backbone|Configuration|Precision|Recall|Hmean|Download|
| --- | --- | --- | --- | --- | --- | --- |
|CT|ResNet18_vd|[configs/det/det_r18_vd_ct.yml](../../configs/det/det_r18_vd_ct.yml)|88.68%|81.70%|85.05%|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r18_ct_train.tar)|
<a name="2"></a>
## 2. Environment
Please prepare your environment referring to [prepare the environment](./environment_en.md) and [clone the repo](./clone_en.md).
<a name="3"></a>
## 3. Model Training / Evaluation / Prediction
The above CT model is trained using the Total-Text text detection public dataset. For the download of the dataset, please refer to [Total-Text-Dataset](https://github.com/cs-chan/Total-Text-Dataset/tree/master/Dataset). PaddleOCR format annotation download link [train.txt](https://paddleocr.bj.bcebos.com/dataset/ct_tipc/train.txt), [test.txt](https://paddleocr.bj.bcebos.com/dataset/ct_tipc/test.txt).
Please refer to [text detection training tutorial](./detection_en.md). PaddleOCR has modularized the code structure, so that you only need to **replace the configuration file** to train different detection models.
<a name="4"></a>
## 4. Inference and Deployment
<a name="4-1"></a>
### 4.1 Python Inference
First, convert the model saved in the CT text detection training process into an inference model. Taking the model based on the Resnet18_vd backbone network and trained on the Total Text English dataset as example ([model download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r18_ct_train.tar)), you can use the following command to convert:
```shell
python3 tools/export_model.py -c configs/det/det_r18_vd_ct.yml -o Global.pretrained_model=./det_r18_ct_train/best_accuracy Global.save_inference_dir=./inference/det_ct
```
CT text detection model inference, you can execute the following command:
```shell
python3 tools/infer/predict_det.py --image_dir="./doc/imgs_en/img623.jpg" --det_model_dir="./inference/det_ct/" --det_algorithm="CT"
```
The visualized text detection results are saved to the `./inference_results` folder by default, and the name of the result file is prefixed with 'det_res'. Examples of results are as follows:
![](../imgs_results/det_res_img623_ct.jpg)
<a name="4-2"></a>
### 4.2 C++ Inference
Not supported
<a name="4-3"></a>
### 4.3 Serving
Not supported
<a name="4-4"></a>
### 4.4 More
Not supported
<a name="5"></a>
## 5. FAQ
## Citation
```bibtex
@inproceedings{sheng2021centripetaltext,
title={CentripetalText: An Efficient Text Instance Representation for Scene Text Detection},
author={Tao Sheng and Jie Chen and Zhouhui Lian},
booktitle={Thirty-Fifth Conference on Neural Information Processing Systems},
year={2021}
}
```
...@@ -342,6 +342,7 @@ im_show.save('result.jpg') ...@@ -342,6 +342,7 @@ im_show.save('result.jpg')
| det_db_thresh | Binarization threshold value of DB output map | 0.3 | | det_db_thresh | Binarization threshold value of DB output map | 0.3 |
| det_db_box_thresh | The threshold value of the DB output box. Boxes score lower than this value will be discarded | 0.5 | | det_db_box_thresh | The threshold value of the DB output box. Boxes score lower than this value will be discarded | 0.5 |
| det_db_unclip_ratio | The expanded ratio of DB output box | 2 | | det_db_unclip_ratio | The expanded ratio of DB output box | 2 |
| det_db_score_mode | The parameter that control how the score of the detection frame is calculated. There are 'fast' and 'slow' options. If the text to be detected is curved, it is recommended to use 'slow' | 'fast' |
| det_east_score_thresh | Binarization threshold value of EAST output map | 0.8 | | det_east_score_thresh | Binarization threshold value of EAST output map | 0.8 |
| det_east_cover_thresh | The threshold value of the EAST output box. Boxes score lower than this value will be discarded | 0.1 | | det_east_cover_thresh | The threshold value of the EAST output box. Boxes score lower than this value will be discarded | 0.1 |
| det_east_nms_thresh | The NMS threshold value of EAST model output box | 0.2 | | det_east_nms_thresh | The NMS threshold value of EAST model output box | 0.2 |
......
...@@ -414,6 +414,33 @@ def get_model_config(type, version, model_type, lang): ...@@ -414,6 +414,33 @@ def get_model_config(type, version, model_type, lang):
return model_urls[version][model_type][lang] return model_urls[version][model_type][lang]
def img_decode(content: bytes):
    """Decode raw image bytes into a BGR ``np.ndarray`` using OpenCV."""
    buffer = np.frombuffer(content, dtype=np.uint8)
    return cv2.imdecode(buffer, cv2.IMREAD_COLOR)
def check_img(img):
    """Normalise the supported input forms into a 3-channel image array.

    Accepts raw ``bytes``, a local file path, a URL string, or an
    ``np.ndarray``; returns a BGR ``np.ndarray`` suitable for inference,
    or ``None`` (after logging an error) when the image cannot be loaded.
    """
    if isinstance(img, bytes):
        img = img_decode(img)
    if isinstance(img, str):
        # download net image
        if is_link(img):
            # NOTE(review): downloads into the current working directory;
            # concurrent calls would overwrite each other's 'tmp.jpg'.
            download_with_progressbar(img, 'tmp.jpg')
            img = 'tmp.jpg'
        image_file = img
        img, flag, _ = check_and_read(image_file)
        if not flag:
            # check_and_read could not handle the file directly
            # (presumably non-GIF/PDF input — confirm); decode raw bytes.
            with open(image_file, 'rb') as f:
                img = img_decode(f.read())
        if img is None:
            logger.error("error in loading image:{}".format(image_file))
            return None
    if isinstance(img, np.ndarray) and len(img.shape) == 2:
        # Single-channel (grayscale) input: expand to 3 channels.
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    return img
class PaddleOCR(predict_system.TextSystem): class PaddleOCR(predict_system.TextSystem):
def __init__(self, **kwargs): def __init__(self, **kwargs):
""" """
...@@ -482,7 +509,7 @@ class PaddleOCR(predict_system.TextSystem): ...@@ -482,7 +509,7 @@ class PaddleOCR(predict_system.TextSystem):
rec: use text recognition or not. If false, only det will be exec. Default is True rec: use text recognition or not. If false, only det will be exec. Default is True
cls: use angle classifier or not. Default is True. If true, the text with rotation of 180 degrees can be recognized. If no text is rotated by 180 degrees, use cls=False to get better performance. Text with rotation of 90 or 270 degrees can be recognized even if cls=False. cls: use angle classifier or not. Default is True. If true, the text with rotation of 180 degrees can be recognized. If no text is rotated by 180 degrees, use cls=False to get better performance. Text with rotation of 90 or 270 degrees can be recognized even if cls=False.
""" """
assert isinstance(img, (np.ndarray, list, str)) assert isinstance(img, (np.ndarray, list, str, bytes))
if isinstance(img, list) and det == True: if isinstance(img, list) and det == True:
logger.error('When input a list of images, det must be false') logger.error('When input a list of images, det must be false')
exit(0) exit(0)
...@@ -491,22 +518,8 @@ class PaddleOCR(predict_system.TextSystem): ...@@ -491,22 +518,8 @@ class PaddleOCR(predict_system.TextSystem):
'Since the angle classifier is not initialized, the angle classifier will not be uesd during the forward process' 'Since the angle classifier is not initialized, the angle classifier will not be uesd during the forward process'
) )
if isinstance(img, str): img = check_img(img)
# download net image
if img.startswith('http'):
download_with_progressbar(img, 'tmp.jpg')
img = 'tmp.jpg'
image_file = img
img, flag, _ = check_and_read(image_file)
if not flag:
with open(image_file, 'rb') as f:
np_arr = np.frombuffer(f.read(), dtype=np.uint8)
img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
if img is None:
logger.error("error in loading image:{}".format(image_file))
return None
if isinstance(img, np.ndarray) and len(img.shape) == 2:
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
if det and rec: if det and rec:
dt_boxes, rec_res, _ = self.__call__(img, cls) dt_boxes, rec_res, _ = self.__call__(img, cls)
return [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)] return [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]
...@@ -585,23 +598,7 @@ class PPStructure(StructureSystem): ...@@ -585,23 +598,7 @@ class PPStructure(StructureSystem):
super().__init__(params) super().__init__(params)
def __call__(self, img, return_ocr_result_in_table=False, img_idx=0): def __call__(self, img, return_ocr_result_in_table=False, img_idx=0):
if isinstance(img, str): img = check_img(img)
# download net image
if img.startswith('http'):
download_with_progressbar(img, 'tmp.jpg')
img = 'tmp.jpg'
image_file = img
img, flag, _ = check_and_read(image_file)
if not flag:
with open(image_file, 'rb') as f:
np_arr = np.frombuffer(f.read(), dtype=np.uint8)
img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
if img is None:
logger.error("error in loading image:{}".format(image_file))
return None
if isinstance(img, np.ndarray) and len(img.shape) == 2:
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
res, _ = super().__call__( res, _ = super().__call__(
img, return_ocr_result_in_table, img_idx=img_idx) img, return_ocr_result_in_table, img_idx=img_idx)
return res return res
...@@ -644,7 +641,7 @@ def main(): ...@@ -644,7 +641,7 @@ def main():
if not flag_pdf: if not flag_pdf:
if img is None: if img is None:
logger.error("error in loading image:{}".format(image_file)) logger.error("error in loading image:{}".format(img_path))
continue continue
img_paths = [[img_path, img]] img_paths = [[img_path, img]]
else: else:
......
...@@ -43,6 +43,7 @@ from .vqa import * ...@@ -43,6 +43,7 @@ from .vqa import *
from .fce_aug import * from .fce_aug import *
from .fce_targets import FCENetTargets from .fce_targets import FCENetTargets
from .ct_process import *
def transform(data, ops=None): def transform(data, ops=None):
......
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import cv2
import random
import pyclipper
import paddle
import numpy as np
import Polygon as plg
import scipy.io as scio
from PIL import Image
import paddle.vision.transforms as transforms
class RandomScale():
    """Randomly rescale the image so its short side lands near a chosen
    multiple of ``short_size``, snapping both sides up to multiples of 32.

    Records the actual per-axis factors in ``data['scale_factor']`` as
    ``(factor_w, factor_h)`` so later transforms can rescale polygons.
    """

    def __init__(self, short_size=640, **kwargs):
        self.short_size = short_size

    def scale_aligned(self, img, scale):
        """Resize ``img`` by ``scale`` with each side rounded up to a
        multiple of 32; return the image and the real (h, w) factors."""
        orig_h, orig_w = img.shape[:2]
        new_h = int(orig_h * scale + 0.5)
        new_w = int(orig_w * scale + 0.5)
        # Snap both sides up to the next multiple of 32.
        new_h += (-new_h) % 32
        new_w += (-new_w) % 32
        resized = cv2.resize(img, dsize=(new_w, new_h))
        return resized, new_h / orig_h, new_w / orig_w

    def __call__(self, data):
        image = data['image']
        short_side = min(image.shape[:2])
        choices = np.array([0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3])
        scale = (np.random.choice(choices) * self.short_size) / short_side
        image, factor_h, factor_w = self.scale_aligned(image, scale)
        data['scale_factor'] = (factor_w, factor_h)
        data['image'] = image
        return data
class MakeShrink():
    """Build CT (CentripetalText) ground-truth maps from text polygons.

    Shrinks each polygon into a "kernel" and rasterises instance maps and
    training masks, storing all of them in ``data['image']`` as a list.
    """

    def __init__(self, kernel_scale=0.7, **kwargs):
        # Ratio controlling how far each polygon is shrunk into its kernel.
        self.kernel_scale = kernel_scale

    def dist(self, a, b):
        """Euclidean distance between two points."""
        return np.linalg.norm((a - b), ord=2, axis=0)

    def perimeter(self, bbox):
        """Perimeter of a closed polygon given as an (N, 2) point array."""
        peri = 0.0
        for i in range(bbox.shape[0]):
            peri += self.dist(bbox[i], bbox[(i + 1) % bbox.shape[0]])
        return peri

    def shrink(self, bboxes, rate, max_shr=20):
        """Shrink each polygon inward with a pyclipper negative offset.

        The offset is ``area * (1 - rate^2) / perimeter`` capped at
        ``max_shr`` pixels. On any failure or degenerate result the
        original polygon is kept, so the output always has one polygon
        per input polygon.
        """
        rate = rate * rate
        shrinked_bboxes = []
        for bbox in bboxes:
            area = plg.Polygon(bbox).area()
            peri = self.perimeter(bbox)

            try:
                pco = pyclipper.PyclipperOffset()
                pco.AddPath(bbox, pyclipper.JT_ROUND,
                            pyclipper.ET_CLOSEDPOLYGON)
                offset = min(
                    int(area * (1 - rate) / (peri + 0.001) + 0.5), max_shr)

                shrinked_bbox = pco.Execute(-offset)
                if len(shrinked_bbox) == 0:
                    # Shrinking removed the polygon entirely; keep original.
                    shrinked_bboxes.append(bbox)
                    continue

                shrinked_bbox = np.array(shrinked_bbox[0])
                if shrinked_bbox.shape[0] <= 2:
                    # Fewer than 3 vertices is degenerate; keep original.
                    shrinked_bboxes.append(bbox)
                    continue

                shrinked_bboxes.append(shrinked_bbox)
            except Exception as e:
                shrinked_bboxes.append(bbox)

        return shrinked_bboxes

    def __call__(self, data):
        img = data['image']
        bboxes = data['polys']
        words = data['texts']
        scale_factor = data['scale_factor']

        gt_instance = np.zeros(img.shape[0:2], dtype='uint8')  # h,w
        training_mask = np.ones(img.shape[0:2], dtype='uint8')
        training_mask_distance = np.ones(img.shape[0:2], dtype='uint8')

        # Rescale polygons by the factors recorded during resizing and
        # reshape each into an (N, 2) integer point array.
        for i in range(len(bboxes)):
            bboxes[i] = np.reshape(bboxes[i] * (
                [scale_factor[0], scale_factor[1]] * (bboxes[i].shape[0] // 2)),
                                   (bboxes[i].shape[0] // 2, 2)).astype('int32')

        for i in range(len(bboxes)):
            # different value for different bbox
            cv2.drawContours(gt_instance, [bboxes[i]], -1, i + 1, -1)

            # set training mask to 0
            cv2.drawContours(training_mask, [bboxes[i]], -1, 0, -1)

            # for not accurate annotation, use training_mask_distance
            if words[i] == '###' or words[i] == '???':
                cv2.drawContours(training_mask_distance, [bboxes[i]], -1, 0, -1)

        # make shrink
        gt_kernel_instance = np.zeros(img.shape[0:2], dtype='uint8')
        kernel_bboxes = self.shrink(bboxes, self.kernel_scale)
        for i in range(len(bboxes)):
            cv2.drawContours(gt_kernel_instance, [kernel_bboxes[i]], -1, i + 1,
                             -1)

            # for training mask, kernel and background = 1, box region = 0
            if words[i] != '###' and words[i] != '???':
                cv2.drawContours(training_mask, [kernel_bboxes[i]], -1, 1, -1)

        gt_kernel = gt_kernel_instance.copy()
        # for gt_kernel, kernel = 1
        gt_kernel[gt_kernel > 0] = 1

        # Erode the kernel map twice; the difference of the two erosions
        # is a thin ring used as the kernel reference region.
        tmp1 = gt_kernel_instance.copy()
        erode_kernel = np.ones((3, 3), np.uint8)
        tmp1 = cv2.erode(tmp1, erode_kernel, iterations=1)
        tmp2 = tmp1.copy()
        tmp2 = cv2.erode(tmp2, erode_kernel, iterations=1)

        # compute text region
        gt_kernel_inner = tmp1 - tmp2

        # gt_instance: text instance, bg=0, diff word use diff value
        # training_mask: text instance mask, word=0, kernel and bg=1
        # gt_kernel_instance: text kernel instance, bg=0, diff word use diff value
        # gt_kernel: text kernel, bg=0, diff word use same value
        # gt_kernel_inner: text kernel reference
        # training_mask_distance: word without anno = 0, else 1
        data['image'] = [
            img, gt_instance, training_mask, gt_kernel_instance, gt_kernel,
            gt_kernel_inner, training_mask_distance
        ]
        return data
class GroupRandomHorizontalFlip():
    """Horizontally flip every map in ``data['image']`` together with
    probability ``p``, so the image and its label maps stay aligned."""

    def __init__(self, p=0.5, **kwargs):
        self.p = p

    def __call__(self, data):
        maps = data['image']
        if random.random() < self.p:
            # One draw decides for the whole group so maps stay in sync.
            maps = [np.flip(m, axis=1).copy() for m in maps]
        data['image'] = maps
        return data
class GroupRandomRotate():
    """Rotate every map in ``data['image']`` by one shared random angle in
    [-10, 10] degrees (nearest-neighbour, output keeps the input size)."""

    def __init__(self, **kwargs):
        pass

    def __call__(self, data):
        maps = data['image']

        max_angle = 10
        angle = random.random() * 2 * max_angle - max_angle
        rotated = []
        for m in maps:
            rows, cols = m.shape[:2]
            # cv2 expects an (x, y) center and a (width, height) size.
            matrix = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1)
            rotated.append(
                cv2.warpAffine(
                    m, matrix, (cols, rows), flags=cv2.INTER_NEAREST))
        data['image'] = rotated
        return data
class GroupRandomCropPadding():
    """Crop the same random ``target_size`` window out of every map in
    ``data['image']``, padding with zeros when a map is smaller than the
    target. With probability 5/8 the window is biased to contain text
    (pixels where the instance map ``imgs[1]`` is positive).
    """

    def __init__(self, target_size=(640, 640), **kwargs):
        self.target_size = target_size

    def __call__(self, data):
        imgs = data['image']

        h, w = imgs[0].shape[0:2]
        t_w, t_h = self.target_size
        p_w, p_h = self.target_size
        if w == t_w and h == t_h:
            return data

        # Clamp the crop size to the actual map size; padding restores
        # the full target size afterwards.
        t_h = t_h if t_h < h else h
        t_w = t_w if t_w < w else w

        if random.random() > 3.0 / 8.0 and np.max(imgs[1]) > 0:
            # make sure to crop the text region:
            # valid top-left corners range from just above/left of the
            # text pixels to positions that still fit inside the map.
            tl = np.min(np.where(imgs[1] > 0), axis=1) - (t_h, t_w)
            tl[tl < 0] = 0
            br = np.max(np.where(imgs[1] > 0), axis=1) - (t_h, t_w)
            br[br < 0] = 0
            br[0] = min(br[0], h - t_h)
            br[1] = min(br[1], w - t_w)

            i = random.randint(tl[0], br[0]) if tl[0] < br[0] else 0
            j = random.randint(tl[1], br[1]) if tl[1] < br[1] else 0
        else:
            # Uniform random crop anywhere in the map.
            i = random.randint(0, h - t_h) if h - t_h > 0 else 0
            j = random.randint(0, w - t_w) if w - t_w > 0 else 0

        n_imgs = []
        for idx in range(len(imgs)):
            if len(imgs[idx].shape) == 3:
                s3_length = int(imgs[idx].shape[-1])
                img = imgs[idx][i:i + t_h, j:j + t_w, :]
                img_p = cv2.copyMakeBorder(
                    img,
                    0,
                    p_h - t_h,
                    0,
                    p_w - t_w,
                    borderType=cv2.BORDER_CONSTANT,
                    value=tuple(0 for i in range(s3_length)))
            else:
                img = imgs[idx][i:i + t_h, j:j + t_w]
                img_p = cv2.copyMakeBorder(
                    img,
                    0,
                    p_h - t_h,
                    0,
                    p_w - t_w,
                    borderType=cv2.BORDER_CONSTANT,
                    value=(0, ))
            n_imgs.append(img_p)
        data['image'] = n_imgs
        return data
class MakeCentripetalShift():
    """Compute the CT centripetal-shift ground truth.

    For every pixel of a text region outside its kernel, stores the
    (scaled) offset to the nearest kernel-reference pixel of the same
    instance in ``data['gt_distance']``, and converts all label maps to
    the dtypes expected by the loss.
    """

    def __init__(self, **kwargs):
        pass

    def jaccard(self, As, Bs):
        """For each point in ``As`` return the index of its nearest point
        in ``Bs`` (Euclidean). Despite the name this is a nearest-neighbour
        lookup, not a Jaccard overlap."""
        A = As.shape[0]  # small
        B = Bs.shape[0]  # large

        # Pairwise (A, B) distance matrix built by broadcasting/repeat.
        dis = np.sqrt(
            np.sum((As[:, np.newaxis, :].repeat(
                B, axis=1) - Bs[np.newaxis, :, :].repeat(
                    A, axis=0))**2,
                   axis=-1))

        ind = np.argmin(dis, axis=-1)

        return ind

    def __call__(self, data):
        imgs = data['image']
        img, gt_instance, training_mask, gt_kernel_instance, gt_kernel, gt_kernel_inner, training_mask_distance = \
            imgs[0], imgs[1], imgs[2], imgs[3], imgs[4], imgs[5], imgs[6]

        max_instance = np.max(gt_instance)  # num bbox

        # make centripetal shift
        gt_distance = np.zeros((2, *img.shape[0:2]), dtype=np.float32)
        for i in range(1, max_instance + 1):
            # kernel_reference
            ind = (gt_kernel_inner == i)

            if np.sum(ind) == 0:
                # Instance has no kernel reference: exclude it from training.
                training_mask[gt_instance == i] = 0
                training_mask_distance[gt_instance == i] = 0
                continue

            # Reference pixel coordinates as (x, y) float pairs.
            kpoints = np.array(np.where(ind)).transpose(
                (1, 0))[:, ::-1].astype('float32')

            # Text pixels of this instance that lie outside any kernel.
            ind = (gt_instance == i) * (gt_kernel_instance == 0)
            if np.sum(ind) == 0:
                continue
            pixels = np.where(ind)
            points = np.array(pixels).transpose(
                (1, 0))[:, ::-1].astype('float32')

            bbox_ind = self.jaccard(points, kpoints)

            # Offset from each text pixel to its nearest reference pixel,
            # downscaled by 0.1 for the regression target.
            offset_gt = kpoints[bbox_ind] - points

            gt_distance[:, pixels[0], pixels[1]] = offset_gt.T * 0.1

        img = Image.fromarray(img)
        img = img.convert('RGB')

        data["image"] = img
        data["gt_kernel"] = gt_kernel.astype("int64")
        data["training_mask"] = training_mask.astype("int64")
        data["gt_instance"] = gt_instance.astype("int64")
        data["gt_kernel_instance"] = gt_kernel_instance.astype("int64")
        data["training_mask_distance"] = training_mask_distance.astype("int64")
        data["gt_distance"] = gt_distance.astype("float32")

        return data
class ScaleAlignedShort():
    """Deterministically resize so the short side equals ``short_size``,
    rounding both sides up to multiples of 32; ``data['shape']`` records
    the concatenated original and resized shapes."""

    def __init__(self, short_size=640, **kwargs):
        self.short_size = short_size

    def __call__(self, data):
        image = data['image']
        before = image.shape

        src_h, src_w = before[:2]
        ratio = self.short_size * 1.0 / min(src_h, src_w)
        dst_h = int(src_h * ratio + 0.5)
        dst_w = int(src_w * ratio + 0.5)
        # Snap both sides up to the next multiple of 32.
        dst_h += (-dst_h) % 32
        dst_w += (-dst_w) % 32
        image = cv2.resize(image, dsize=(dst_w, dst_h))

        # Keep original + resized shapes for post-processing rescale.
        data['shape'] = np.array(before + image.shape)
        data['image'] = image
        return data
\ No newline at end of file
...@@ -1395,3 +1395,29 @@ class VLLabelEncode(BaseRecLabelEncode): ...@@ -1395,3 +1395,29 @@ class VLLabelEncode(BaseRecLabelEncode):
data['label_res'] = np.array(label_res) data['label_res'] = np.array(label_res)
data['label_sub'] = np.array(label_sub) data['label_sub'] = np.array(label_sub)
return data return data
class CTLabelEncode(object):
    """Parse a CT detection label (a JSON list of annotations, each with
    'points' and 'transcription') into polygon arrays and text strings."""

    def __init__(self, **kwargs):
        pass

    def __call__(self, data):
        annotations = json.loads(data['label'])

        polys = [np.array(anno['points']) for anno in annotations]
        texts = [anno['transcription'] for anno in annotations]

        # A sample without any box cannot be used for training.
        if not polys:
            return None

        data['polys'] = polys
        data['texts'] = texts
        return data
\ No newline at end of file
...@@ -225,6 +225,8 @@ class DetResizeForTest(object): ...@@ -225,6 +225,8 @@ class DetResizeForTest(object):
def __call__(self, data): def __call__(self, data):
img = data['image'] img = data['image']
src_h, src_w, _ = img.shape src_h, src_w, _ = img.shape
if sum([src_h, src_w]) < 64:
img = self.image_padding(img)
if self.resize_type == 0: if self.resize_type == 0:
# img, shape = self.resize_image_type0(img) # img, shape = self.resize_image_type0(img)
...@@ -238,6 +240,12 @@ class DetResizeForTest(object): ...@@ -238,6 +240,12 @@ class DetResizeForTest(object):
data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w]) data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w])
return data return data
def image_padding(self, im, value=0):
h, w, c = im.shape
im_pad = np.zeros((max(32, h), max(32, w), c), np.uint8) + value
im_pad[:h, :w, :] = im
return im_pad
def resize_image_type1(self, img): def resize_image_type1(self, img):
resize_h, resize_w = self.image_shape resize_h, resize_w = self.image_shape
ori_h, ori_w = img.shape[:2] # (h, w, c) ori_h, ori_w = img.shape[:2] # (h, w, c)
......
...@@ -15,6 +15,8 @@ ...@@ -15,6 +15,8 @@
import math import math
import cv2 import cv2
import numpy as np import numpy as np
from skimage.morphology._skeletonize import thin
from ppocr.utils.e2e_utils.extract_textpoint_fast import sort_and_expand_with_direction_v2
__all__ = ['PGProcessTrain'] __all__ = ['PGProcessTrain']
...@@ -26,17 +28,24 @@ class PGProcessTrain(object): ...@@ -26,17 +28,24 @@ class PGProcessTrain(object):
max_text_nums, max_text_nums,
tcl_len, tcl_len,
batch_size=14, batch_size=14,
use_resize=True,
use_random_crop=False,
min_crop_size=24, min_crop_size=24,
min_text_size=4, min_text_size=4,
max_text_size=512, max_text_size=512,
point_gather_mode=None,
**kwargs): **kwargs):
self.tcl_len = tcl_len self.tcl_len = tcl_len
self.max_text_length = max_text_length self.max_text_length = max_text_length
self.max_text_nums = max_text_nums self.max_text_nums = max_text_nums
self.batch_size = batch_size self.batch_size = batch_size
if use_random_crop is True:
self.min_crop_size = min_crop_size self.min_crop_size = min_crop_size
self.use_random_crop = use_random_crop
self.min_text_size = min_text_size self.min_text_size = min_text_size
self.max_text_size = max_text_size self.max_text_size = max_text_size
self.use_resize = use_resize
self.point_gather_mode = point_gather_mode
self.Lexicon_Table = self.get_dict(character_dict_path) self.Lexicon_Table = self.get_dict(character_dict_path)
self.pad_num = len(self.Lexicon_Table) self.pad_num = len(self.Lexicon_Table)
self.img_id = 0 self.img_id = 0
...@@ -282,6 +291,95 @@ class PGProcessTrain(object): ...@@ -282,6 +291,95 @@ class PGProcessTrain(object):
pos_m[:keep] = 1.0 pos_m[:keep] = 1.0
return pos_l, pos_m return pos_l, pos_m
def fit_and_gather_tcl_points_v3(self,
min_area_quad,
poly,
max_h,
max_w,
fixed_point_num=64,
img_id=0,
reference_height=3):
"""
Find the center point of poly as key_points, then fit and gather.
"""
det_mask = np.zeros((int(max_h / self.ds_ratio),
int(max_w / self.ds_ratio))).astype(np.float32)
# score_big_map
cv2.fillPoly(det_mask,
np.round(poly / self.ds_ratio).astype(np.int32), 1.0)
det_mask = cv2.resize(
det_mask, dsize=None, fx=self.ds_ratio, fy=self.ds_ratio)
det_mask = np.array(det_mask > 1e-3, dtype='float32')
f_direction = self.f_direction
skeleton_map = thin(det_mask.astype(np.uint8))
instance_count, instance_label_map = cv2.connectedComponents(
skeleton_map.astype(np.uint8), connectivity=8)
ys, xs = np.where(instance_label_map == 1)
pos_list = list(zip(ys, xs))
if len(pos_list) < 3:
return None
pos_list_sorted = sort_and_expand_with_direction_v2(
pos_list, f_direction, det_mask)
pos_list_sorted = np.array(pos_list_sorted)
length = len(pos_list_sorted) - 1
insert_num = 0
for index in range(length):
stride_y = np.abs(pos_list_sorted[index + insert_num][0] -
pos_list_sorted[index + 1 + insert_num][0])
stride_x = np.abs(pos_list_sorted[index + insert_num][1] -
pos_list_sorted[index + 1 + insert_num][1])
max_points = int(max(stride_x, stride_y))
stride = (pos_list_sorted[index + insert_num] -
pos_list_sorted[index + 1 + insert_num]) / (max_points)
insert_num_temp = max_points - 1
for i in range(int(insert_num_temp)):
insert_value = pos_list_sorted[index + insert_num] - (i + 1
) * stride
insert_index = index + i + 1 + insert_num
pos_list_sorted = np.insert(
pos_list_sorted, insert_index, insert_value, axis=0)
insert_num += insert_num_temp
pos_info = np.array(pos_list_sorted).reshape(-1, 2).astype(
np.float32) # xy-> yx
point_num = len(pos_info)
if point_num > fixed_point_num:
keep_ids = [
int((point_num * 1.0 / fixed_point_num) * x)
for x in range(fixed_point_num)
]
pos_info = pos_info[keep_ids, :]
keep = int(min(len(pos_info), fixed_point_num))
reference_width = (np.abs(poly[0, 0, 0] - poly[-1, 1, 0]) +
np.abs(poly[0, 3, 0] - poly[-1, 2, 0])) // 2
if np.random.rand() < 1:
dh = (np.random.rand(keep) - 0.5) * reference_height
offset = np.random.rand() - 0.5
dw = np.array([[0, offset * reference_width * 0.2]])
random_float_h = np.array([1, 0]).reshape([1, 2]) * dh.reshape(
[keep, 1])
random_float_w = dw.repeat(keep, axis=0)
pos_info += random_float_h
pos_info += random_float_w
pos_info[:, 0] = np.clip(pos_info[:, 0], 0, max_h - 1)
pos_info[:, 1] = np.clip(pos_info[:, 1], 0, max_w - 1)
# padding to fixed length
pos_l = np.zeros((self.tcl_len, 3), dtype=np.int32)
pos_l[:, 0] = np.ones((self.tcl_len, )) * img_id
pos_m = np.zeros((self.tcl_len, 1), dtype=np.float32)
pos_l[:keep, 1:] = np.round(pos_info).astype(np.int32)
pos_m[:keep] = 1.0
return pos_l, pos_m
def generate_direction_map(self, poly_quads, n_char, direction_map): def generate_direction_map(self, poly_quads, n_char, direction_map):
""" """
""" """
...@@ -334,6 +432,7 @@ class PGProcessTrain(object): ...@@ -334,6 +432,7 @@ class PGProcessTrain(object):
""" """
Generate polygon. Generate polygon.
""" """
self.ds_ratio = ds_ratio
score_map_big = np.zeros( score_map_big = np.zeros(
( (
h, h,
...@@ -384,7 +483,6 @@ class PGProcessTrain(object): ...@@ -384,7 +483,6 @@ class PGProcessTrain(object):
text_label = text_strs[poly_idx] text_label = text_strs[poly_idx]
text_label = self.prepare_text_label(text_label, text_label = self.prepare_text_label(text_label,
self.Lexicon_Table) self.Lexicon_Table)
text_label_index_list = [[self.Lexicon_Table.index(c_)] text_label_index_list = [[self.Lexicon_Table.index(c_)]
for c_ in text_label for c_ in text_label
if c_ in self.Lexicon_Table] if c_ in self.Lexicon_Table]
...@@ -432,6 +530,22 @@ class PGProcessTrain(object): ...@@ -432,6 +530,22 @@ class PGProcessTrain(object):
# pos info # pos info
average_shrink_height = self.calculate_average_height( average_shrink_height = self.calculate_average_height(
stcl_quads) stcl_quads)
if self.point_gather_mode == 'align':
self.f_direction = direction_map[:, :, :-1].copy()
pos_res = self.fit_and_gather_tcl_points_v3(
min_area_quad,
stcl_quads,
max_h=h,
max_w=w,
fixed_point_num=64,
img_id=self.img_id,
reference_height=average_shrink_height)
if pos_res is None:
continue
pos_l, pos_m = pos_res[0], pos_res[1]
else:
pos_l, pos_m = self.fit_and_gather_tcl_points_v2( pos_l, pos_m = self.fit_and_gather_tcl_points_v2(
min_area_quad, min_area_quad,
poly, poly,
...@@ -770,6 +884,20 @@ class PGProcessTrain(object): ...@@ -770,6 +884,20 @@ class PGProcessTrain(object):
text_polys[:, :, 0] *= asp_wx text_polys[:, :, 0] *= asp_wx
text_polys[:, :, 1] *= asp_hy text_polys[:, :, 1] *= asp_hy
if self.use_resize is True:
ori_h, ori_w, _ = im.shape
if max(ori_h, ori_w) < 200:
ratio = 200 / max(ori_h, ori_w)
im = cv2.resize(im, (int(ori_w * ratio), int(ori_h * ratio)))
text_polys[:, :, 0] *= ratio
text_polys[:, :, 1] *= ratio
if max(ori_h, ori_w) > 512:
ratio = 512 / max(ori_h, ori_w)
im = cv2.resize(im, (int(ori_w * ratio), int(ori_h * ratio)))
text_polys[:, :, 0] *= ratio
text_polys[:, :, 1] *= ratio
elif self.use_random_crop is True:
h, w, _ = im.shape h, w, _ = im.shape
if max(h, w) > 2048: if max(h, w) > 2048:
rd_scale = 2048.0 / max(h, w) rd_scale = 2048.0 / max(h, w)
...@@ -790,7 +918,7 @@ class PGProcessTrain(object): ...@@ -790,7 +918,7 @@ class PGProcessTrain(object):
if text_polys.shape[0] == 0: if text_polys.shape[0] == 0:
return None return None
# # continue for all ignore case # continue for all ignore case
if np.sum((text_tags * 1.0)) >= text_tags.size: if np.sum((text_tags * 1.0)) >= text_tags.size:
return None return None
new_h, new_w, _ = im.shape new_h, new_w, _ = im.shape
......
...@@ -502,7 +502,7 @@ def resize_norm_img_chinese(img, image_shape): ...@@ -502,7 +502,7 @@ def resize_norm_img_chinese(img, image_shape):
max_wh_ratio = imgW * 1.0 / imgH max_wh_ratio = imgW * 1.0 / imgH
h, w = img.shape[0], img.shape[1] h, w = img.shape[0], img.shape[1]
ratio = w * 1.0 / h ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, ratio) max_wh_ratio = min(max(max_wh_ratio, ratio), max_wh_ratio)
imgW = int(imgH * max_wh_ratio) imgW = int(imgH * max_wh_ratio)
if math.ceil(imgH * ratio) > imgW: if math.ceil(imgH * ratio) > imgW:
resized_w = imgW resized_w = imgW
......
...@@ -25,6 +25,7 @@ from .det_east_loss import EASTLoss ...@@ -25,6 +25,7 @@ from .det_east_loss import EASTLoss
from .det_sast_loss import SASTLoss from .det_sast_loss import SASTLoss
from .det_pse_loss import PSELoss from .det_pse_loss import PSELoss
from .det_fce_loss import FCELoss from .det_fce_loss import FCELoss
from .det_ct_loss import CTLoss
# rec loss # rec loss
from .rec_ctc_loss import CTCLoss from .rec_ctc_loss import CTCLoss
...@@ -68,7 +69,7 @@ def build_loss(config): ...@@ -68,7 +69,7 @@ def build_loss(config):
'CELoss', 'TableAttentionLoss', 'SARLoss', 'AsterLoss', 'SDMGRLoss', 'CELoss', 'TableAttentionLoss', 'SARLoss', 'AsterLoss', 'SDMGRLoss',
'VQASerTokenLayoutLMLoss', 'LossFromOutput', 'PRENLoss', 'MultiLoss', 'VQASerTokenLayoutLMLoss', 'LossFromOutput', 'PRENLoss', 'MultiLoss',
'TableMasterLoss', 'SPINAttentionLoss', 'VLLoss', 'StrokeFocusLoss', 'TableMasterLoss', 'SPINAttentionLoss', 'VLLoss', 'StrokeFocusLoss',
'SLALoss' 'SLALoss', 'CTLoss'
] ]
config = copy.deepcopy(config) config = copy.deepcopy(config)
module_name = config.pop('name') module_name = config.pop('name')
......
...@@ -60,19 +60,19 @@ class KLJSLoss(object): ...@@ -60,19 +60,19 @@ class KLJSLoss(object):
], "mode can only be one of ['kl', 'KL', 'js', 'JS']" ], "mode can only be one of ['kl', 'KL', 'js', 'JS']"
self.mode = mode self.mode = mode
def __call__(self, p1, p2, reduction="mean"): def __call__(self, p1, p2, reduction="mean", eps=1e-5):
if self.mode.lower() == 'kl': if self.mode.lower() == 'kl':
loss = paddle.multiply(p2, loss = paddle.multiply(p2,
paddle.log((p2 + 1e-5) / (p1 + 1e-5) + 1e-5)) paddle.log((p2 + eps) / (p1 + eps) + eps))
loss += paddle.multiply( loss += paddle.multiply(p1,
p1, paddle.log((p1 + 1e-5) / (p2 + 1e-5) + 1e-5)) paddle.log((p1 + eps) / (p2 + eps) + eps))
loss *= 0.5 loss *= 0.5
elif self.mode.lower() == "js": elif self.mode.lower() == "js":
loss = paddle.multiply( loss = paddle.multiply(
p2, paddle.log((2 * p2 + 1e-5) / (p1 + p2 + 1e-5) + 1e-5)) p2, paddle.log((2 * p2 + eps) / (p1 + p2 + eps) + eps))
loss += paddle.multiply( loss += paddle.multiply(
p1, paddle.log((2 * p1 + 1e-5) / (p1 + p2 + 1e-5) + 1e-5)) p1, paddle.log((2 * p1 + eps) / (p1 + p2 + eps) + eps))
loss *= 0.5 loss *= 0.5
else: else:
raise ValueError( raise ValueError(
......
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is refer from:
https://github.com/shengtao96/CentripetalText/tree/main/models/loss
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddle import nn
import paddle.nn.functional as F
import numpy as np
def ohem_single(score, gt_text, training_mask):
    """Online hard example mining (OHEM) for one sample.

    Keeps every positive (text) pixel and the hardest negatives — the
    highest-scoring non-text pixels — capped at 3x the positive count.

    Args:
        score: predicted kernel score map, shape (H, W).
        gt_text: ground-truth text map; > 0.5 is treated as text.
        training_mask: valid-region map; <= 0.5 is treated as ignored.

    Returns:
        float32 tensor of shape (1, H, W) marking the selected pixels.
    """
    # online hard example mining
    # positives = text pixels minus text pixels that the mask ignores
    pos_num = int(paddle.sum(gt_text > 0.5)) - int(
        paddle.sum((gt_text > 0.5) & (training_mask <= 0.5)))
    if pos_num == 0:
        # selected_mask = gt_text.copy() * 0 # may be not good
        # no positives at all: fall back to the raw training mask
        selected_mask = training_mask
        selected_mask = paddle.cast(
            selected_mask.reshape(
                (1, selected_mask.shape[0], selected_mask.shape[1])), "float32")
        return selected_mask
    # candidate negatives: non-text pixels inside the valid region
    neg_num = int(paddle.sum((gt_text <= 0.5) & (training_mask > 0.5)))
    # keep at most 3 negatives per positive
    neg_num = int(min(pos_num * 3, neg_num))
    if neg_num == 0:
        selected_mask = training_mask
        selected_mask = paddle.cast(
            selected_mask.reshape(
                (1, selected_mask.shape[0], selected_mask.shape[1])), "float32")
        return selected_mask
    # hard example
    # sort negatives by score (descending via negation) and take the
    # score of the neg_num-th hardest one as the admission threshold
    neg_score = score[(gt_text <= 0.5) & (training_mask > 0.5)]
    neg_score_sorted = paddle.sort(-neg_score)
    threshold = -neg_score_sorted[neg_num - 1]
    # select positives plus negatives at/above the threshold, all
    # restricted to the valid region
    selected_mask = ((score >= threshold) |
                     (gt_text > 0.5)) & (training_mask > 0.5)
    selected_mask = paddle.cast(
        selected_mask.reshape(
            (1, selected_mask.shape[0], selected_mask.shape[1])), "float32")
    return selected_mask
def ohem_batch(scores, gt_texts, training_masks):
    """Run ``ohem_single`` on every sample of the batch.

    Returns a float32 tensor of shape (N, H, W): the per-sample selected
    masks concatenated along the batch axis.
    """
    per_sample = [
        ohem_single(scores[idx, :, :], gt_texts[idx, :, :],
                    training_masks[idx, :, :])
        for idx in range(scores.shape[0])
    ]
    return paddle.cast(paddle.concat(per_sample, 0), "float32")
def iou_single(a, b, mask, n_class):
    """Mean per-class IoU between label maps ``a`` and ``b``.

    Only positions where ``mask == 1`` participate; the IoU is averaged
    over all ``n_class`` class ids.
    """
    eps = 1e-6
    keep = mask == 1
    a_valid = a[keep]
    b_valid = b[keep]
    # iou of each class
    per_class = []
    for cls in range(n_class):
        inter = paddle.cast((a_valid == cls) & (b_valid == cls), "float32")
        union = paddle.cast((a_valid == cls) | (b_valid == cls), "float32")
        per_class.append(paddle.sum(inter) / (paddle.sum(union) + eps))
    return sum(per_class) / len(per_class)
def iou(a, b, mask, n_class=2, reduce=True):
    """Batched mean IoU; per-sample semantics come from ``iou_single``.

    With ``reduce=True`` a scalar batch mean is returned, otherwise a
    (batch,) float32 tensor of per-sample IoU values.
    """
    n = a.shape[0]
    flat_a = a.reshape((n, -1))
    flat_b = b.reshape((n, -1))
    flat_mask = mask.reshape((n, -1))
    result = paddle.zeros((n, ), dtype="float32")
    for idx in range(n):
        result[idx] = iou_single(flat_a[idx], flat_b[idx], flat_mask[idx],
                                 n_class)
    return paddle.mean(result) if reduce else result
class DiceLoss(nn.Layer):
    """Dice loss on sigmoid-activated predictions.

    Computes ``1 - 2*|P∩T| / (|P|² + |T|²)`` per sample over the masked
    region, scaled by ``loss_weight``.
    """

    def __init__(self, loss_weight=1.0):
        super(DiceLoss, self).__init__()
        self.loss_weight = loss_weight

    def forward(self, input, target, mask, reduce=True):
        n = input.shape[0]
        # squash logits to [0, 1] and flatten everything to (N, H*W)
        pred = F.sigmoid(input).reshape((n, -1))
        gt = paddle.cast(target.reshape((n, -1)), "float32")
        valid = paddle.cast(mask.reshape((n, -1)), "float32")
        # zero out ignored positions in both maps
        pred = pred * valid
        gt = gt * valid
        intersection = paddle.sum(pred * gt, axis=1)
        # 0.001 smoothing keeps the ratio finite when both maps are empty
        pred_sq = paddle.sum(pred * pred, axis=1) + 0.001
        gt_sq = paddle.sum(gt * gt, axis=1) + 0.001
        dice = (2 * intersection) / (pred_sq + gt_sq)
        loss = self.loss_weight * (1 - dice)
        if reduce:
            loss = paddle.mean(loss)
        return loss
class SmoothL1Loss(nn.Layer):
    """Smooth-L1 (Huber) loss on centripetal offset maps.

    Maintains a fixed 640x640 pixel-coordinate grid (``self.coord``,
    shape (640*640, 2) with rows (x, y)) used to chase predicted offsets
    and decide which pixels actually contribute to the loss.
    """

    def __init__(self, beta=1.0, loss_weight=1.0):
        super(SmoothL1Loss, self).__init__()
        self.beta = beta
        self.loss_weight = loss_weight
        # Coordinate grid: np_coord[i, j] == (j, i), i.e. (x, y).
        # Built vectorized with np.indices instead of a 640x640 Python
        # double loop (identical values, ~400k iterations avoided).
        ys, xs = np.indices((640, 640))
        np_coord = np.stack((xs, ys), axis=-1).astype(np.int64)
        np_coord = np_coord.reshape((-1, 2))
        self.coord = self.create_parameter(
            shape=[640 * 640, 2],
            dtype="int32",  # NOTE: not support "int64" before paddle 2.3.1
            default_initializer=nn.initializer.Assign(value=np_coord))
        self.coord.stop_gradient = True

    def forward_single(self, input, target, mask, beta=1.0, eps=1e-6):
        """Masked smooth-L1 between offset maps, normalized per sample.

        Returns a (batch,) tensor: sum of per-pixel Huber terms divided
        by the number of valid pixels (+eps against empty masks).
        """
        batch_size = input.shape[0]
        diff = paddle.abs(input - target) * mask.unsqueeze(1)
        # quadratic below beta, linear above (classic Huber split)
        loss = paddle.where(diff < beta, 0.5 * diff * diff / beta,
                            diff - 0.5 * beta)
        loss = paddle.cast(loss.reshape((batch_size, -1)), "float32")
        mask = paddle.cast(mask.reshape((batch_size, -1)), "float32")
        loss = paddle.sum(loss, axis=-1)
        loss = loss / (mask.sum(axis=-1) + eps)
        return loss

    def select_single(self, distance, gt_instance, gt_kernel_instance,
                      training_mask):
        """Mark pixels whose predicted offset lands in the wrong kernel.

        Follows each pixel's predicted offset (scaled by 10) and keeps
        the pixel only if the instance it lands in differs from its own
        ground-truth instance; the result is intersected with
        ``training_mask``.
        """
        with paddle.no_grad():
            # paddle 2.3.1, paddle.slice not support:
            # distance[:, self.coord[:, 1], self.coord[:, 0]]
            select_distance_list = []
            for i in range(2):
                tmp1 = distance[i, :]
                tmp2 = tmp1[self.coord[:, 1], self.coord[:, 0]]
                select_distance_list.append(tmp2.unsqueeze(0))
            select_distance = paddle.concat(select_distance_list, axis=0)
            off_points = paddle.cast(
                self.coord, "float32") + 10 * select_distance.transpose((1, 0))
            off_points = paddle.cast(off_points, "int64")
            # clamp chased points into the map (assumes square maps —
            # only shape[-1] is used for both axes; TODO confirm)
            off_points = paddle.clip(off_points, 0, distance.shape[-1] - 1)
            selected_mask = (
                gt_instance[self.coord[:, 1], self.coord[:, 0]] !=
                gt_kernel_instance[off_points[:, 1], off_points[:, 0]])
            selected_mask = paddle.cast(
                selected_mask.reshape((1, -1, distance.shape[-1])), "int64")
            selected_training_mask = selected_mask * training_mask
            return selected_training_mask

    def forward(self,
                distances,
                gt_instances,
                gt_kernel_instances,
                training_masks,
                gt_distances,
                reduce=True):
        """Compute the weighted loss and a text-IoU diagnostic.

        Returns:
            (loss, iou_text): the smooth-L1 loss (mean if ``reduce``)
            and the fraction of valid pixels not flagged as mis-routed.
        """
        selected_training_masks = []
        for i in range(distances.shape[0]):
            selected_training_masks.append(
                self.select_single(distances[i, :, :, :], gt_instances[i, :, :],
                                   gt_kernel_instances[i, :, :], training_masks[
                                       i, :, :]))
        selected_training_masks = paddle.cast(
            paddle.concat(selected_training_masks, 0), "float32")
        loss = self.forward_single(distances, gt_distances,
                                   selected_training_masks, self.beta)
        loss = self.loss_weight * loss
        # diagnostic only — no gradients needed
        with paddle.no_grad():
            batch_size = distances.shape[0]
            false_num = selected_training_masks.reshape((batch_size, -1))
            false_num = false_num.sum(axis=-1)
            total_num = paddle.cast(
                training_masks.reshape((batch_size, -1)), "float32")
            total_num = total_num.sum(axis=-1)
            iou_text = (total_num - false_num) / (total_num + 1e-6)
        if reduce:
            loss = paddle.mean(loss)
        return loss, iou_text
class CTLoss(nn.Layer):
    """Total training loss for CentripetalText (CT).

    Combines a Dice loss on the kernel channel (with OHEM pixel
    selection) and a Smooth-L1 loss on the centripetal offset channels.
    """

    def __init__(self):
        super(CTLoss, self).__init__()
        self.kernel_loss = DiceLoss()
        self.loc_loss = SmoothL1Loss(beta=0.1, loss_weight=0.05)

    def forward(self, preds, batch):
        """Compute the combined loss.

        Args:
            preds: dict with key 'maps' — channel 0 is the kernel map,
                channels 1: are the offset maps.
            batch: [imgs, gt_kernels, training_masks, gt_instances,
                gt_kernel_instances, training_mask_distances,
                gt_distances].

        Returns:
            dict with a single key 'loss' (per-sample kernel + loc loss;
            no reduction is applied here).
        """
        imgs = batch[0]
        out = preds['maps']
        gt_kernels, training_masks, gt_instances, gt_kernel_instances, training_mask_distances, gt_distances = batch[
            1:]
        kernels = out[:, 0, :, :]
        distances = out[:, 1:, :, :]
        # kernel loss
        selected_masks = ohem_batch(kernels, gt_kernels, training_masks)
        loss_kernel = self.kernel_loss(
            kernels, gt_kernels, selected_masks, reduce=False)
        # NOTE(review): iou_kernel is computed but never used below —
        # presumably intended for logging; confirm before removing.
        iou_kernel = iou(paddle.cast((kernels > 0), "int64"),
                         gt_kernels,
                         training_masks,
                         reduce=False)
        losses = dict(loss_kernels=loss_kernel, )
        # loc loss
        loss_loc, iou_text = self.loc_loss(
            distances,
            gt_instances,
            gt_kernel_instances,
            training_mask_distances,
            gt_distances,
            reduce=False)
        losses.update(dict(loss_loc=loss_loc, ))
        # NOTE(review): the dict built above is discarded here — only
        # the combined 'loss' entry is returned to the caller.
        loss_all = loss_kernel + loss_loc
        losses = {'loss': loss_all}
        return losses
...@@ -89,12 +89,13 @@ class PGLoss(nn.Layer): ...@@ -89,12 +89,13 @@ class PGLoss(nn.Layer):
tcl_pos = paddle.reshape(tcl_pos, [-1, 3]) tcl_pos = paddle.reshape(tcl_pos, [-1, 3])
tcl_pos = paddle.cast(tcl_pos, dtype=int) tcl_pos = paddle.cast(tcl_pos, dtype=int)
f_tcl_char = paddle.gather_nd(f_char, tcl_pos) f_tcl_char = paddle.gather_nd(f_char, tcl_pos)
f_tcl_char = paddle.reshape(f_tcl_char, f_tcl_char = paddle.reshape(
[-1, 64, 37]) # len(Lexicon_Table)+1 f_tcl_char, [-1, 64, self.pad_num + 1]) # len(Lexicon_Table)+1
f_tcl_char_fg, f_tcl_char_bg = paddle.split(f_tcl_char, [36, 1], axis=2) f_tcl_char_fg, f_tcl_char_bg = paddle.split(
f_tcl_char, [self.pad_num, 1], axis=2)
f_tcl_char_bg = f_tcl_char_bg * tcl_mask + (1.0 - tcl_mask) * 20.0 f_tcl_char_bg = f_tcl_char_bg * tcl_mask + (1.0 - tcl_mask) * 20.0
b, c, l = tcl_mask.shape b, c, l = tcl_mask.shape
tcl_mask_fg = paddle.expand(x=tcl_mask, shape=[b, c, 36 * l]) tcl_mask_fg = paddle.expand(x=tcl_mask, shape=[b, c, self.pad_num * l])
tcl_mask_fg.stop_gradient = True tcl_mask_fg.stop_gradient = True
f_tcl_char_fg = f_tcl_char_fg * tcl_mask_fg + (1.0 - tcl_mask_fg) * ( f_tcl_char_fg = f_tcl_char_fg * tcl_mask_fg + (1.0 - tcl_mask_fg) * (
-20.0) -20.0)
......
...@@ -31,12 +31,14 @@ from .kie_metric import KIEMetric ...@@ -31,12 +31,14 @@ from .kie_metric import KIEMetric
from .vqa_token_ser_metric import VQASerTokenMetric from .vqa_token_ser_metric import VQASerTokenMetric
from .vqa_token_re_metric import VQAReTokenMetric from .vqa_token_re_metric import VQAReTokenMetric
from .sr_metric import SRMetric from .sr_metric import SRMetric
from .ct_metric import CTMetric
def build_metric(config): def build_metric(config):
support_dict = [ support_dict = [
"DetMetric", "DetFCEMetric", "RecMetric", "ClsMetric", "E2EMetric", "DetMetric", "DetFCEMetric", "RecMetric", "ClsMetric", "E2EMetric",
"DistillationMetric", "TableMetric", 'KIEMetric', 'VQASerTokenMetric', "DistillationMetric", "TableMetric", 'KIEMetric', 'VQASerTokenMetric',
'VQAReTokenMetric', 'SRMetric' 'VQAReTokenMetric', 'SRMetric', 'CTMetric'
] ]
config = copy.deepcopy(config) config = copy.deepcopy(config)
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from scipy import io
import numpy as np
from ppocr.utils.e2e_metric.Deteval import combine_results, get_score_C
class CTMetric(object):
    """Evaluation metric accumulator for CentripetalText (CT) detection.

    Collects per-image scores from ``get_score_C`` and combines them
    into final metrics via ``combine_results``.
    """

    def __init__(self, main_indicator, delimiter='\t', **kwargs):
        """
        Args:
            main_indicator: name of the headline metric reported to the
                training loop.
            delimiter: field separator used by label files.
        """
        self.delimiter = delimiter
        self.main_indicator = main_indicator
        self.reset()

    def reset(self):
        # clear accumulated per-image results
        self.results = []

    def __call__(self, preds, batch, **kwargs):
        # NOTE: only support bs=1 now, as the label length of different sample is Unequal
        # (fixed typo "suuport" -> "support" in the message below)
        assert len(
            preds) == 1, "CentripetalText test now only support batch_size=1."
        label = batch[2]
        text = batch[3]
        pred = preds[0]['points']
        result = get_score_C(label, text, pred)
        self.results.append(result)

    def get_metric(self):
        """Combine all accumulated per-image results and reset.

        Input format: y0,x0, ..... yn,xn. Each detection is separated by
        the end-of-line token ('\\n').
        """
        metrics = combine_results(self.results, rec_flag=False)
        self.reset()
        return metrics
...@@ -23,6 +23,7 @@ def build_head(config): ...@@ -23,6 +23,7 @@ def build_head(config):
from .det_pse_head import PSEHead from .det_pse_head import PSEHead
from .det_fce_head import FCEHead from .det_fce_head import FCEHead
from .e2e_pg_head import PGHead from .e2e_pg_head import PGHead
from .det_ct_head import CT_Head
# rec head # rec head
from .rec_ctc_head import CTCHead from .rec_ctc_head import CTCHead
...@@ -52,7 +53,7 @@ def build_head(config): ...@@ -52,7 +53,7 @@ def build_head(config):
'ClsHead', 'AttentionHead', 'SRNHead', 'PGHead', 'Transformer', 'ClsHead', 'AttentionHead', 'SRNHead', 'PGHead', 'Transformer',
'TableAttentionHead', 'SARHead', 'AsterHead', 'SDMGRHead', 'PRENHead', 'TableAttentionHead', 'SARHead', 'AsterHead', 'SDMGRHead', 'PRENHead',
'MultiHead', 'ABINetHead', 'TableMasterHead', 'SPINAttentionHead', 'MultiHead', 'ABINetHead', 'TableMasterHead', 'SPINAttentionHead',
'VLHead', 'SLAHead', 'RobustScannerHead' 'VLHead', 'SLAHead', 'RobustScannerHead', 'CT_Head'
] ]
#table head #table head
......
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr
import math
from paddle.nn.initializer import TruncatedNormal, Constant, Normal
ones_ = Constant(value=1.)
zeros_ = Constant(value=0.)
class CT_Head(nn.Layer):
    """CentripetalText detection head: 3x3 conv -> BN -> ReLU -> 1x1 conv.

    Channel 0 of the output is the kernel map; the remaining channels
    are centripetal offsets.
    """

    def __init__(self,
                 in_channels,
                 hidden_dim,
                 num_classes,
                 loss_kernel=None,
                 loss_loc=None):
        super(CT_Head, self).__init__()
        self.conv1 = nn.Conv2D(
            in_channels, hidden_dim, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2D(hidden_dim)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2D(
            hidden_dim, num_classes, kernel_size=1, stride=1, padding=0)
        # He-style normal init for convs; (weight=1, bias=0) for BN
        for layer in self.sublayers():
            if isinstance(layer, nn.Conv2D):
                fan_out = (layer._kernel_size[0] * layer._kernel_size[1] *
                           layer._out_channels)
                init_fn = Normal(mean=0.0, std=math.sqrt(2. / fan_out))
                init_fn(layer.weight)
            elif isinstance(layer, nn.BatchNorm2D):
                zeros_(layer.bias)
                ones_(layer.weight)

    def _upsample(self, x, scale=1):
        # bilinear upsampling by an integer factor
        return F.upsample(x, scale_factor=scale, mode='bilinear')

    def forward(self, f, targets=None):
        feat = self.relu1(self.bn1(self.conv1(f)))
        out = self.conv2(feat)
        if self.training:
            # upsample 4x during training so maps match the label size
            return {'maps': self._upsample(out, scale=4)}
        # at inference additionally expose the sigmoid kernel score map
        return {'maps': out, 'score': F.sigmoid(out[:, 0, :, :])}
...@@ -66,8 +66,17 @@ class PGHead(nn.Layer): ...@@ -66,8 +66,17 @@ class PGHead(nn.Layer):
""" """
""" """
def __init__(self, in_channels, **kwargs): def __init__(self,
in_channels,
character_dict_path='ppocr/utils/ic15_dict.txt',
**kwargs):
super(PGHead, self).__init__() super(PGHead, self).__init__()
# get character_length
with open(character_dict_path, "rb") as fin:
lines = fin.readlines()
character_length = len(lines) + 1
self.conv_f_score1 = ConvBNLayer( self.conv_f_score1 = ConvBNLayer(
in_channels=in_channels, in_channels=in_channels,
out_channels=64, out_channels=64,
...@@ -178,7 +187,7 @@ class PGHead(nn.Layer): ...@@ -178,7 +187,7 @@ class PGHead(nn.Layer):
name="conv_f_char{}".format(5)) name="conv_f_char{}".format(5))
self.conv3 = nn.Conv2D( self.conv3 = nn.Conv2D(
in_channels=256, in_channels=256,
out_channels=37, out_channels=character_length,
kernel_size=3, kernel_size=3,
stride=1, stride=1,
padding=1, padding=1,
......
...@@ -166,6 +166,7 @@ class SLAHead(nn.Layer): ...@@ -166,6 +166,7 @@ class SLAHead(nn.Layer):
self.max_text_length = max_text_length self.max_text_length = max_text_length
self.emb = self._char_to_onehot self.emb = self._char_to_onehot
self.num_embeddings = out_channels self.num_embeddings = out_channels
self.loc_reg_num = loc_reg_num
# structure # structure
self.structure_attention_cell = AttentionGRUCell( self.structure_attention_cell = AttentionGRUCell(
...@@ -213,15 +214,17 @@ class SLAHead(nn.Layer): ...@@ -213,15 +214,17 @@ class SLAHead(nn.Layer):
fea = fea.transpose([0, 2, 1]) # (NTC)(batch, width, channels) fea = fea.transpose([0, 2, 1]) # (NTC)(batch, width, channels)
hidden = paddle.zeros((batch_size, self.hidden_size)) hidden = paddle.zeros((batch_size, self.hidden_size))
structure_preds = [] structure_preds = paddle.zeros((batch_size, self.max_text_length + 1, self.num_embeddings))
loc_preds = [] loc_preds = paddle.zeros((batch_size, self.max_text_length + 1, self.loc_reg_num))
structure_preds.stop_gradient = True
loc_preds.stop_gradient = True
if self.training and targets is not None: if self.training and targets is not None:
structure = targets[0] structure = targets[0]
for i in range(self.max_text_length + 1): for i in range(self.max_text_length + 1):
hidden, structure_step, loc_step = self._decode(structure[:, i], hidden, structure_step, loc_step = self._decode(structure[:, i],
fea, hidden) fea, hidden)
structure_preds.append(structure_step) structure_preds[:, i, :] = structure_step
loc_preds.append(loc_step) loc_preds[:, i, :] = loc_step
else: else:
pre_chars = paddle.zeros(shape=[batch_size], dtype="int32") pre_chars = paddle.zeros(shape=[batch_size], dtype="int32")
max_text_length = paddle.to_tensor(self.max_text_length) max_text_length = paddle.to_tensor(self.max_text_length)
...@@ -231,10 +234,8 @@ class SLAHead(nn.Layer): ...@@ -231,10 +234,8 @@ class SLAHead(nn.Layer):
hidden, structure_step, loc_step = self._decode(pre_chars, fea, hidden, structure_step, loc_step = self._decode(pre_chars, fea,
hidden) hidden)
pre_chars = structure_step.argmax(axis=1, dtype="int32") pre_chars = structure_step.argmax(axis=1, dtype="int32")
structure_preds.append(structure_step) structure_preds[:, i, :] = structure_step
loc_preds.append(loc_step) loc_preds[:, i, :] = loc_step
structure_preds = paddle.stack(structure_preds, axis=1)
loc_preds = paddle.stack(loc_preds, axis=1)
if not self.training: if not self.training:
structure_preds = F.softmax(structure_preds) structure_preds = F.softmax(structure_preds)
return {'structure_probs': structure_preds, 'loc_preds': loc_preds} return {'structure_probs': structure_preds, 'loc_preds': loc_preds}
......
...@@ -26,13 +26,15 @@ def build_neck(config): ...@@ -26,13 +26,15 @@ def build_neck(config):
from .fce_fpn import FCEFPN from .fce_fpn import FCEFPN
from .pren_fpn import PRENFPN from .pren_fpn import PRENFPN
from .csp_pan import CSPPAN from .csp_pan import CSPPAN
from .ct_fpn import CTFPN
support_dict = [ support_dict = [
'FPN', 'FCEFPN', 'LKPAN', 'DBFPN', 'RSEFPN', 'EASTFPN', 'SASTFPN', 'FPN', 'FCEFPN', 'LKPAN', 'DBFPN', 'RSEFPN', 'EASTFPN', 'SASTFPN',
'SequenceEncoder', 'PGFPN', 'TableFPN', 'PRENFPN', 'CSPPAN' 'SequenceEncoder', 'PGFPN', 'TableFPN', 'PRENFPN', 'CSPPAN', 'CTFPN'
] ]
module_name = config.pop('name') module_name = config.pop('name')
assert module_name in support_dict, Exception('neck only support {}'.format( assert module_name in support_dict, Exception('neck only support {}'.format(
support_dict)) support_dict))
module_class = eval(module_name)(**config) module_class = eval(module_name)(**config)
return module_class return module_class
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr
import os
import sys
import math
from paddle.nn.initializer import TruncatedNormal, Constant, Normal
ones_ = Constant(value=1.)
zeros_ = Constant(value=0.)
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../../..')))
class Conv_BN_ReLU(nn.Layer):
    """Conv2D -> BatchNorm2D -> ReLU building block (conv has no bias)."""

    def __init__(self,
                 in_planes,
                 out_planes,
                 kernel_size=1,
                 stride=1,
                 padding=0):
        super(Conv_BN_ReLU, self).__init__()
        self.conv = nn.Conv2D(
            in_planes,
            out_planes,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias_attr=False)
        self.bn = nn.BatchNorm2D(out_planes)
        self.relu = nn.ReLU()
        # He-style normal init for convs; (weight=1, bias=0) for BN
        for layer in self.sublayers():
            if isinstance(layer, nn.Conv2D):
                fan_out = (layer._kernel_size[0] * layer._kernel_size[1] *
                           layer._out_channels)
                init_fn = Normal(mean=0.0, std=math.sqrt(2. / fan_out))
                init_fn(layer.weight)
            elif isinstance(layer, nn.BatchNorm2D):
                zeros_(layer.bias)
                ones_(layer.weight)

    def forward(self, x):
        return self.relu(self.bn(self.conv(x)))
class FPEM(nn.Layer):
    """Feature Pyramid Enhancement Module.

    Runs a top-down pass followed by a bottom-up pass over four pyramid
    levels, each step being upsample-add -> depthwise conv -> smooth.
    """

    def __init__(self, in_channels, out_channels):
        super(FPEM, self).__init__()
        planes = out_channels

        def depthwise(stride):
            # 3x3 depthwise conv used in front of every smoothing block
            return nn.Conv2D(
                planes,
                planes,
                kernel_size=3,
                stride=stride,
                padding=1,
                groups=planes,
                bias_attr=False)

        # top-down (stride-1) branch
        self.dwconv3_1 = depthwise(1)
        self.smooth_layer3_1 = Conv_BN_ReLU(planes, planes)
        self.dwconv2_1 = depthwise(1)
        self.smooth_layer2_1 = Conv_BN_ReLU(planes, planes)
        self.dwconv1_1 = depthwise(1)
        self.smooth_layer1_1 = Conv_BN_ReLU(planes, planes)
        # bottom-up (stride-2) branch
        self.dwconv2_2 = depthwise(2)
        self.smooth_layer2_2 = Conv_BN_ReLU(planes, planes)
        self.dwconv3_2 = depthwise(2)
        self.smooth_layer3_2 = Conv_BN_ReLU(planes, planes)
        self.dwconv4_2 = depthwise(2)
        self.smooth_layer4_2 = Conv_BN_ReLU(planes, planes)

    def _upsample_add(self, x, y):
        # upsample x by 2 and fuse it with the lateral feature y
        return F.upsample(x, scale_factor=2, mode='bilinear') + y

    def forward(self, f1, f2, f3, f4):
        # up-down: propagate coarse context into finer levels
        f3 = self.smooth_layer3_1(self.dwconv3_1(self._upsample_add(f4, f3)))
        f2 = self.smooth_layer2_1(self.dwconv2_1(self._upsample_add(f3, f2)))
        f1 = self.smooth_layer1_1(self.dwconv1_1(self._upsample_add(f2, f1)))
        # down-up: propagate fine detail back into coarser levels
        f2 = self.smooth_layer2_2(self.dwconv2_2(self._upsample_add(f2, f1)))
        f3 = self.smooth_layer3_2(self.dwconv3_2(self._upsample_add(f3, f2)))
        f4 = self.smooth_layer4_2(self.dwconv4_2(self._upsample_add(f4, f3)))
        return f1, f2, f3, f4
class CTFPN(nn.Layer):
    """FPN neck for CentripetalText.

    Reduces the four backbone features to 128 channels, enhances them
    with two stacked FPEMs, fuses the two FPEM outputs by addition, and
    concatenates all levels at the finest resolution.
    """

    def __init__(self, in_channels, out_channel=128):
        super(CTFPN, self).__init__()
        # four levels of 128 channels are concatenated at the end
        self.out_channels = out_channel * 4
        self.reduce_layer1 = Conv_BN_ReLU(in_channels[0], 128)
        self.reduce_layer2 = Conv_BN_ReLU(in_channels[1], 128)
        self.reduce_layer3 = Conv_BN_ReLU(in_channels[2], 128)
        self.reduce_layer4 = Conv_BN_ReLU(in_channels[3], 128)
        self.fpem1 = FPEM(in_channels=(64, 128, 256, 512), out_channels=128)
        self.fpem2 = FPEM(in_channels=(64, 128, 256, 512), out_channels=128)

    def _upsample(self, x, scale=1):
        # bilinear upsampling by an integer factor
        return F.upsample(x, scale_factor=scale, mode='bilinear')

    def forward(self, f):
        # channel reduction of each backbone level to 128
        p1 = self.reduce_layer1(f[0])  # N,64,160,160 --> N, 128, 160, 160
        p2 = self.reduce_layer2(f[1])  # N, 128, 80, 80 --> N, 128, 80, 80
        p3 = self.reduce_layer3(f[2])  # N, 256, 40, 40 --> N, 128, 40, 40
        p4 = self.reduce_layer4(f[3])  # N, 512, 20, 20 --> N, 128, 20, 20
        # two FPEM enhancement passes
        e1_1, e2_1, e3_1, e4_1 = self.fpem1(p1, p2, p3, p4)
        e1_2, e2_2, e3_2, e4_2 = self.fpem2(e1_1, e2_1, e3_1, e4_1)
        # feature fusion: element-wise sum of the two FPEM outputs
        m1 = e1_1 + e1_2
        m2 = self._upsample(e2_1 + e2_2, scale=2)
        m3 = self._upsample(e3_1 + e3_2, scale=4)
        m4 = self._upsample(e4_1 + e4_2, scale=8)
        # N, 512, 160, 160
        return paddle.concat((m1, m2, m3, m4), 1)
...@@ -35,6 +35,7 @@ from .vqa_token_ser_layoutlm_postprocess import VQASerTokenLayoutLMPostProcess, ...@@ -35,6 +35,7 @@ from .vqa_token_ser_layoutlm_postprocess import VQASerTokenLayoutLMPostProcess,
from .vqa_token_re_layoutlm_postprocess import VQAReTokenLayoutLMPostProcess, DistillationRePostProcess from .vqa_token_re_layoutlm_postprocess import VQAReTokenLayoutLMPostProcess, DistillationRePostProcess
from .table_postprocess import TableMasterLabelDecode, TableLabelDecode from .table_postprocess import TableMasterLabelDecode, TableLabelDecode
from .picodet_postprocess import PicoDetPostProcess from .picodet_postprocess import PicoDetPostProcess
from .ct_postprocess import CTPostProcess
def build_post_process(config, global_config=None): def build_post_process(config, global_config=None):
...@@ -48,7 +49,7 @@ def build_post_process(config, global_config=None): ...@@ -48,7 +49,7 @@ def build_post_process(config, global_config=None):
'DistillationSARLabelDecode', 'ViTSTRLabelDecode', 'ABINetLabelDecode', 'DistillationSARLabelDecode', 'ViTSTRLabelDecode', 'ABINetLabelDecode',
'TableMasterLabelDecode', 'SPINLabelDecode', 'TableMasterLabelDecode', 'SPINLabelDecode',
'DistillationSerPostProcess', 'DistillationRePostProcess', 'DistillationSerPostProcess', 'DistillationRePostProcess',
'VLLabelDecode', 'PicoDetPostProcess' 'VLLabelDecode', 'PicoDetPostProcess', 'CTPostProcess'
] ]
if config['name'] == 'PSEPostProcess': if config['name'] == 'PSEPostProcess':
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is refered from:
https://github.com/shengtao96/CentripetalText/blob/main/test.py
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import os.path as osp
import numpy as np
import cv2
import paddle
import pyclipper
class CTPostProcess(object):
    """
    The post process for Centripetal Text (CT).

    Converts the raw kernel/offset maps into text boxes: connected
    components on the thresholded kernel map seed the regions, each
    pixel is assigned to the kernel its predicted offset points at, and
    regions are filtered by area and mean score.
    """

    def __init__(self, min_score=0.88, min_area=16, box_type='poly', **kwargs):
        # minimum mean kernel score for a region to be kept
        self.min_score = min_score
        # minimum pixel count for a region to be kept
        self.min_area = min_area
        # 'rect' -> shrunk min-area rectangles, 'poly' -> outer contours
        self.box_type = box_type

        # Pixel-coordinate grid for maps up to 300x300:
        # coord[0, i, j] == j (x) and coord[1, i, j] == i (y).
        # Vectorized with np.indices instead of a 300x300 Python loop.
        ys, xs = np.indices((300, 300))
        self.coord = np.stack((xs, ys)).astype(np.int32)

    def __call__(self, preds, batch):
        """Decode predictions into boxes.

        Args:
            preds: dict with 'maps' (N, C, H, W) and 'score' (N, H, W).
            batch: per-image shape records; batch[idx][:3] is the
                original image size, batch[idx][3:5] the map size.

        Returns:
            list of {'points': [ndarray(-1, 2), ...]} per image.
        """
        outs = preds['maps']
        out_scores = preds['score']
        if isinstance(outs, paddle.Tensor):
            outs = outs.numpy()
        if isinstance(out_scores, paddle.Tensor):
            out_scores = out_scores.numpy()
        batch_size = outs.shape[0]
        boxes_batch = []
        for idx in range(batch_size):
            bboxes = []
            scores = []
            img_shape = batch[idx]
            org_img_size = img_shape[:3]
            img_shape = img_shape[3:]
            img_size = img_shape[:2]
            out = np.expand_dims(outs[idx], axis=0)
            score = np.expand_dims(out_scores[idx], axis=0)
            # channel 0: kernel mask (threshold 0.2); channels 1:: offsets
            kernel = out[:, 0, :, :] > 0.2
            loc = out[:, 1:, :, :].astype("float32")
            score = score[0].astype(np.float32)
            kernel = kernel[0].astype(np.uint8)
            loc = loc[0].astype(np.float32)
            label_num, label_kernel = cv2.connectedComponents(
                kernel, connectivity=4)
            for i in range(1, label_num):
                ind = (label_kernel == i)
                if ind.sum(
                ) < 10:  # pixel number less than 10, treated as background
                    label_kernel[ind] = 0
            label = np.zeros_like(label_kernel)
            h, w = label_kernel.shape
            pixels = self.coord[:, :h, :w].reshape(2, -1)
            points = pixels.transpose([1, 0]).astype(np.float32)
            # chase each pixel's centripetal offset (scaled by 10/4) and
            # adopt the kernel label found at the landing point
            off_points = (points + 10. / 4. * loc[:, pixels[1], pixels[0]].T
                          ).astype(np.int32)
            off_points[:, 0] = np.clip(off_points[:, 0], 0, label.shape[1] - 1)
            off_points[:, 1] = np.clip(off_points[:, 1], 0, label.shape[0] - 1)
            label[pixels[1], pixels[0]] = label_kernel[off_points[:, 1],
                                                       off_points[:, 0]]
            # kernel pixels always keep their own label
            label[label_kernel > 0] = label_kernel[label_kernel > 0]
            # mean kernel score per component (index 0 = background)
            score_pocket = [0.0]
            for i in range(1, label_num):
                ind = (label_kernel == i)
                if ind.sum() == 0:
                    score_pocket.append(0.0)
                    continue
                score_i = np.mean(score[ind])
                score_pocket.append(score_i)
            label_num = np.max(label) + 1
            label = cv2.resize(
                label, (img_size[1], img_size[0]),
                interpolation=cv2.INTER_NEAREST)
            # map-space -> original-image-space scale factors (x, y)
            scale = (float(org_img_size[1]) / float(img_size[1]),
                     float(org_img_size[0]) / float(img_size[0]))
            for i in range(1, label_num):
                ind = (label == i)
                points = np.array(np.where(ind)).transpose((1, 0))
                if points.shape[0] < self.min_area:
                    continue
                score_i = score_pocket[i]
                if score_i < self.min_score:
                    continue
                if self.box_type == 'rect':
                    rect = cv2.minAreaRect(points[:, ::-1])
                    bbox = cv2.boxPoints(rect) * scale
                    # shrink the rectangle towards its center by 15%
                    z = bbox.mean(0)
                    bbox = z + (bbox - z) * 0.85
                elif self.box_type == 'poly':
                    binary = np.zeros(label.shape, dtype='uint8')
                    binary[ind] = 1
                    # OpenCV 3 returns 3 values, OpenCV 4 returns 2
                    try:
                        _, contours, _ = cv2.findContours(
                            binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                    except BaseException:
                        contours, _ = cv2.findContours(
                            binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                    bbox = contours[0] * scale

                bbox = bbox.astype('int32')
                bboxes.append(bbox.reshape(-1, 2))
                scores.append(score_i)

            boxes_batch.append({'points': bboxes})

        return boxes_batch
...@@ -30,12 +30,18 @@ class PGPostProcess(object): ...@@ -30,12 +30,18 @@ class PGPostProcess(object):
The post process for PGNet. The post process for PGNet.
""" """
def __init__(self, character_dict_path, valid_set, score_thresh, mode, def __init__(self,
character_dict_path,
valid_set,
score_thresh,
mode,
point_gather_mode=None,
**kwargs): **kwargs):
self.character_dict_path = character_dict_path self.character_dict_path = character_dict_path
self.valid_set = valid_set self.valid_set = valid_set
self.score_thresh = score_thresh self.score_thresh = score_thresh
self.mode = mode self.mode = mode
self.point_gather_mode = point_gather_mode
# c++ la-nms is faster, but only support python 3.5 # c++ la-nms is faster, but only support python 3.5
self.is_python35 = False self.is_python35 = False
...@@ -43,8 +49,13 @@ class PGPostProcess(object): ...@@ -43,8 +49,13 @@ class PGPostProcess(object):
self.is_python35 = True self.is_python35 = True
def __call__(self, outs_dict, shape_list): def __call__(self, outs_dict, shape_list):
post = PGNet_PostProcess(self.character_dict_path, self.valid_set, post = PGNet_PostProcess(
self.score_thresh, outs_dict, shape_list) self.character_dict_path,
self.valid_set,
self.score_thresh,
outs_dict,
shape_list,
point_gather_mode=self.point_gather_mode)
if self.mode == 'fast': if self.mode == 'fast':
data = post.pg_postprocess_fast() data = post.pg_postprocess_fast()
else: else:
......
...@@ -12,8 +12,10 @@ ...@@ -12,8 +12,10 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import json
import numpy as np import numpy as np
import scipy.io as io import scipy.io as io
import Polygon as plg
from ppocr.utils.e2e_metric.polygon_fast import iod, area_of_intersection, area from ppocr.utils.e2e_metric.polygon_fast import iod, area_of_intersection, area
...@@ -269,7 +271,124 @@ def get_socre_B(gt_dir, img_id, pred_dict): ...@@ -269,7 +271,124 @@ def get_socre_B(gt_dir, img_id, pred_dict):
return single_data return single_data
def combine_results(all_data): def get_score_C(gt_label, text, pred_bboxes):
"""
get score for CentripetalText (CT) prediction.
"""
def gt_reading_mod(gt_label, text):
"""This helper reads groundtruths from mat files"""
groundtruths = []
nbox = len(gt_label)
for i in range(nbox):
label = {"transcription": text[i][0], "points": gt_label[i].numpy()}
groundtruths.append(label)
return groundtruths
def get_union(pD, pG):
areaA = pD.area()
areaB = pG.area()
return areaA + areaB - get_intersection(pD, pG)
def get_intersection(pD, pG):
pInt = pD & pG
if len(pInt) == 0:
return 0
return pInt.area()
def detection_filtering(detections, groundtruths, threshold=0.5):
for gt in groundtruths:
point_num = gt['points'].shape[1] // 2
if gt['transcription'] == '###' and (point_num > 1):
gt_p = np.array(gt['points']).reshape(point_num,
2).astype('int32')
gt_p = plg.Polygon(gt_p)
for det_id, detection in enumerate(detections):
det_y = detection[0::2]
det_x = detection[1::2]
det_p = np.concatenate((np.array(det_x), np.array(det_y)))
det_p = det_p.reshape(2, -1).transpose()
det_p = plg.Polygon(det_p)
try:
det_gt_iou = get_intersection(det_p,
gt_p) / det_p.area()
except:
print(det_x, det_y, gt_p)
if det_gt_iou > threshold:
detections[det_id] = []
detections[:] = [item for item in detections if item != []]
return detections
def sigma_calculation(det_p, gt_p):
"""
sigma = inter_area / gt_area
"""
if gt_p.area() == 0.:
return 0
return get_intersection(det_p, gt_p) / gt_p.area()
def tau_calculation(det_p, gt_p):
"""
tau = inter_area / det_area
"""
if det_p.area() == 0.:
return 0
return get_intersection(det_p, gt_p) / det_p.area()
detections = []
for item in pred_bboxes:
detections.append(item[:, ::-1].reshape(-1))
groundtruths = gt_reading_mod(gt_label, text)
detections = detection_filtering(
detections, groundtruths) # filters detections overlapping with DC area
for idx in range(len(groundtruths) - 1, -1, -1):
#NOTE: source code use 'orin' to indicate '#', here we use 'anno',
# which may cause slight drop in fscore, about 0.12
if groundtruths[idx]['transcription'] == '###':
groundtruths.pop(idx)
local_sigma_table = np.zeros((len(groundtruths), len(detections)))
local_tau_table = np.zeros((len(groundtruths), len(detections)))
for gt_id, gt in enumerate(groundtruths):
if len(detections) > 0:
for det_id, detection in enumerate(detections):
point_num = gt['points'].shape[1] // 2
gt_p = np.array(gt['points']).reshape(point_num,
2).astype('int32')
gt_p = plg.Polygon(gt_p)
det_y = detection[0::2]
det_x = detection[1::2]
det_p = np.concatenate((np.array(det_x), np.array(det_y)))
det_p = det_p.reshape(2, -1).transpose()
det_p = plg.Polygon(det_p)
local_sigma_table[gt_id, det_id] = sigma_calculation(det_p,
gt_p)
local_tau_table[gt_id, det_id] = tau_calculation(det_p, gt_p)
data = {}
data['sigma'] = local_sigma_table
data['global_tau'] = local_tau_table
data['global_pred_str'] = ''
data['global_gt_str'] = ''
return data
def combine_results(all_data, rec_flag=True):
tr = 0.7 tr = 0.7
tp = 0.6 tp = 0.6
fsc_k = 0.8 fsc_k = 0.8
...@@ -278,6 +397,7 @@ def combine_results(all_data): ...@@ -278,6 +397,7 @@ def combine_results(all_data):
global_tau = [] global_tau = []
global_pred_str = [] global_pred_str = []
global_gt_str = [] global_gt_str = []
for data in all_data: for data in all_data:
global_sigma.append(data['sigma']) global_sigma.append(data['sigma'])
global_tau.append(data['global_tau']) global_tau.append(data['global_tau'])
...@@ -294,7 +414,7 @@ def combine_results(all_data): ...@@ -294,7 +414,7 @@ def combine_results(all_data):
def one_to_one(local_sigma_table, local_tau_table, def one_to_one(local_sigma_table, local_tau_table,
local_accumulative_recall, local_accumulative_precision, local_accumulative_recall, local_accumulative_precision,
global_accumulative_recall, global_accumulative_precision, global_accumulative_recall, global_accumulative_precision,
gt_flag, det_flag, idy): gt_flag, det_flag, idy, rec_flag):
hit_str_num = 0 hit_str_num = 0
for gt_id in range(num_gt): for gt_id in range(num_gt):
gt_matching_qualified_sigma_candidates = np.where( gt_matching_qualified_sigma_candidates = np.where(
...@@ -328,9 +448,10 @@ def combine_results(all_data): ...@@ -328,9 +448,10 @@ def combine_results(all_data):
gt_flag[0, gt_id] = 1 gt_flag[0, gt_id] = 1
matched_det_id = np.where(local_sigma_table[gt_id, :] > tr) matched_det_id = np.where(local_sigma_table[gt_id, :] > tr)
# recg start # recg start
if rec_flag:
gt_str_cur = global_gt_str[idy][gt_id] gt_str_cur = global_gt_str[idy][gt_id]
pred_str_cur = global_pred_str[idy][matched_det_id[0].tolist()[ pred_str_cur = global_pred_str[idy][matched_det_id[0]
0]] .tolist()[0]]
if pred_str_cur == gt_str_cur: if pred_str_cur == gt_str_cur:
hit_str_num += 1 hit_str_num += 1
else: else:
...@@ -343,7 +464,7 @@ def combine_results(all_data): ...@@ -343,7 +464,7 @@ def combine_results(all_data):
def one_to_many(local_sigma_table, local_tau_table, def one_to_many(local_sigma_table, local_tau_table,
local_accumulative_recall, local_accumulative_precision, local_accumulative_recall, local_accumulative_precision,
global_accumulative_recall, global_accumulative_precision, global_accumulative_recall, global_accumulative_precision,
gt_flag, det_flag, idy): gt_flag, det_flag, idy, rec_flag):
hit_str_num = 0 hit_str_num = 0
for gt_id in range(num_gt): for gt_id in range(num_gt):
# skip the following if the groundtruth was matched # skip the following if the groundtruth was matched
...@@ -374,6 +495,7 @@ def combine_results(all_data): ...@@ -374,6 +495,7 @@ def combine_results(all_data):
gt_flag[0, gt_id] = 1 gt_flag[0, gt_id] = 1
det_flag[0, qualified_tau_candidates] = 1 det_flag[0, qualified_tau_candidates] = 1
# recg start # recg start
if rec_flag:
gt_str_cur = global_gt_str[idy][gt_id] gt_str_cur = global_gt_str[idy][gt_id]
pred_str_cur = global_pred_str[idy][ pred_str_cur = global_pred_str[idy][
qualified_tau_candidates[0].tolist()[0]] qualified_tau_candidates[0].tolist()[0]]
...@@ -388,6 +510,7 @@ def combine_results(all_data): ...@@ -388,6 +510,7 @@ def combine_results(all_data):
gt_flag[0, gt_id] = 1 gt_flag[0, gt_id] = 1
det_flag[0, qualified_tau_candidates] = 1 det_flag[0, qualified_tau_candidates] = 1
# recg start # recg start
if rec_flag:
gt_str_cur = global_gt_str[idy][gt_id] gt_str_cur = global_gt_str[idy][gt_id]
pred_str_cur = global_pred_str[idy][ pred_str_cur = global_pred_str[idy][
qualified_tau_candidates[0].tolist()[0]] qualified_tau_candidates[0].tolist()[0]]
...@@ -409,7 +532,7 @@ def combine_results(all_data): ...@@ -409,7 +532,7 @@ def combine_results(all_data):
def many_to_one(local_sigma_table, local_tau_table, def many_to_one(local_sigma_table, local_tau_table,
local_accumulative_recall, local_accumulative_precision, local_accumulative_recall, local_accumulative_precision,
global_accumulative_recall, global_accumulative_precision, global_accumulative_recall, global_accumulative_precision,
gt_flag, det_flag, idy): gt_flag, det_flag, idy, rec_flag):
hit_str_num = 0 hit_str_num = 0
for det_id in range(num_det): for det_id in range(num_det):
# skip the following if the detection was matched # skip the following if the detection was matched
...@@ -440,11 +563,12 @@ def combine_results(all_data): ...@@ -440,11 +563,12 @@ def combine_results(all_data):
gt_flag[0, qualified_sigma_candidates] = 1 gt_flag[0, qualified_sigma_candidates] = 1
det_flag[0, det_id] = 1 det_flag[0, det_id] = 1
# recg start # recg start
if rec_flag:
pred_str_cur = global_pred_str[idy][det_id] pred_str_cur = global_pred_str[idy][det_id]
gt_len = len(qualified_sigma_candidates[0]) gt_len = len(qualified_sigma_candidates[0])
for idx in range(gt_len): for idx in range(gt_len):
ele_gt_id = qualified_sigma_candidates[0].tolist()[ ele_gt_id = qualified_sigma_candidates[
idx] 0].tolist()[idx]
if ele_gt_id not in global_gt_str[idy]: if ele_gt_id not in global_gt_str[idy]:
continue continue
gt_str_cur = global_gt_str[idy][ele_gt_id] gt_str_cur = global_gt_str[idy][ele_gt_id]
...@@ -452,7 +576,8 @@ def combine_results(all_data): ...@@ -452,7 +576,8 @@ def combine_results(all_data):
hit_str_num += 1 hit_str_num += 1
break break
else: else:
if pred_str_cur.lower() == gt_str_cur.lower(): if pred_str_cur.lower() == gt_str_cur.lower(
):
hit_str_num += 1 hit_str_num += 1
break break
# recg end # recg end
...@@ -461,10 +586,12 @@ def combine_results(all_data): ...@@ -461,10 +586,12 @@ def combine_results(all_data):
det_flag[0, det_id] = 1 det_flag[0, det_id] = 1
gt_flag[0, qualified_sigma_candidates] = 1 gt_flag[0, qualified_sigma_candidates] = 1
# recg start # recg start
if rec_flag:
pred_str_cur = global_pred_str[idy][det_id] pred_str_cur = global_pred_str[idy][det_id]
gt_len = len(qualified_sigma_candidates[0]) gt_len = len(qualified_sigma_candidates[0])
for idx in range(gt_len): for idx in range(gt_len):
ele_gt_id = qualified_sigma_candidates[0].tolist()[idx] ele_gt_id = qualified_sigma_candidates[0].tolist()[
idx]
if ele_gt_id not in global_gt_str[idy]: if ele_gt_id not in global_gt_str[idy]:
continue continue
gt_str_cur = global_gt_str[idy][ele_gt_id] gt_str_cur = global_gt_str[idy][ele_gt_id]
...@@ -504,7 +631,7 @@ def combine_results(all_data): ...@@ -504,7 +631,7 @@ def combine_results(all_data):
gt_flag, det_flag, hit_str_num = one_to_one(local_sigma_table, local_tau_table, gt_flag, det_flag, hit_str_num = one_to_one(local_sigma_table, local_tau_table,
local_accumulative_recall, local_accumulative_precision, local_accumulative_recall, local_accumulative_precision,
global_accumulative_recall, global_accumulative_precision, global_accumulative_recall, global_accumulative_precision,
gt_flag, det_flag, idx) gt_flag, det_flag, idx, rec_flag)
hit_str_count += hit_str_num hit_str_count += hit_str_num
#######then check for one-to-many case########## #######then check for one-to-many case##########
...@@ -512,14 +639,14 @@ def combine_results(all_data): ...@@ -512,14 +639,14 @@ def combine_results(all_data):
gt_flag, det_flag, hit_str_num = one_to_many(local_sigma_table, local_tau_table, gt_flag, det_flag, hit_str_num = one_to_many(local_sigma_table, local_tau_table,
local_accumulative_recall, local_accumulative_precision, local_accumulative_recall, local_accumulative_precision,
global_accumulative_recall, global_accumulative_precision, global_accumulative_recall, global_accumulative_precision,
gt_flag, det_flag, idx) gt_flag, det_flag, idx, rec_flag)
hit_str_count += hit_str_num hit_str_count += hit_str_num
#######then check for many-to-one case########## #######then check for many-to-one case##########
local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, \ local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, \
gt_flag, det_flag, hit_str_num = many_to_one(local_sigma_table, local_tau_table, gt_flag, det_flag, hit_str_num = many_to_one(local_sigma_table, local_tau_table,
local_accumulative_recall, local_accumulative_precision, local_accumulative_recall, local_accumulative_precision,
global_accumulative_recall, global_accumulative_precision, global_accumulative_recall, global_accumulative_precision,
gt_flag, det_flag, idx) gt_flag, det_flag, idx, rec_flag)
hit_str_count += hit_str_num hit_str_count += hit_str_num
try: try:
......
...@@ -88,8 +88,35 @@ def ctc_greedy_decoder(probs_seq, blank=95, keep_blank_in_idxs=True): ...@@ -88,8 +88,35 @@ def ctc_greedy_decoder(probs_seq, blank=95, keep_blank_in_idxs=True):
return dst_str, keep_idx_list return dst_str, keep_idx_list
def instance_ctc_greedy_decoder(gather_info, logits_map, pts_num=4): def instance_ctc_greedy_decoder(gather_info,
logits_map,
pts_num=4,
point_gather_mode=None):
_, _, C = logits_map.shape _, _, C = logits_map.shape
if point_gather_mode == 'align':
insert_num = 0
gather_info = np.array(gather_info)
length = len(gather_info) - 1
for index in range(length):
stride_y = np.abs(gather_info[index + insert_num][0] - gather_info[
index + 1 + insert_num][0])
stride_x = np.abs(gather_info[index + insert_num][1] - gather_info[
index + 1 + insert_num][1])
max_points = int(max(stride_x, stride_y))
stride = (gather_info[index + insert_num] -
gather_info[index + 1 + insert_num]) / (max_points)
insert_num_temp = max_points - 1
for i in range(int(insert_num_temp)):
insert_value = gather_info[index + insert_num] - (i + 1
) * stride
insert_index = index + i + 1 + insert_num
gather_info = np.insert(
gather_info, insert_index, insert_value, axis=0)
insert_num += insert_num_temp
gather_info = gather_info.tolist()
else:
pass
ys, xs = zip(*gather_info) ys, xs = zip(*gather_info)
logits_seq = logits_map[list(ys), list(xs)] logits_seq = logits_map[list(ys), list(xs)]
probs_seq = logits_seq probs_seq = logits_seq
...@@ -104,7 +131,8 @@ def instance_ctc_greedy_decoder(gather_info, logits_map, pts_num=4): ...@@ -104,7 +131,8 @@ def instance_ctc_greedy_decoder(gather_info, logits_map, pts_num=4):
def ctc_decoder_for_image(gather_info_list, def ctc_decoder_for_image(gather_info_list,
logits_map, logits_map,
Lexicon_Table, Lexicon_Table,
pts_num=6): pts_num=6,
point_gather_mode=None):
""" """
CTC decoder using multiple processes. CTC decoder using multiple processes.
""" """
...@@ -114,7 +142,10 @@ def ctc_decoder_for_image(gather_info_list, ...@@ -114,7 +142,10 @@ def ctc_decoder_for_image(gather_info_list,
if len(gather_info) < pts_num: if len(gather_info) < pts_num:
continue continue
dst_str, xys_list = instance_ctc_greedy_decoder( dst_str, xys_list = instance_ctc_greedy_decoder(
gather_info, logits_map, pts_num=pts_num) gather_info,
logits_map,
pts_num=pts_num,
point_gather_mode=point_gather_mode)
dst_str_readable = ''.join([Lexicon_Table[idx] for idx in dst_str]) dst_str_readable = ''.join([Lexicon_Table[idx] for idx in dst_str])
if len(dst_str_readable) < 2: if len(dst_str_readable) < 2:
continue continue
...@@ -356,7 +387,8 @@ def generate_pivot_list_fast(p_score, ...@@ -356,7 +387,8 @@ def generate_pivot_list_fast(p_score,
p_char_maps, p_char_maps,
f_direction, f_direction,
Lexicon_Table, Lexicon_Table,
score_thresh=0.5): score_thresh=0.5,
point_gather_mode=None):
""" """
return center point and end point of TCL instance; filter with the char maps; return center point and end point of TCL instance; filter with the char maps;
""" """
...@@ -384,7 +416,10 @@ def generate_pivot_list_fast(p_score, ...@@ -384,7 +416,10 @@ def generate_pivot_list_fast(p_score,
p_char_maps = p_char_maps.transpose([1, 2, 0]) p_char_maps = p_char_maps.transpose([1, 2, 0])
decoded_str, keep_yxs_list = ctc_decoder_for_image( decoded_str, keep_yxs_list = ctc_decoder_for_image(
all_pos_yxs, logits_map=p_char_maps, Lexicon_Table=Lexicon_Table) all_pos_yxs,
logits_map=p_char_maps,
Lexicon_Table=Lexicon_Table,
point_gather_mode=point_gather_mode)
return keep_yxs_list, decoded_str return keep_yxs_list, decoded_str
......
...@@ -28,13 +28,19 @@ from extract_textpoint_fast import generate_pivot_list_fast, restore_poly ...@@ -28,13 +28,19 @@ from extract_textpoint_fast import generate_pivot_list_fast, restore_poly
class PGNet_PostProcess(object): class PGNet_PostProcess(object):
# two different post-process # two different post-process
def __init__(self, character_dict_path, valid_set, score_thresh, outs_dict, def __init__(self,
shape_list): character_dict_path,
valid_set,
score_thresh,
outs_dict,
shape_list,
point_gather_mode=None):
self.Lexicon_Table = get_dict(character_dict_path) self.Lexicon_Table = get_dict(character_dict_path)
self.valid_set = valid_set self.valid_set = valid_set
self.score_thresh = score_thresh self.score_thresh = score_thresh
self.outs_dict = outs_dict self.outs_dict = outs_dict
self.shape_list = shape_list self.shape_list = shape_list
self.point_gather_mode = point_gather_mode
def pg_postprocess_fast(self): def pg_postprocess_fast(self):
p_score = self.outs_dict['f_score'] p_score = self.outs_dict['f_score']
...@@ -58,7 +64,8 @@ class PGNet_PostProcess(object): ...@@ -58,7 +64,8 @@ class PGNet_PostProcess(object):
p_char, p_char,
p_direction, p_direction,
self.Lexicon_Table, self.Lexicon_Table,
score_thresh=self.score_thresh) score_thresh=self.score_thresh,
point_gather_mode=self.point_gather_mode)
poly_list, keep_str_list = restore_poly(instance_yxs_list, seq_strs, poly_list, keep_str_list = restore_poly(instance_yxs_list, seq_strs,
p_border, ratio_w, ratio_h, p_border, ratio_w, ratio_h,
src_w, src_h, self.valid_set) src_w, src_h, self.valid_set)
......
...@@ -172,16 +172,16 @@ If you want to use OCR engine to obtain end-to-end prediction results, you can u ...@@ -172,16 +172,16 @@ If you want to use OCR engine to obtain end-to-end prediction results, you can u
# just predict using SER trained model # just predict using SER trained model
python3 tools/infer_kie_token_ser.py \ python3 tools/infer_kie_token_ser.py \
-c configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml \ -c configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml \
-o Architecture.Backbone.checkpoints=./pretrain_models/ser_vi_layoutxlm_xfund_pretrained/best_accuracy \ -o Architecture.Backbone.checkpoints=./pretrained_model/ser_vi_layoutxlm_xfund_pretrained/best_accuracy \
Global.infer_img=./ppstructure/docs/kie/input/zh_val_42.jpg Global.infer_img=./ppstructure/docs/kie/input/zh_val_42.jpg
# predict using SER and RE trained model at the same time # predict using SER and RE trained model at the same time
python3 ./tools/infer_kie_token_ser_re.py \ python3 ./tools/infer_kie_token_ser_re.py \
-c configs/kie/vi_layoutxlm/re_vi_layoutxlm_xfund_zh.yml \ -c configs/kie/vi_layoutxlm/re_vi_layoutxlm_xfund_zh.yml \
-o Architecture.Backbone.checkpoints=./pretrain_models/re_vi_layoutxlm_xfund_pretrained/best_accuracy \ -o Architecture.Backbone.checkpoints=./pretrained_model/re_vi_layoutxlm_xfund_pretrained/best_accuracy \
Global.infer_img=./train_data/XFUND/zh_val/image/zh_val_42.jpg \ Global.infer_img=./train_data/XFUND/zh_val/image/zh_val_42.jpg \
-c_ser configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml \ -c_ser configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml \
-o_ser Architecture.Backbone.checkpoints=./pretrain_models/ser_vi_layoutxlm_xfund_pretrained/best_accuracy -o_ser Architecture.Backbone.checkpoints=./pretrained_model/ser_vi_layoutxlm_xfund_pretrained/best_accuracy
``` ```
The visual result images and the predicted text file will be saved in the `Global.save_res_path` directory. The visual result images and the predicted text file will be saved in the `Global.save_res_path` directory.
...@@ -193,18 +193,18 @@ If you want to load the text detection and recognition results collected before, ...@@ -193,18 +193,18 @@ If you want to load the text detection and recognition results collected before,
# just predict using SER trained model # just predict using SER trained model
python3 tools/infer_kie_token_ser.py \ python3 tools/infer_kie_token_ser.py \
-c configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml \ -c configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml \
-o Architecture.Backbone.checkpoints=./pretrain_models/ser_vi_layoutxlm_xfund_pretrained/best_accuracy \ -o Architecture.Backbone.checkpoints=./pretrained_model/ser_vi_layoutxlm_xfund_pretrained/best_accuracy \
Global.infer_img=./train_data/XFUND/zh_val/val.json \ Global.infer_img=./train_data/XFUND/zh_val/val.json \
Global.infer_mode=False Global.infer_mode=False
# predict using SER and RE trained model at the same time # predict using SER and RE trained model at the same time
python3 ./tools/infer_kie_token_ser_re.py \ python3 ./tools/infer_kie_token_ser_re.py \
-c configs/kie/vi_layoutxlm/re_vi_layoutxlm_xfund_zh.yml \ -c configs/kie/vi_layoutxlm/re_vi_layoutxlm_xfund_zh.yml \
-o Architecture.Backbone.checkpoints=./pretrain_models/re_vi_layoutxlm_xfund_pretrained/best_accuracy \ -o Architecture.Backbone.checkpoints=./pretrained_model/re_vi_layoutxlm_xfund_pretrained/best_accuracy \
Global.infer_img=./train_data/XFUND/zh_val/val.json \ Global.infer_img=./train_data/XFUND/zh_val/val.json \
Global.infer_mode=False \ Global.infer_mode=False \
-c_ser configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml \ -c_ser configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml \
-o_ser Architecture.Backbone.checkpoints=./pretrain_models/ser_vi_layoutxlm_xfund_pretrained/best_accuracy -o_ser Architecture.Backbone.checkpoints=./pretrained_model/ser_vi_layoutxlm_xfund_pretrained/best_accuracy
``` ```
#### 4.2.3 Inference using PaddleInference #### 4.2.3 Inference using PaddleInference
......
...@@ -156,16 +156,16 @@ wget https://paddleocr.bj.bcebos.com/ppstructure/models/vi_layoutxlm/re_vi_layou ...@@ -156,16 +156,16 @@ wget https://paddleocr.bj.bcebos.com/ppstructure/models/vi_layoutxlm/re_vi_layou
# 仅预测SER模型 # 仅预测SER模型
python3 tools/infer_kie_token_ser.py \ python3 tools/infer_kie_token_ser.py \
-c configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml \ -c configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml \
-o Architecture.Backbone.checkpoints=./pretrain_models/ser_vi_layoutxlm_xfund_pretrained/best_accuracy \ -o Architecture.Backbone.checkpoints=./pretrained_model/ser_vi_layoutxlm_xfund_pretrained/best_accuracy \
Global.infer_img=./ppstructure/docs/kie/input/zh_val_42.jpg Global.infer_img=./ppstructure/docs/kie/input/zh_val_42.jpg
# SER + RE模型串联 # SER + RE模型串联
python3 ./tools/infer_kie_token_ser_re.py \ python3 ./tools/infer_kie_token_ser_re.py \
-c configs/kie/vi_layoutxlm/re_vi_layoutxlm_xfund_zh.yml \ -c configs/kie/vi_layoutxlm/re_vi_layoutxlm_xfund_zh.yml \
-o Architecture.Backbone.checkpoints=./pretrain_models/re_vi_layoutxlm_xfund_pretrained/best_accuracy \ -o Architecture.Backbone.checkpoints=./pretrained_model/re_vi_layoutxlm_xfund_pretrained/best_accuracy \
Global.infer_img=./train_data/XFUND/zh_val/image/zh_val_42.jpg \ Global.infer_img=./train_data/XFUND/zh_val/image/zh_val_42.jpg \
-c_ser configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml \ -c_ser configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml \
-o_ser Architecture.Backbone.checkpoints=./pretrain_models/ser_vi_layoutxlm_xfund_pretrained/best_accuracy -o_ser Architecture.Backbone.checkpoints=./pretrained_model/ser_vi_layoutxlm_xfund_pretrained/best_accuracy
``` ```
`Global.save_res_path`目录中会保存可视化的结果图像以及预测的文本文件。 `Global.save_res_path`目录中会保存可视化的结果图像以及预测的文本文件。
...@@ -177,18 +177,18 @@ python3 ./tools/infer_kie_token_ser_re.py \ ...@@ -177,18 +177,18 @@ python3 ./tools/infer_kie_token_ser_re.py \
# 仅预测SER模型 # 仅预测SER模型
python3 tools/infer_kie_token_ser.py \ python3 tools/infer_kie_token_ser.py \
-c configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml \ -c configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml \
-o Architecture.Backbone.checkpoints=./pretrain_models/ser_vi_layoutxlm_xfund_pretrained/best_accuracy \ -o Architecture.Backbone.checkpoints=./pretrained_model/ser_vi_layoutxlm_xfund_pretrained/best_accuracy \
Global.infer_img=./train_data/XFUND/zh_val/val.json \ Global.infer_img=./train_data/XFUND/zh_val/val.json \
Global.infer_mode=False Global.infer_mode=False
# SER + RE模型串联 # SER + RE模型串联
python3 ./tools/infer_kie_token_ser_re.py \ python3 ./tools/infer_kie_token_ser_re.py \
-c configs/kie/vi_layoutxlm/re_vi_layoutxlm_xfund_zh.yml \ -c configs/kie/vi_layoutxlm/re_vi_layoutxlm_xfund_zh.yml \
-o Architecture.Backbone.checkpoints=./pretrain_models/re_vi_layoutxlm_xfund_pretrained/best_accuracy \ -o Architecture.Backbone.checkpoints=./pretrained_model/re_vi_layoutxlm_xfund_pretrained/best_accuracy \
Global.infer_img=./train_data/XFUND/zh_val/val.json \ Global.infer_img=./train_data/XFUND/zh_val/val.json \
Global.infer_mode=False \ Global.infer_mode=False \
-c_ser configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml \ -c_ser configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml \
-o_ser Architecture.Backbone.checkpoints=./pretrain_models/ser_vi_layoutxlm_xfund_pretrained/best_accuracy -o_ser Architecture.Backbone.checkpoints=./pretrained_model/ser_vi_layoutxlm_xfund_pretrained/best_accuracy
``` ```
#### 4.2.3 基于PaddleInference的预测 #### 4.2.3 基于PaddleInference的预测
......
...@@ -68,6 +68,7 @@ def build_pre_process_list(args): ...@@ -68,6 +68,7 @@ def build_pre_process_list(args):
class TableStructurer(object): class TableStructurer(object):
def __init__(self, args): def __init__(self, args):
self.use_onnx = args.use_onnx
pre_process_list = build_pre_process_list(args) pre_process_list = build_pre_process_list(args)
if args.table_algorithm not in ['TableMaster']: if args.table_algorithm not in ['TableMaster']:
postprocess_params = { postprocess_params = {
...@@ -98,7 +99,11 @@ class TableStructurer(object): ...@@ -98,7 +99,11 @@ class TableStructurer(object):
return None, 0 return None, 0
img = np.expand_dims(img, axis=0) img = np.expand_dims(img, axis=0)
img = img.copy() img = img.copy()
if self.use_onnx:
input_dict = {}
input_dict[self.input_tensor.name] = img
outputs = self.predictor.run(self.output_tensors, input_dict)
else:
self.input_tensor.copy_from_cpu(img) self.input_tensor.copy_from_cpu(img)
self.predictor.run() self.predictor.run()
outputs = [] outputs = []
......
#!/bin/bash #!/bin/bash
source test_tipc/common_func.sh source test_tipc/common_func.sh
# set env
python=python
export str_tmp=$(echo `pip list|grep paddlepaddle-gpu|awk -F ' ' '{print $2}'`)
export frame_version=${str_tmp%%.post*}
export frame_commit=$(echo `${python} -c "import paddle;print(paddle.version.commit)"`)
# run benchmark sh # run benchmark sh
# Usage: # Usage:
# bash run_benchmark_train.sh config.txt params # bash run_benchmark_train.sh config.txt params
...@@ -86,6 +80,13 @@ dataline=`cat $FILENAME` ...@@ -86,6 +80,13 @@ dataline=`cat $FILENAME`
IFS=$'\n' IFS=$'\n'
lines=(${dataline}) lines=(${dataline})
model_name=$(func_parser_value "${lines[1]}") model_name=$(func_parser_value "${lines[1]}")
python_name=$(func_parser_value "${lines[2]}")
# set env
python=${python_name}
export str_tmp=$(echo `pip list|grep paddlepaddle-gpu|awk -F ' ' '{print $2}'`)
export frame_version=${str_tmp%%.post*}
export frame_commit=$(echo `${python} -c "import paddle;print(paddle.version.commit)"`)
# 获取benchmark_params所在的行数 # 获取benchmark_params所在的行数
line_num=`grep -n "train_benchmark_params" $FILENAME | cut -d ":" -f 1` line_num=`grep -n "train_benchmark_params" $FILENAME | cut -d ":" -f 1`
......
...@@ -13,7 +13,7 @@ train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ ...@@ -13,7 +13,7 @@ train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
null:null null:null
## ##
trainer:norm_train trainer:norm_train
norm_train:tools/train.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained Global.print_batch_step=1 Train.loader.shuffle=false norm_train:tools/train.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained Global.print_batch_step=2 Train.loader.shuffle=false
pact_train:null pact_train:null
fpgm_train:null fpgm_train:null
distill_train:null distill_train:null
......
===========================train_params===========================
model_name:det_r18_ct
python:python3.7
gpu_list:0|0,1
Global.use_gpu:True|True
Global.auto_cast:null
Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=300
Global.save_model_dir:./output/
Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_lite_infer=4
Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./train_data/total_text/test/rgb/
null:null
##
trainer:norm_train
norm_train:tools/train.py -c configs/det/det_r18_vd_ct.yml -o Global.print_batch_step=1 Train.loader.shuffle=false
quant_export:null
fpgm_export:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:tools/eval.py -c configs/det/det_r18_vd_ct.yml -o
null:null
##
===========================infer_params===========================
Global.save_inference_dir:./output/
Global.checkpoints:
norm_export:tools/export_model.py -c configs/det/det_r18_vd_ct.yml -o
quant_export:null
fpgm_export:null
distill_export:null
export1:null
export2:null
##
train_model:./inference/det_r18_vd_ct/best_accuracy
infer_export:tools/export_model.py -c configs/det/det_r18_vd_ct.yml -o
infer_quant:False
inference:tools/infer/predict_det.py
--use_gpu:True|False
--enable_mkldnn:False
--cpu_threads:6
--rec_batch_num:1
--use_tensorrt:False
--precision:fp32
--det_model_dir:
--image_dir:./inference/ch_det_data_50/all-sum-510/
--save_log_path:null
--benchmark:True
null:null
===========================infer_benchmark_params==========================
random_infer_input:[{float32,[3,640,640]}];[{float32,[3,960,960]}]
\ No newline at end of file
===========================cpp_infer_params===========================
model_name:en_table_structure
use_opencv:True
infer_model:./inference/en_ppocr_mobile_v2.0_table_structure_infer/
infer_quant:False
inference:./deploy/cpp_infer/build/ppocr --rec_img_h=32 --det_model_dir=./inference/en_ppocr_mobile_v2.0_table_det_infer --rec_model_dir=./inference/en_ppocr_mobile_v2.0_table_rec_infer --rec_char_dict_path=./ppocr/utils/dict/table_dict.txt --table_char_dict_path=./ppocr/utils/dict/table_structure_dict.txt --limit_side_len=736 --limit_type=min --output=./output/table --merge_no_span_structure=False --type=structure --table=True
--use_gpu:True|False
--enable_mkldnn:False
--cpu_threads:6
--rec_batch_num:6
--use_tensorrt:False
--precision:fp32
--table_model_dir:
--image_dir:./ppstructure/docs/table/table.jpg
null:null
--benchmark:True
--det:True
--rec:True
--cls:False
--use_angle_cls:False
\ No newline at end of file
Global:
use_gpu: True
epoch_num: &epoch_num 200
log_smooth_window: 10
print_batch_step: 10
save_model_dir: ./output/ser_layoutxlm_xfund_zh
save_epoch_step: 2000
# evaluation is run every 10 iterations after the 0th iteration
eval_batch_step: [ 0, 187 ]
cal_metric_during_train: False
save_inference_dir:
use_visualdl: False
seed: 2022
infer_img: ppstructure/docs/kie/input/zh_val_42.jpg
save_res_path: ./output/ser_layoutxlm_xfund_zh/res
Architecture:
model_type: kie
algorithm: &algorithm "LayoutXLM"
Transform:
Backbone:
name: LayoutXLMForSer
pretrained: True
checkpoints:
num_classes: &num_classes 7
Loss:
name: VQASerTokenLayoutLMLoss
num_classes: *num_classes
key: "backbone_out"
Optimizer:
name: AdamW
beta1: 0.9
beta2: 0.999
lr:
name: Linear
learning_rate: 0.00005
epochs: *epoch_num
warmup_epoch: 2
regularizer:
name: L2
factor: 0.00000
PostProcess:
name: VQASerTokenLayoutLMPostProcess
class_path: &class_path train_data/XFUND/class_list_xfun.txt
Metric:
name: VQASerTokenMetric
main_indicator: hmean
Train:
dataset:
name: SimpleDataSet
data_dir: train_data/XFUND/zh_train/image
label_file_list:
- train_data/XFUND/zh_train/train.json
ratio_list: [ 1.0 ]
transforms:
- DecodeImage: # load image
img_mode: RGB
channel_first: False
- VQATokenLabelEncode: # Class handling label
contains_re: False
algorithm: *algorithm
class_path: *class_path
- VQATokenPad:
max_seq_len: &max_seq_len 512
return_attention_mask: True
- VQASerTokenChunk:
max_seq_len: *max_seq_len
- Resize:
size: [224,224]
- NormalizeImage:
scale: 1
mean: [ 123.675, 116.28, 103.53 ]
std: [ 58.395, 57.12, 57.375 ]
order: 'hwc'
- ToCHWImage:
- KeepKeys:
keep_keys: [ 'input_ids', 'bbox', 'attention_mask', 'token_type_ids', 'image', 'labels'] # dataloader will return list in this order
loader:
shuffle: True
drop_last: False
batch_size_per_card: 8
num_workers: 4
Eval:
dataset:
name: SimpleDataSet
data_dir: train_data/XFUND/zh_val/image
label_file_list:
- train_data/XFUND/zh_val/val.json
transforms:
- DecodeImage: # load image
img_mode: RGB
channel_first: False
- VQATokenLabelEncode: # Class handling label
contains_re: False
algorithm: *algorithm
class_path: *class_path
- VQATokenPad:
max_seq_len: *max_seq_len
return_attention_mask: True
- VQASerTokenChunk:
max_seq_len: *max_seq_len
- Resize:
size: [224,224]
- NormalizeImage:
scale: 1
mean: [ 123.675, 116.28, 103.53 ]
std: [ 58.395, 57.12, 57.375 ]
order: 'hwc'
- ToCHWImage:
- KeepKeys:
keep_keys: [ 'input_ids', 'bbox', 'attention_mask', 'token_type_ids', 'image', 'labels'] # dataloader will return list in this order
loader:
shuffle: False
drop_last: False
batch_size_per_card: 8
num_workers: 4
...@@ -13,7 +13,7 @@ train_infer_img_dir:ppstructure/docs/kie/input/zh_val_42.jpg ...@@ -13,7 +13,7 @@ train_infer_img_dir:ppstructure/docs/kie/input/zh_val_42.jpg
null:null null:null
## ##
trainer:norm_train trainer:norm_train
norm_train:tools/train.py -c configs/kie/layoutlm_series/ser_layoutlm_xfund_zh.yml -o Global.print_batch_step=1 Global.eval_batch_step=[1000,1000] Train.loader.shuffle=false norm_train:tools/train.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o Global.print_batch_step=1 Global.eval_batch_step=[1000,1000] Train.loader.shuffle=false
pact_train:null pact_train:null
fpgm_train:null fpgm_train:null
distill_train:null distill_train:null
...@@ -27,7 +27,7 @@ null:null ...@@ -27,7 +27,7 @@ null:null
===========================infer_params=========================== ===========================infer_params===========================
Global.save_inference_dir:./output/ Global.save_inference_dir:./output/
Architecture.Backbone.checkpoints: Architecture.Backbone.checkpoints:
norm_export:tools/export_model.py -c configs/kie/layoutlm_series/ser_layoutlm_xfund_zh.yml -o norm_export:tools/export_model.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o
quant_export: quant_export:
fpgm_export: fpgm_export:
distill_export:null distill_export:null
......
===========================train_params===========================
model_name:layoutxlm_ser_PACT
python:python3.7
gpu_list:0|0,1
Global.use_gpu:True|True
Global.auto_cast:fp32
Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=17
Global.save_model_dir:./output/
Train.loader.batch_size_per_card:lite_train_lite_infer=4|whole_train_whole_infer=8
Architecture.Backbone.checkpoints:pretrain_models/ser_LayoutXLM_xfun_zh
train_model_name:latest
train_infer_img_dir:ppstructure/docs/kie/input/zh_val_42.jpg
null:null
##
trainer:pact_train
norm_train:null
pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o
fpgm_train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:null
null:null
##
===========================infer_params===========================
Global.save_inference_dir:./output/
Architecture.Backbone.checkpoints:
norm_export:null
quant_export:deploy/slim/quantization/export_model.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o
fpgm_export:null
distill_export:null
export1:null
export2:null
##
infer_model:null
infer_export:null
infer_quant:False
inference:ppstructure/kie/predict_kie_token_ser.py --kie_algorithm=LayoutXLM --ser_dict_path=train_data/XFUND/class_list_xfun.txt --output=output
--use_gpu:True|False
--enable_mkldnn:False
--cpu_threads:6
--rec_batch_num:1
--use_tensorrt:False
--precision:fp32
--ser_model_dir:
--image_dir:./ppstructure/docs/kie/input/zh_val_42.jpg
null:null
--benchmark:False
null:null
===========================infer_benchmark_params==========================
random_infer_input:[{float32,[3,224,224]}]
===========================train_params===========================
model_name:layoutxlm_ser_KL
python:python3.7
Global.pretrained_model:
Global.save_inference_dir:null
infer_model:./inference/ser_LayoutXLM_xfun_zh_infer/
infer_export:deploy/slim/quantization/quant_kl.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o Train.loader.batch_size_per_card=1 Eval.loader.batch_size_per_card=1
infer_quant:True
inference:ppstructure/kie/predict_kie_token_ser.py --kie_algorithm=LayoutXLM --ser_dict_path=./train_data/XFUND/class_list_xfun.txt
--use_gpu:True|False
--enable_mkldnn:False
--cpu_threads:6
--rec_batch_num:1
--use_tensorrt:False
--precision:int8
--ser_model_dir:
--image_dir:./ppstructure/docs/kie/input/zh_val_42.jpg
null:null
--benchmark:False
null:null
null:null
===========================cpp_infer_params===========================
model_name:slanet
use_opencv:True
infer_model:./inference/ch_ppstructure_mobile_v2.0_SLANet_infer/
infer_quant:False
inference:./deploy/cpp_infer/build/ppocr --det_model_dir=./inference/ch_PP-OCRv3_det_infer --rec_model_dir=./inference/ch_PP-OCRv3_rec_infer --output=./output/table --type=structure --table=True --rec_char_dict_path=./ppocr/utils/ppocr_keys_v1.txt --table_char_dict_path=./ppocr/utils/dict/table_structure_dict_ch.txt
--use_gpu:True|False
--enable_mkldnn:False
--cpu_threads:6
--rec_batch_num:6
--use_tensorrt:False
--precision:fp32
--table_model_dir:
--image_dir:./ppstructure/docs/table/table.jpg
null:null
--benchmark:True
--det:True
--rec:True
--cls:False
--use_angle_cls:False
\ No newline at end of file
...@@ -37,8 +37,8 @@ export2:null ...@@ -37,8 +37,8 @@ export2:null
infer_model:null infer_model:null
infer_export:null infer_export:null
infer_quant:False infer_quant:False
inference:ppstructure/table/predict_structure.py --table_char_dict_path=./ppocr/utils/dict/table_master_structure_dict.txt --image_dir=./ppstructure/docs/table/table.jpg --output ./output/table --table_algorithm=TableMaster --table_max_len=480 inference:ppstructure/table/predict_structure.py --table_char_dict_path=./ppocr/utils/dict/table_master_structure_dict.txt --output ./output/table --table_algorithm=TableMaster --table_max_len=480
--use_gpu:True|False --use_gpu:True
--enable_mkldnn:False --enable_mkldnn:False
--cpu_threads:6 --cpu_threads:6
--rec_batch_num:1 --rec_batch_num:1
......
...@@ -24,12 +24,7 @@ Jetson端基础训练预测功能测试的主程序为`test_inference_inference. ...@@ -24,12 +24,7 @@ Jetson端基础训练预测功能测试的主程序为`test_inference_inference.
``` ```
- 安装autolog(规范化日志输出工具) - 安装autolog(规范化日志输出工具)
``` ```
git clone https://github.com/LDOUBLEV/AutoLog pip install https://paddleocr.bj.bcebos.com/libs/auto_log-1.2.0-py3-none-any.whl
cd AutoLog
pip install -r requirements.txt
python setup.py bdist_wheel
pip install ./dist/auto_log-1.0.0-py3-none-any.whl
cd ../
``` ```
- 安装PaddleSlim (可选) - 安装PaddleSlim (可选)
``` ```
......
# Mac端基础训练预测功能测试 # Mac端基础训练预测功能测试
Mac端基础训练预测功能测试的主程序为`test_train_inference_python.sh`,可以测试基于Python的模型CPU训练,包括裁剪、量化、蒸馏训练,以及评估、CPU推理等基本功能。 Mac端基础训练预测功能测试的主程序为`test_train_inference_python.sh`,可以测试基于Python的模型CPU训练,包括裁剪、PACT在线量化、蒸馏训练,以及评估、CPU推理等基本功能。
注:Mac端测试用法同linux端测试方法类似,但是无需测试需要在GPU上运行的测试。 注:Mac端测试用法同linux端测试方法类似,但是无需测试需要在GPU上运行的测试。
...@@ -10,7 +10,7 @@ Mac端基础训练预测功能测试的主程序为`test_train_inference_python. ...@@ -10,7 +10,7 @@ Mac端基础训练预测功能测试的主程序为`test_train_inference_python.
| 算法名称 | 模型名称 | 单机单卡(CPU) | 单机多卡 | 多机多卡 | 模型压缩(CPU) | | 算法名称 | 模型名称 | 单机单卡(CPU) | 单机多卡 | 多机多卡 | 模型压缩(CPU) |
| :---- | :---- | :---- | :---- | :---- | :---- | | :---- | :---- | :---- | :---- | :---- | :---- |
| DB | ch_ppocr_mobile_v2.0_det| 正常训练 | - | - | 正常训练:FPGM裁剪、PACT量化 <br> 离线量化(无需训练) | | DB | ch_ppocr_mobile_v2.0_det| 正常训练 | - | - | 正常训练:FPGM裁剪、PACT量化 |
- 预测相关:基于训练是否使用量化,可以将训练产出的模型可以分为`正常模型``量化模型`,这两类模型对应的预测功能汇总如下, - 预测相关:基于训练是否使用量化,可以将训练产出的模型可以分为`正常模型``量化模型`,这两类模型对应的预测功能汇总如下,
...@@ -26,19 +26,14 @@ Mac端基础训练预测功能测试的主程序为`test_train_inference_python. ...@@ -26,19 +26,14 @@ Mac端基础训练预测功能测试的主程序为`test_train_inference_python.
Mac端无GPU,环境准备只需要Python环境即可,安装PaddlePaddle等依赖参考下述文档。 Mac端无GPU,环境准备只需要Python环境即可,安装PaddlePaddle等依赖参考下述文档。
### 2.1 安装依赖 ### 2.1 安装依赖
- 安装PaddlePaddle >= 2.0 - 安装PaddlePaddle >= 2.3
- 安装PaddleOCR依赖 - 安装PaddleOCR依赖
``` ```
pip install -r ../requirements.txt pip install -r ../requirements.txt
``` ```
- 安装autolog(规范化日志输出工具) - 安装autolog(规范化日志输出工具)
``` ```
git clone https://github.com/LDOUBLEV/AutoLog pip install https://paddleocr.bj.bcebos.com/libs/auto_log-1.2.0-py3-none-any.whl
cd AutoLog
pip install -r requirements.txt
python setup.py bdist_wheel
pip install ./dist/auto_log-1.0.0-py3-none-any.whl
cd ../
``` ```
- 安装PaddleSlim (可选) - 安装PaddleSlim (可选)
``` ```
...@@ -49,53 +44,46 @@ Mac端无GPU,环境准备只需要Python环境即可,安装PaddlePaddle等 ...@@ -49,53 +44,46 @@ Mac端无GPU,环境准备只需要Python环境即可,安装PaddlePaddle等
### 2.2 功能测试 ### 2.2 功能测试
先运行`prepare.sh`准备数据和模型,然后运行`test_train_inference_python.sh`进行测试,最终在```test_tipc/output```目录下生成`python_infer_*.log`格式的日志文件。 先运行`prepare.sh`准备数据和模型,然后运行`test_train_inference_python.sh`进行测试,最终在```test_tipc/output```目录下生成`{model_name}/lite_train_lite_infer/*.log`格式的日志文件。
`test_train_inference_python.sh`包含5种运行模式,每种模式的运行数据不同,分别用于测试速度和精度,分别是: `test_train_inference_python.sh`包含基础链条的4种运行模式,每种模式的运行数据不同,分别用于测试速度和精度,分别是:
- 模式1:lite_train_lite_infer,使用少量数据训练,用于快速验证训练到预测的走通流程,不验证精度和速度; - 模式1:lite_train_lite_infer,使用少量数据训练,用于快速验证训练到预测的走通流程,不验证精度和速度;
```shell ```shell
# 同linux端运行不同的是,Mac端测试使用新的配置文件mac_ppocr_det_mobile_params.txt, # 同linux端运行不同的是,Mac端测试使用新的配置文件mac_ppocr_det_mobile_params.txt,
# 配置文件中默认去掉了GPU和mkldnn相关的测试链条 # 配置文件中默认去掉了GPU和mkldnn相关的测试链条
bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'lite_train_lite_infer' bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'lite_train_lite_infer'
bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'lite_train_lite_infer' bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'lite_train_lite_infer'
``` ```
- 模式2:lite_train_whole_infer,使用少量数据训练,一定量数据预测,用于验证训练后的模型执行预测,预测速度是否合理; - 模式2:lite_train_whole_infer,使用少量数据训练,一定量数据预测,用于验证训练后的模型执行预测,预测速度是否合理;
```shell ```shell
bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'lite_train_whole_infer' bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'lite_train_whole_infer'
bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'lite_train_whole_infer' bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'lite_train_whole_infer'
``` ```
- 模式3:whole_infer,不训练,全量数据预测,走通开源模型评估、动转静,检查inference model预测时间和精度; - 模式3:whole_infer,不训练,全量数据预测,走通开源模型评估、动转静,检查inference model预测时间和精度;
```shell ```shell
bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'whole_infer' bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'whole_infer'
# 用法1: # 用法1:
bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'whole_infer' bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'whole_infer'
# 用法2: 指定GPU卡预测,第三个传入参数为GPU卡号 # 用法2: 指定GPU卡预测,第三个传入参数为GPU卡号
bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'whole_infer' '1' bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'whole_infer' '1'
``` ```
- 模式4:whole_train_whole_infer,CE: 全量数据训练,全量数据预测,验证模型训练精度,预测精度,预测速度;(Mac端不建议运行此模式) - 模式4:whole_train_whole_infer,CE: 全量数据训练,全量数据预测,验证模型训练精度,预测精度,预测速度;(Mac端不建议运行此模式)
```shell ```shell
bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'whole_train_whole_infer' bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'whole_train_whole_infer'
bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'whole_train_whole_infer' bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'whole_train_whole_infer'
```
- 模式5:klquant_whole_infer,测试离线量化;
```shell
bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_mac_cpu.txt 'klquant_whole_infer'
bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_mac_cpu.txt 'klquant_whole_infer'
``` ```
运行相应指令后,在`test_tipc/output`文件夹下自动会保存运行日志。如`lite_train_lite_infer`模式下,会运行训练+inference的链条,因此,在`test_tipc/output`文件夹有以下文件: 运行相应指令后,在`test_tipc/output`文件夹下自动会保存运行日志。如`lite_train_lite_infer`模式下,会运行训练+inference的链条,因此,在`test_tipc/output`文件夹有以下文件:
``` ```
test_tipc/output/ test_tipc/output/model_name/lite_train_lite_infer/
|- results_python.log # 运行指令状态的日志 |- results_python.log # 运行指令状态的日志
|- norm_train_gpus_-1_autocast_null/ # CPU上正常训练的训练日志和模型保存文件夹 |- norm_train_gpus_-1_autocast_null/ # CPU上正常训练的训练日志和模型保存文件夹
|- pact_train_gpus_-1_autocast_null/ # CPU上量化训练的训练日志和模型保存文件夹
...... ......
|- python_infer_cpu_usemkldnn_False_threads_1_batchsize_1.log # CPU上关闭Mkldnn线程数设置为1,测试batch_size=1条件下的预测运行日志 |- python_infer_cpu_usemkldnn_False_threads_1_precision_fp32_batchsize_1.log # CPU上关闭Mkldnn线程数设置为1,测试batch_size=1条件下的fp32精度预测运行日志
...... ......
``` ```
......
...@@ -17,15 +17,15 @@ C++预测功能测试的主程序为`test_inference_cpp.sh`,可以测试基于 ...@@ -17,15 +17,15 @@ C++预测功能测试的主程序为`test_inference_cpp.sh`,可以测试基于
运行环境配置请参考[文档](./install.md)的内容配置TIPC的运行环境。 运行环境配置请参考[文档](./install.md)的内容配置TIPC的运行环境。
### 2.1 功能测试 ### 2.1 功能测试
先运行`prepare.sh`准备数据和模型,然后运行`test_inference_cpp.sh`进行测试,最终在```test_tipc/output```目录下生成`cpp_infer_*.log`后缀的日志文件。 先运行`prepare.sh`准备数据和模型,然后运行`test_inference_cpp.sh`进行测试,最终在```test_tipc/output/{model_name}/cpp_infer```目录下生成`cpp_infer_*.log`后缀的日志文件。
```shell ```shell
bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt "cpp_infer" bash test_tipc/prepare.sh ./test_tipc/configs/ch_PP-OCRv2_rec/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt "cpp_infer"
# 用法1: # 用法1:
bash test_tipc/test_inference_cpp.sh test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt bash test_tipc/test_inference_cpp.sh test_tipc/configs/ch_PP-OCRv2_rec/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt
# 用法2: 指定GPU卡预测,第三个传入参数为GPU卡号 # 用法2: 指定GPU卡预测,第三个传入参数为GPU卡号
bash test_tipc/test_inference_cpp.sh test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt '1' bash test_tipc/test_inference_cpp.sh test_tipc/configs/ch_PP-OCRv2_rec/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt '1'
``` ```
运行预测指令后,在`test_tipc/output`文件夹下自动会保存运行日志,包括以下文件: 运行预测指令后,在`test_tipc/output`文件夹下自动会保存运行日志,包括以下文件:
...@@ -33,23 +33,21 @@ bash test_tipc/test_inference_cpp.sh test_tipc/configs/ch_ppocr_mobile_v2.0_det/ ...@@ -33,23 +33,21 @@ bash test_tipc/test_inference_cpp.sh test_tipc/configs/ch_ppocr_mobile_v2.0_det/
```shell ```shell
test_tipc/output/ test_tipc/output/
|- results_cpp.log # 运行指令状态的日志 |- results_cpp.log # 运行指令状态的日志
|- cpp_infer_cpu_usemkldnn_False_threads_1_precision_fp32_batchsize_1.log # CPU上不开启Mkldnn,线程数设置为1,测试batch_size=1条件下的预测运行日志 |- cpp_infer_cpu_usemkldnn_False_threads_6_precision_fp32_batchsize_6.log # CPU上不开启Mkldnn,线程数设置为6,测试batch_size=6条件下的预测运行日志
|- cpp_infer_cpu_usemkldnn_False_threads_6_precision_fp32_batchsize_1.log # CPU上不开启Mkldnn,线程数设置为6,测试batch_size=1条件下的预测运行日志 |- cpp_infer_gpu_usetrt_False_precision_fp32_batchsize_6.log # GPU上不开启TensorRT,测试batch_size=6的fp32精度预测日志
|- cpp_infer_gpu_usetrt_False_precision_fp32_batchsize_1.log # GPU上不开启TensorRT,测试batch_size=1的fp32精度预测日志
|- cpp_infer_gpu_usetrt_True_precision_fp16_batchsize_1.log # GPU上开启TensorRT,测试batch_size=1的fp16精度预测日志
...... ......
``` ```
其中results_cpp.log中包含了每条指令的运行状态,如果运行成功会输出: 其中results_cpp.log中包含了每条指令的运行状态,如果运行成功会输出:
``` ```
Run successfully with command - ./deploy/cpp_infer/build/ppocr det --use_gpu=False --enable_mkldnn=False --cpu_threads=6 --det_model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ --rec_batch_num=1 --image_dir=./inference/ch_det_data_50/all-sum-510/ --benchmar k=True > ./test_tipc/output/cpp_infer_cpu_usemkldnn_False_threads_6_precision_fp32_batchsize_1.log 2>&1 ! [33m Run successfully with command - ch_PP-OCRv2_rec - ./deploy/cpp_infer/build/ppocr --rec_char_dict_path=./ppocr/utils/ppocr_keys_v1.txt --rec_img_h=32 --use_gpu=True --use_tensorrt=False --precision=fp32 --rec_model_dir=./inference/ch_PP-OCRv2_rec_infer/ --rec_batch_num=6 --image_dir=./inference/rec_inference/ --benchmark=True --det=False --rec=True --cls=False --use_angle_cls=False > ./test_tipc/output/ch_PP-OCRv2_rec/cpp_infer/cpp_infer_gpu_usetrt_False_precision_fp32_batchsize_6.log 2>&1 ! 
Run successfully with command - ./deploy/cpp_infer/build/ppocr det --use_gpu=True --use_tensorrt=False --precision=fp32 --det_model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ --rec_batch_num=1 --image_dir=./inference/ch_det_data_50/all-sum-510/ --benchmark =True > ./test_tipc/output/cpp_infer_gpu_usetrt_False_precision_fp32_batchsize_1.log 2>&1 !  Run successfully with command - ch_PP-OCRv2_rec - ./deploy/cpp_infer/build/ppocr --rec_char_dict_path=./ppocr/utils/ppocr_keys_v1.txt --rec_img_h=32 --use_gpu=False --enable_mkldnn=False --cpu_threads=6 --rec_model_dir=./inference/ch_PP-OCRv2_rec_infer/ --rec_batch_num=6 --image_dir=./inference/rec_inference/ --benchmark=True --det=False --rec=True --cls=False --use_angle_cls=False > ./test_tipc/output/ch_PP-OCRv2_rec/cpp_infer/cpp_infer_cpu_usemkldnn_False_threads_6_precision_fp32_batchsize_6.log 2>&1 ! 
...... ......
``` ```
如果运行失败,会输出: 如果运行失败,会输出:
``` ```
Run failed with command - ./deploy/cpp_infer/build/ppocr det --use_gpu=True --use_tensorrt=True --precision=fp32 --det_model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ --rec_batch_num=1 --image_dir=./inference/ch_det_data_50/all-sum-510/ --benchmark=True > ./test_tipc/output/cpp_infer_gpu_usetrt_True_precision_fp32_batchsize_1.log 2>&1 ! Run failed with command - ch_PP-OCRv2_rec - ./deploy/cpp_infer/build/ppocr --rec_char_dict_path=./ppocr/utils/ppocr_keys_v1.txt --rec_img_h=32 --use_gpu=True --use_tensorrt=False --precision=fp32 --rec_model_dir=./inference/ch_PP-OCRv2_rec_infer/ --rec_batch_num=6 --image_dir=./inference/rec_inference/ --benchmark=True --det=False --rec=True --cls=False --use_angle_cls=False > ./test_tipc/output/ch_PP-OCRv2_rec/cpp_infer/cpp_infer_gpu_usetrt_False_precision_fp32_batchsize_6.log 2>&1 !
Run failed with command - ./deploy/cpp_infer/build/ppocr det --use_gpu=True --use_tensorrt=True --precision=fp16 --det_model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ --rec_batch_num=1 --image_dir=./inference/ch_det_data_50/all-sum-510/ --benchmark=True > ./test_tipc/output/cpp_infer_gpu_usetrt_True_precision_fp16_batchsize_1.log 2>&1 ! Run failed with command - ch_PP-OCRv2_rec - ./deploy/cpp_infer/build/ppocr --rec_char_dict_path=./ppocr/utils/ppocr_keys_v1.txt --rec_img_h=32 --use_gpu=False --enable_mkldnn=False --cpu_threads=6 --rec_model_dir=./inference/ch_PP-OCRv2_rec_infer/ --rec_batch_num=6 --image_dir=./inference/rec_inference/ --benchmark=True --det=False --rec=True --cls=False --use_angle_cls=False > ./test_tipc/output/ch_PP-OCRv2_rec/cpp_infer/cpp_infer_cpu_usemkldnn_False_threads_6_precision_fp32_batchsize_6.log 2>&1 !
...... ......
``` ```
可以很方便的根据results_cpp.log中的内容判定哪一个指令运行错误。 可以很方便的根据results_cpp.log中的内容判定哪一个指令运行错误。
......
...@@ -15,29 +15,30 @@ PaddleServing预测功能测试的主程序为`test_paddle2onnx.sh`,可以测 ...@@ -15,29 +15,30 @@ PaddleServing预测功能测试的主程序为`test_paddle2onnx.sh`,可以测
## 2. 测试流程 ## 2. 测试流程
### 2.1 功能测试 ### 2.1 功能测试
先运行`prepare.sh`准备数据和模型,然后运行`test_paddle2onnx.sh`进行测试,最终在```test_tipc/output```目录下生成`paddle2onnx_infer_*.log`后缀的日志文件。 先运行`prepare.sh`准备数据和模型,然后运行`test_paddle2onnx.sh`进行测试,最终在```test_tipc/output/{model_name}/paddle2onnx```目录下生成`paddle2onnx_infer_*.log`后缀的日志文件。
```shell ```shell
bash test_tipc/prepare.sh ./test_tipc/configs/ppocr_det_mobile/model_linux_gpu_normal_normal_paddle2onnx_python_linux_cpu.txt "paddle2onnx_infer" bash test_tipc/prepare.sh ./test_tipc/configs/ch_PP-OCRv2_det/model_linux_gpu_normal_normal_paddle2onnx_python_linux_cpu.txt "paddle2onnx_infer"
# 用法: # 用法:
bash test_tipc/test_paddle2onnx.sh ./test_tipc/configs/ppocr_det_mobile/model_linux_gpu_normal_normal_paddle2onnx_python_linux_cpu.txt bash test_tipc/test_paddle2onnx.sh ./test_tipc/configs/ch_PP-OCRv2_det/model_linux_gpu_normal_normal_paddle2onnx_python_linux_cpu.txt
``` ```
#### 运行结果 #### 运行结果
各测试的运行情况会打印在 `test_tipc/output/results_paddle2onnx.log` 中: 各测试的运行情况会打印在 `test_tipc/output/{model_name}/paddle2onnx/results_paddle2onnx.log` 中:
运行成功时会输出: 运行成功时会输出:
``` ```
Run successfully with command - paddle2onnx --model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ --model_filename=inference.pdmodel --params_filename=inference.pdiparams --save_file=./inference/det_mobile_onnx/model.onnx --opset_version=10 --enable_onnx_checker=True! Run successfully with command - ch_PP-OCRv2_det - paddle2onnx --model_dir=./inference/ch_PP-OCRv2_det_infer/ --model_filename=inference.pdmodel --params_filename=inference.pdiparams --save_file=./inference/det_v2_onnx/model.onnx --opset_version=10 --enable_onnx_checker=True!
Run successfully with command - python test_tipc/onnx_inference/predict_det.py --use_gpu=False --image_dir=./inference/ch_det_data_50/all-sum-510/ --det_model_dir=./inference/det_mobile_onnx/model.onnx 2>&1 ! Run successfully with command - ch_PP-OCRv2_det - python3.7 tools/infer/predict_det.py --use_gpu=True --image_dir=./inference/ch_det_data_50/all-sum-510/ --det_model_dir=./inference/det_v2_onnx/model.onnx --use_onnx=True > ./test_tipc/output/ch_PP-OCRv2_det/paddle2onnx/paddle2onnx_infer_gpu.log 2>&1 !
Run successfully with command - ch_PP-OCRv2_det - python3.7 tools/infer/predict_det.py --use_gpu=False --image_dir=./inference/ch_det_data_50/all-sum-510/ --det_model_dir=./inference/det_v2_onnx/model.onnx --use_onnx=True > ./test_tipc/output/ch_PP-OCRv2_det/paddle2onnx/paddle2onnx_infer_cpu.log 2>&1 !
``` ```
运行失败时会输出: 运行失败时会输出:
``` ```
Run failed with command - paddle2onnx --model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ --model_filename=inference.pdmodel --params_filename=inference.pdiparams --save_file=./inference/det_mobile_onnx/model.onnx --opset_version=10 --enable_onnx_checker=True! Run failed with command - ch_PP-OCRv2_det - paddle2onnx --model_dir=./inference/ch_PP-OCRv2_det_infer/ --model_filename=inference.pdmodel --params_filename=inference.pdiparams --save_file=./inference/det_v2_onnx/model.onnx --opset_version=10 --enable_onnx_checker=True!
... ...
``` ```
......
# Linux GPU/CPU KL离线量化训练推理测试
Linux GPU/CPU KL离线量化训练推理测试的主程序为`test_ptq_inference_python.sh`,可以测试基于Python的模型训练、评估、推理等基本功能。
## 1. 测试结论汇总
- 训练相关:
| 算法名称 | 模型名称 | 单机单卡 |
| :----: | :----: | :----: |
| | model_name | KL离线量化训练 |
- 推理相关:
| 算法名称 | 模型名称 | device_CPU | device_GPU | batchsize |
| :----: | :----: | :----: | :----: | :----: |
| | model_name | 支持 | 支持 | 1 |
## 2. 测试流程
### 2.1 准备数据和模型
先运行`prepare.sh`准备数据和模型,然后运行`test_ptq_inference_python.sh`进行测试,最终在```test_tipc/output/{model_name}/whole_infer```目录下生成`python_infer_*.log`后缀的日志文件。
```shell
bash test_tipc/prepare.sh ./test_tipc/configs/ch_PP-OCRv2_det/train_ptq_infer_python.txt "whole_infer"
# 用法:
bash test_tipc/test_ptq_inference_python.sh ./test_tipc/configs/ch_PP-OCRv2_det/train_ptq_infer_python.txt "whole_infer"
```
#### 运行结果
各测试的运行情况会打印在 `test_tipc/output/{model_name}/whole_infer/results_python.log` 中:
运行成功时会输出:
```
Run successfully with command - ch_PP-OCRv2_det_KL - python3.7 deploy/slim/quantization/quant_kl.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o Global.pretrained_model=./inference/ch_PP-OCRv2_det_infer/ Global.save_inference_dir=./inference/ch_PP-OCRv2_det_infer/_klquant > ./test_tipc/output/ch_PP-OCRv2_det_KL/whole_infer/whole_infer_export_0.log 2>&1 !
Run successfully with command - ch_PP-OCRv2_det_KL - python3.7 tools/infer/predict_det.py --use_gpu=False --enable_mkldnn=False --cpu_threads=6 --det_model_dir=./inference/ch_PP-OCRv2_det_infer/_klquant --rec_batch_num=1 --image_dir=./inference/ch_det_data_50/all-sum-510/ --precision=int8 > ./test_tipc/output/ch_PP-OCRv2_det_KL/whole_infer/python_infer_cpu_usemkldnn_False_threads_6_precision_int8_batchsize_1.log 2>&1 !
Run successfully with command - ch_PP-OCRv2_det_KL - python3.7 tools/infer/predict_det.py --use_gpu=True --use_tensorrt=False --precision=int8 --det_model_dir=./inference/ch_PP-OCRv2_det_infer/_klquant --rec_batch_num=1 --image_dir=./inference/ch_det_data_50/all-sum-510/ > ./test_tipc/output/ch_PP-OCRv2_det_KL/whole_infer/python_infer_gpu_usetrt_False_precision_int8_batchsize_1.log 2>&1 !
```
运行失败时会输出:
```
Run failed with command - ch_PP-OCRv2_det_KL - python3.7 deploy/slim/quantization/quant_kl.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o Global.pretrained_model=./inference/ch_PP-OCRv2_det_infer/ Global.save_inference_dir=./inference/ch_PP-OCRv2_det_infer/_klquant > ./test_tipc/output/ch_PP-OCRv2_det_KL/whole_infer/whole_infer_export_0.log 2>&1 !
...
```
## 3. 更多教程
本文档为功能测试用,更详细的量化使用教程请参考:[量化](../../deploy/slim/quantization/README.md)
...@@ -18,71 +18,44 @@ PaddleServing预测功能测试的主程序为`test_serving_infer_python.sh`和` ...@@ -18,71 +18,44 @@ PaddleServing预测功能测试的主程序为`test_serving_infer_python.sh`和`
### 2.1 功能测试 ### 2.1 功能测试
**python serving** **python serving**
先运行`prepare.sh`准备数据和模型,然后运行`test_serving_infer_python.sh`进行测试,最终在```test_tipc/output```目录下生成`serving_infer_python*.log`后缀的日志文件。 先运行`prepare.sh`准备数据和模型,然后运行`test_serving_infer_python.sh`进行测试,最终在```test_tipc/output/{model_name}/serving_infer/python```目录下生成`python_*.log`后缀的日志文件。
```shell ```shell
bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_serving_python_linux_gpu_cpu.txt "serving_infer" bash test_tipc/prepare.sh ./test_tipc/configs/ch_PP-OCRv2/model_linux_gpu_normal_normal_serving_python_linux_gpu_cpu.txt "serving_infer"
# 用法: # 用法:
bash test_tipc/test_serving_infer_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_serving_python_linux_gpu_cpu.txt "serving_infer" bash test_tipc/test_serving_infer_python.sh ./test_tipc/configs/ch_PP-OCRv2/model_linux_gpu_normal_normal_serving_python_linux_gpu_cpu.txt "serving_infer"
``` ```
**cpp serving** **cpp serving**
先运行`prepare.sh`准备数据和模型,然后运行`test_serving_infer_cpp.sh`进行测试,最终在```test_tipc/output```目录下生成`serving_infer_cpp*.log`后缀的日志文件。 先运行`prepare.sh`准备数据和模型,然后运行`test_serving_infer_cpp.sh`进行测试,最终在```test_tipc/output/{model_name}/serving_infer/cpp```目录下生成`cpp_*.log`后缀的日志文件。
```shell ```shell
bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0/model_linux_gpu_normal_normal_serving_python_linux_gpu_cpu.txt "serving_infer" bash test_tipc/prepare.sh ./test_tipc/configs/ch_PP-OCRv2/model_linux_gpu_normal_normal_serving_cpp_linux_gpu_cpu.txt "serving_infer"
# 用法: # 用法:
bash test_tipc/test_serving_infer_cpp.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0/model_linux_gpu_normal_normal_serving_cpp_linux_gpu_cpu.txt "serving_infer" bash test_tipc/test_serving_infer_cpp.sh ./test_tipc/configs/ch_PP-OCRv2/model_linux_gpu_normal_normal_serving_cpp_linux_gpu_cpu.txt "serving_infer"
``` ```
#### 运行结果 #### 运行结果
各测试的运行情况会打印在 `test_tipc/output/results_serving.log` 中: 各测试的运行情况会打印在 `test_tipc/output/{model_name}/serving_infer/python(cpp)/results_python(cpp)_serving.log` 中:
运行成功时会输出: 运行成功时会输出:
``` ```
Run successfully with command - python3.7 pipeline_http_client.py --image_dir=../../doc/imgs > ../../tests/output/server_infer_cpu_usemkldnn_True_threads_1_batchsize_1.log 2>&1 ! Run successfully with command - ch_PP-OCRv2_rec - nohup python3.7 web_service_rec.py --config=config.yml --opt op.rec.concurrency="1" op.det.local_service_conf.devices= op.det.local_service_conf.use_mkldnn=False op.det.local_service_conf.thread_num=6 op.rec.local_service_conf.model_config=ppocr_rec_v2_serving > ./test_tipc/output/ch_PP-OCRv2_rec/serving_infer/python/python_server_cpu_usemkldnn_False_threads_6.log 2>&1 &!
Run successfully with command - xxxxx Run successfully with command - ch_PP-OCRv2_rec - python3.7 pipeline_http_client.py --det=False --image_dir=../../inference/rec_inference > ./test_tipc/output/ch_PP-OCRv2_rec/serving_infer/python/python_client_cpu_pipeline_http_usemkldnn_False_threads_6_batchsize_1.log 2>&1 !
... ...
``` ```
运行失败时会输出: 运行失败时会输出:
``` ```
Run failed with command - python3.7 pipeline_http_client.py --image_dir=../../doc/imgs > ../../tests/output/server_infer_cpu_usemkldnn_True_threads_1_batchsize_1.log 2>&1 ! Run failed with command - ch_PP-OCRv2_rec - nohup python3.7 web_service_rec.py --config=config.yml --opt op.rec.concurrency="1" op.det.local_service_conf.devices= op.det.local_service_conf.use_mkldnn=False op.det.local_service_conf.thread_num=6 op.rec.local_service_conf.model_config=ppocr_rec_v2_serving > ./test_tipc/output/ch_PP-OCRv2_rec/serving_infer/python/python_server_cpu_usemkldnn_False_threads_6.log 2>&1 &!
Run failed with command - python3.7 pipeline_http_client.py --image_dir=../../doc/imgs > ../../tests/output/server_infer_cpu_usemkldnn_True_threads_6_batchsize_1.log 2>&1 ! Run failed with command - ch_PP-OCRv2_rec - python3.7 pipeline_http_client.py --det=False --image_dir=../../inference/rec_inference > ./test_tipc/output/ch_PP-OCRv2_rec/serving_infer/python/python_client_cpu_pipeline_http_usemkldnn_False_threads_6_batchsize_1.log 2>&1 !
Run failed with command - xxxxx
... ...
``` ```
详细的预测结果会存在 test_tipc/output/ 文件夹下,例如`server_infer_gpu_usetrt_True_precision_fp16_batchsize_1.log`中会返回检测框的坐标: 详细的预测结果会存在 test_tipc/output/{model_name}/serving_infer/python(cpp)/ 文件夹下
```
{'err_no': 0, 'err_msg': '', 'key': ['dt_boxes'], 'value': ['[[[ 78. 642.]\n [409. 640.]\n [409. 657.]\n
[ 78. 659.]]\n\n [[ 75. 614.]\n [211. 614.]\n [211. 635.]\n [ 75. 635.]]\n\n
[[103. 554.]\n [135. 554.]\n [135. 575.]\n [103. 575.]]\n\n [[ 75. 531.]\n
[347. 531.]\n [347. 549.]\n [ 75. 549.] ]\n\n [[ 76. 503.]\n [309. 498.]\n
[309. 521.]\n [ 76. 526.]]\n\n [[163. 462.]\n [317. 462.]\n [317. 493.]\n
[163. 493.]]\n\n [[324. 431.]\n [414. 431.]\n [414. 452.]\n [324. 452.]]\n\n
[[ 76. 412.]\n [208. 408.]\n [209. 424.]\n [ 76. 428.]]\n\n [[307. 409.]\n
[428. 409.]\n [428. 426.]\n [307 . 426.]]\n\n [[ 74. 385.]\n [217. 382.]\n
[217. 400.]\n [ 74. 403.]]\n\n [[308. 381.]\n [427. 380.]\n [427. 400.]\n
[308. 401.]]\n\n [[ 74. 363.]\n [195. 362.]\n [195. 378.]\n [ 74. 379.]]\n\n
[[303. 359.]\n [423. 357.]\n [423. 375.]\n [303. 377.]]\n\n [[ 70. 336.]\n
[239. 334.]\n [239. 354.]\ n [ 70. 356.]]\n\n [[ 70. 312.]\n [204. 310.]\n
[204. 327.]\n [ 70. 330.]]\n\n [[303. 308.]\n [419. 306.]\n [419. 326.]\n
[303. 328.]]\n\n [[113. 2 72.]\n [246. 270.]\n [247. 299.]\n [113. 301.]]\n\n
[[361. 269.]\n [384. 269.]\n [384. 296.]\n [361. 296.]]\n\n [[ 70. 250.]\n
[243. 246.]\n [243. 265.]\n [ 70. 269.]]\n\n [[ 65. 221.]\n [187. 220.]\n
[187. 240.]\n [ 65. 241.]]\n\n [[337. 216.]\n [382. 216.]\n [382. 240.]\n
[337. 240.]]\n\n [ [ 65. 196.]\n [247. 193.]\n [247. 213.]\n [ 65. 216.]]\n\n
[[296. 197.]\n [423. 191.]\n [424. 209.]\n [296. 215.]]\n\n [[ 65. 167.]\n [244. 167.]\n
[244. 186.]\n [ 65. 186.]]\n\n [[ 67. 139.]\n [290. 139.]\n [290. 159.]\n [ 67. 159.]]\n\n
[[ 68. 113.]\n [410. 113.]\n [410. 128.]\n [ 68. 129.] ]\n\n [[277. 87.]\n [416. 87.]\n
[416. 108.]\n [277. 108.]]\n\n [[ 79. 28.]\n [132. 28.]\n [132. 62.]\n [ 79. 62.]]\n\n
[[163. 17.]\n [410. 14.]\n [410. 50.]\n [163. 53.]]]']}
```
## 3. 更多教程 ## 3. 更多教程
......
# Linux端基础训练预测功能测试 # Linux端基础训练预测功能测试
Linux端基础训练预测功能测试的主程序为`test_train_inference_python.sh`,可以测试基于Python的模型训练、评估、推理等基本功能,包括裁剪、量化、蒸馏 Linux端基础训练预测功能测试的主程序为`test_train_inference_python.sh`,可以测试基于Python的模型训练、评估、推理等基本功能,包括PACT在线量化
- Mac端基础训练预测功能测试参考[链接](./mac_test_train_inference_python.md) - Mac端基础训练预测功能测试参考[链接](./mac_test_train_inference_python.md)
- Windows端基础训练预测功能测试参考[链接](./win_test_train_inference_python.md) - Windows端基础训练预测功能测试参考[链接](./win_test_train_inference_python.md)
...@@ -11,13 +11,14 @@ Linux端基础训练预测功能测试的主程序为`test_train_inference_pytho ...@@ -11,13 +11,14 @@ Linux端基础训练预测功能测试的主程序为`test_train_inference_pytho
| 算法名称 | 模型名称 | 单机单卡 | 单机多卡 | 多机多卡 | 模型压缩(单机多卡) | | 算法名称 | 模型名称 | 单机单卡 | 单机多卡 | 多机多卡 | 模型压缩(单机多卡) |
| :---- | :---- | :---- | :---- | :---- | :---- | | :---- | :---- | :---- | :---- | :---- | :---- |
| DB | ch_ppocr_mobile_v2.0_det| 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | 正常训练:FPGM裁剪、PACT量化 <br> 离线量化(无需训练) | | DB | ch_ppocr_mobile_v2_0_det| 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | 正常训练:FPGM裁剪、PACT量化 |
| DB | ch_ppocr_server_v2.0_det| 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | 正常训练:FPGM裁剪、PACT量化 <br> 离线量化(无需训练) | | DB | ch_ppocr_server_v2_0_det| 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | 正常训练:FPGM裁剪、PACT量化 |
| CRNN | ch_ppocr_mobile_v2.0_rec| 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | 正常训练:PACT量化 <br> 离线量化(无需训练) | | CRNN | ch_ppocr_mobile_v2_0_rec| 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | 正常训练:PACT量化 |
| CRNN | ch_ppocr_server_v2.0_rec| 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | 正常训练:PACT量化 <br> 离线量化(无需训练) | | CRNN | ch_ppocr_server_v2_0_rec| 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | 正常训练:PACT量化 |
|PP-OCR| ch_ppocr_mobile_v2.0| 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | - | |PP-OCR| ch_ppocr_mobile_v2_0| 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | - |
|PP-OCR| ch_ppocr_server_v2.0| 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | - | |PP-OCR| ch_ppocr_server_v2_0| 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | - |
|PP-OCRv2| ch_PP-OCRv2 | 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | - | |PP-OCRv2| ch_PP-OCRv2 | 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | - |
|PP-OCRv3| ch_PP-OCRv3 | 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | 正常训练 <br> 混合精度 | - |
- 预测相关:基于训练是否使用量化,可以将训练产出的模型可以分为`正常模型``量化模型`,这两类模型对应的预测功能汇总如下, - 预测相关:基于训练是否使用量化,可以将训练产出的模型可以分为`正常模型``量化模型`,这两类模型对应的预测功能汇总如下,
...@@ -35,19 +36,14 @@ Linux端基础训练预测功能测试的主程序为`test_train_inference_pytho ...@@ -35,19 +36,14 @@ Linux端基础训练预测功能测试的主程序为`test_train_inference_pytho
运行环境配置请参考[文档](./install.md)的内容配置TIPC的运行环境。 运行环境配置请参考[文档](./install.md)的内容配置TIPC的运行环境。
### 2.1 安装依赖 ### 2.1 安装依赖
- 安装PaddlePaddle >= 2.0 - 安装PaddlePaddle >= 2.3
- 安装PaddleOCR依赖 - 安装PaddleOCR依赖
``` ```
pip3 install -r ../requirements.txt pip3 install -r ../requirements.txt
``` ```
- 安装autolog(规范化日志输出工具) - 安装autolog(规范化日志输出工具)
``` ```
git clone https://github.com/LDOUBLEV/AutoLog pip3 install https://paddleocr.bj.bcebos.com/libs/auto_log-1.2.0-py3-none-any.whl
cd AutoLog
pip3 install -r requirements.txt
python3 setup.py bdist_wheel
pip3 install ./dist/auto_log-1.0.0-py3-none-any.whl
cd ../
``` ```
- 安装PaddleSlim (可选) - 安装PaddleSlim (可选)
``` ```
...@@ -57,60 +53,57 @@ Linux端基础训练预测功能测试的主程序为`test_train_inference_pytho ...@@ -57,60 +53,57 @@ Linux端基础训练预测功能测试的主程序为`test_train_inference_pytho
### 2.2 功能测试 ### 2.2 功能测试
先运行`prepare.sh`准备数据和模型,然后运行`test_train_inference_python.sh`进行测试,最终在```test_tipc/output```目录下生成`python_infer_*.log`格式的日志文件。 #### 2.2.1 基础训练推理链条
先运行`prepare.sh`准备数据和模型,然后运行`test_train_inference_python.sh`进行测试,最终在```test_tipc/output```目录下生成`model_name/lite_train_lite_infer/*.log`格式的日志文件。
`test_train_inference_python.sh`包含5种运行模式,每种模式的运行数据不同,分别用于测试速度和精度,分别是: `test_train_inference_python.sh`包含基础链条的4种运行模式,每种模式的运行数据不同,分别用于测试速度和精度,分别是:
- 模式1:lite_train_lite_infer,使用少量数据训练,用于快速验证训练到预测的走通流程,不验证精度和速度; - 模式1:lite_train_lite_infer,使用少量数据训练,用于快速验证训练到预测的走通流程,不验证精度和速度;
```shell ```shell
bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'lite_train_lite_infer' bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_infer_python.txt 'lite_train_lite_infer'
bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'lite_train_lite_infer' bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_infer_python.txt 'lite_train_lite_infer'
``` ```
- 模式2:lite_train_whole_infer,使用少量数据训练,一定量数据预测,用于验证训练后的模型执行预测,预测速度是否合理; - 模式2:lite_train_whole_infer,使用少量数据训练,一定量数据预测,用于验证训练后的模型执行预测,预测速度是否合理;
```shell ```shell
bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'lite_train_whole_infer' bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_infer_python.txt 'lite_train_whole_infer'
bash test_tipc/test_train_inference_python.sh ../test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'lite_train_whole_infer' bash test_tipc/test_train_inference_python.sh ../test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_infer_python.txt 'lite_train_whole_infer'
``` ```
- 模式3:whole_infer,不训练,全量数据预测,走通开源模型评估、动转静,检查inference model预测时间和精度; - 模式3:whole_infer,不训练,全量数据预测,走通开源模型评估、动转静,检查inference model预测时间和精度;
```shell ```shell
bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'whole_infer' bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_infer_python.txt 'whole_infer'
# 用法1: # 用法1:
bash test_tipc/test_train_inference_python.sh ../test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'whole_infer' bash test_tipc/test_train_inference_python.sh ../test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_infer_python.txt 'whole_infer'
# 用法2: 指定GPU卡预测,第三个传入参数为GPU卡号 # 用法2: 指定GPU卡预测,第三个传入参数为GPU卡号
bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'whole_infer' '1' bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_infer_python.txt 'whole_infer' '1'
``` ```
- 模式4:whole_train_whole_infer,CE: 全量数据训练,全量数据预测,验证模型训练精度,预测精度,预测速度; - 模式4:whole_train_whole_infer,CE: 全量数据训练,全量数据预测,验证模型训练精度,预测精度,预测速度;
```shell ```shell
bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'whole_train_whole_infer' bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_infer_python.txt 'whole_train_whole_infer'
bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'whole_train_whole_infer' bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_infer_python.txt 'whole_train_whole_infer'
```
- 模式5:klquant_whole_infer,测试离线量化;
```shell
bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt 'klquant_whole_infer'
bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt 'klquant_whole_infer'
``` ```
运行相应指令后,在`test_tipc/output`文件夹下自动会保存运行日志。如'lite_train_lite_infer'模式下,会运行训练+inference的链条,因此,在`test_tipc/output`文件夹有以下文件: 运行相应指令后,在`test_tipc/output`文件夹下自动会保存运行日志。如'lite_train_lite_infer'模式下,会运行训练+inference的链条,因此,在`test_tipc/output`文件夹有以下文件:
``` ```
test_tipc/output/ test_tipc/output/model_name/lite_train_lite_infer/
|- results_python.log # 运行指令状态的日志 |- results_python.log # 运行指令状态的日志
|- norm_train_gpus_0_autocast_null/ # GPU 0号卡上正常训练的训练日志和模型保存文件夹 |- norm_train_gpus_0_autocast_null/ # GPU 0号卡上正常单机单卡训练的训练日志和模型保存文件夹
|- pact_train_gpus_0_autocast_null/ # GPU 0号卡上量化训练的训练日志和模型保存文件夹 |- norm_train_gpus_0,1_autocast_null/ # GPU 0,1号卡上正常单机多卡训练的训练日志和模型保存文件夹
...... ......
|- python_infer_cpu_usemkldnn_True_threads_1_batchsize_1.log # CPU上开启Mkldnn线程数设置为1,测试batch_size=1条件下的预测运行日志 |- python_infer_cpu_usemkldnn_False_threads_6_precision_fp32_batchsize_1.log # CPU上关闭Mkldnn线程数设置为6,测试batch_size=1条件下的fp32精度预测运行日志
|- python_infer_gpu_usetrt_True_precision_fp16_batchsize_1.log # GPU上开启TensorRT,测试batch_size=1的半精度预测日志 |- python_infer_gpu_usetrt_False_precision_fp32_batchsize_1.log # GPU上关闭TensorRT,测试batch_size=1的fp32精度预测日志
...... ......
``` ```
其中`results_python.log`中包含了每条指令的运行状态,如果运行成功会输出: 其中`results_python.log`中包含了每条指令的运行状态,如果运行成功会输出:
``` ```
Run successfully with command - python3.7 tools/train.py -c tests/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained Global.use_gpu=True Global.save_model_dir=./tests/output/norm_train_gpus_0_autocast_null Global.epoch_num=1 Train.loader.batch_size_per_card=2 ! [33m Run successfully with command - ch_ppocr_mobile_v2_0_det - python3.7 tools/train.py -c configs/det/ch_ppocr_v2_0/ch_det_mv3_db_v2_0.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained Global.use_gpu=True Global.save_model_dir=./test_tipc/output/ch_ppocr_mobile_v2_0_det/lite_train_lite_infer/norm_train_gpus_0_autocast_null Global.epoch_num=100 Train.loader.batch_size_per_card=2 ! 
Run successfully with command - python3.7 tools/export_model.py -c tests/configs/det_mv3_db.yml -o Global.pretrained_model=./tests/output/norm_train_gpus_0_autocast_null/latest Global.save_inference_dir=./tests/output/norm_train_gpus_0_autocast_null!  Run successfully with command - ch_ppocr_mobile_v2_0_det - python3.7 tools/export_model.py -c configs/det/ch_ppocr_v2_0/ch_det_mv3_db_v2_0.yml -o Global.checkpoints=./test_tipc/output/ch_ppocr_mobile_v2_0_det/lite_train_lite_infer/norm_train_gpus_0_autocast_null/latest Global.save_inference_dir=./test_tipc/output/ch_ppocr_mobile_v2_0_det/lite_train_lite_infer/norm_train_gpus_0_autocast_null > ./test_tipc/output/ch_ppocr_mobile_v2_0_det/lite_train_lite_infer/norm_train_gpus_0_autocast_null_nodes_1_export.log 2>&1 ! 
 Run successfully with command - ch_ppocr_mobile_v2_0_det - python3.7 tools/infer/predict_det.py --use_gpu=True --use_tensorrt=False --precision=fp32 --det_model_dir=./test_tipc/output/ch_ppocr_mobile_v2_0_det/lite_train_lite_infer/norm_train_gpus_0_autocast_null --rec_batch_num=1 --image_dir=./train_data/icdar2015/text_localization/ch4_test_images/ --benchmark=True > ./test_tipc/output/ch_ppocr_mobile_v2_0_det/lite_train_lite_infer/python_infer_gpu_usetrt_False_precision_fp32_batchsize_1.log 2>&1 ! 
 Run successfully with command - ch_ppocr_mobile_v2_0_det - python3.7 tools/infer/predict_det.py --use_gpu=False --enable_mkldnn=False --cpu_threads=6 --det_model_dir=./test_tipc/output/ch_ppocr_mobile_v2_0_det/lite_train_lite_infer/norm_train_gpus_0_autocast_null --rec_batch_num=1 --image_dir=./train_data/icdar2015/text_localization/ch4_test_images/ --benchmark=True --precision=fp32 > ./test_tipc/output/ch_ppocr_mobile_v2_0_det/lite_train_lite_infer/python_infer_cpu_usemkldnn_False_threads_6_precision_fp32_batchsize_1.log 2>&1 ! 
...... ......
``` ```
如果运行失败,会输出: 如果运行失败,会输出:
...@@ -121,6 +114,22 @@ Run failed with command - python3.7 tools/export_model.py -c tests/configs/det_m ...@@ -121,6 +114,22 @@ Run failed with command - python3.7 tools/export_model.py -c tests/configs/det_m
``` ```
可以很方便的根据`results_python.log`中的内容判定哪一个指令运行错误。 可以很方便的根据`results_python.log`中的内容判定哪一个指令运行错误。
#### 2.2.2 PACT在线量化链条
此外,`test_train_inference_python.sh`还包含PACT在线量化模式,命令如下:
以ch_PP-OCRv2_det为例,如需测试其他模型更换配置即可。
```shell
bash test_tipc/prepare.sh ./test_tipc/configs/ch_PP-OCRv2_det/train_pact_infer_python.txt 'lite_train_lite_infer'
bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_PP-OCRv2_det/train_pact_infer_python.txt 'lite_train_lite_infer'
```
#### 2.2.3 混合精度训练链条
此外,`test_train_inference_python.sh`还包含混合精度训练模式,命令如下:
以ch_PP-OCRv2_det为例,如需测试其他模型更换配置即可。
```shell
bash test_tipc/prepare.sh ./test_tipc/configs/ch_PP-OCRv2_det/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt 'lite_train_lite_infer'
bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_PP-OCRv2_det/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt 'lite_train_lite_infer'
```
### 2.3 精度测试 ### 2.3 精度测试
......
...@@ -8,7 +8,7 @@ Windows端基础训练预测功能测试的主程序为`test_train_inference_pyt ...@@ -8,7 +8,7 @@ Windows端基础训练预测功能测试的主程序为`test_train_inference_pyt
| 算法名称 | 模型名称 | 单机单卡 | 单机多卡 | 多机多卡 | 模型压缩(单机多卡) | | 算法名称 | 模型名称 | 单机单卡 | 单机多卡 | 多机多卡 | 模型压缩(单机多卡) |
| :---- | :---- | :---- | :---- | :---- | :---- | | :---- | :---- | :---- | :---- | :---- | :---- |
| DB | ch_ppocr_mobile_v2.0_det| 正常训练 <br> 混合精度 | - | - | 正常训练:FPGM裁剪、PACT量化 <br> 离线量化(无需训练) | | DB | ch_ppocr_mobile_v2_0_det| 正常训练 <br> 混合精度 | - | - | 正常训练:FPGM裁剪、PACT量化 |
- 预测相关:基于训练是否使用量化,可以将训练产出的模型可以分为`正常模型``量化模型`,这两类模型对应的预测功能汇总如下: - 预测相关:基于训练是否使用量化,可以将训练产出的模型可以分为`正常模型``量化模型`,这两类模型对应的预测功能汇总如下:
...@@ -29,19 +29,14 @@ Windows端基础训练预测功能测试的主程序为`test_train_inference_pyt ...@@ -29,19 +29,14 @@ Windows端基础训练预测功能测试的主程序为`test_train_inference_pyt
### 2.1 安装依赖 ### 2.1 安装依赖
- 安装PaddlePaddle >= 2.0 - 安装PaddlePaddle >= 2.3
- 安装PaddleOCR依赖 - 安装PaddleOCR依赖
``` ```
pip install -r ../requirements.txt pip install -r ../requirements.txt
``` ```
- 安装autolog(规范化日志输出工具) - 安装autolog(规范化日志输出工具)
``` ```
git clone https://github.com/LDOUBLEV/AutoLog pip install https://paddleocr.bj.bcebos.com/libs/auto_log-1.2.0-py3-none-any.whl
cd AutoLog
pip install -r requirements.txt
python setup.py bdist_wheel
pip install ./dist/auto_log-1.0.0-py3-none-any.whl
cd ../
``` ```
- 安装PaddleSlim (可选) - 安装PaddleSlim (可选)
``` ```
...@@ -51,54 +46,46 @@ Windows端基础训练预测功能测试的主程序为`test_train_inference_pyt ...@@ -51,54 +46,46 @@ Windows端基础训练预测功能测试的主程序为`test_train_inference_pyt
### 2.2 功能测试 ### 2.2 功能测试
先运行`prepare.sh`准备数据和模型,然后运行`test_train_inference_python.sh`进行测试,最终在```test_tipc/output```目录下生成`python_infer_*.log`格式的日志文件。 先运行`prepare.sh`准备数据和模型,然后运行`test_train_inference_python.sh`进行测试,最终在```test_tipc/output```目录下生成`model_name/lite_train_lite_infer/*.log`格式的日志文件。
`test_train_inference_python.sh`包含5种运行模式,每种模式的运行数据不同,分别用于测试速度和精度,分别是: `test_train_inference_python.sh`包含基础链条的4种运行模式,每种模式的运行数据不同,分别用于测试速度和精度,分别是:
- 模式1:lite_train_lite_infer,使用少量数据训练,用于快速验证训练到预测的走通流程,不验证精度和速度; - 模式1:lite_train_lite_infer,使用少量数据训练,用于快速验证训练到预测的走通流程,不验证精度和速度;
```shell ```shell
bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'lite_train_lite_infer' bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'lite_train_lite_infer'
bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'lite_train_lite_infer' bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'lite_train_lite_infer'
``` ```
- 模式2:lite_train_whole_infer,使用少量数据训练,一定量数据预测,用于验证训练后的模型执行预测,预测速度是否合理; - 模式2:lite_train_whole_infer,使用少量数据训练,一定量数据预测,用于验证训练后的模型执行预测,预测速度是否合理;
```shell ```shell
bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'lite_train_whole_infer' bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'lite_train_whole_infer'
bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'lite_train_whole_infer' bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'lite_train_whole_infer'
``` ```
- 模式3:whole_infer,不训练,全量数据预测,走通开源模型评估、动转静,检查inference model预测时间和精度; - 模式3:whole_infer,不训练,全量数据预测,走通开源模型评估、动转静,检查inference model预测时间和精度;
```shell ```shell
bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'whole_infer' bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'whole_infer'
# 用法1: # 用法1:
bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'whole_infer' bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'whole_infer'
# 用法2: 指定GPU卡预测,第三个传入参数为GPU卡号 # 用法2: 指定GPU卡预测,第三个传入参数为GPU卡号
bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'whole_infer' '1' bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'whole_infer' '1'
``` ```
- 模式4:whole_train_whole_infer,CE: 全量数据训练,全量数据预测,验证模型训练精度,预测精度,预测速度; - 模式4:whole_train_whole_infer,CE: 全量数据训练,全量数据预测,验证模型训练精度,预测精度,预测速度;
```shell ```shell
bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'whole_train_whole_infer' bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'whole_train_whole_infer'
bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'whole_train_whole_infer' bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'whole_train_whole_infer'
``` ```
- 模式5:klquant_whole_infer,测试离线量化;
```shell
bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_windows_gpu_cpu.txt 'klquant_whole_infer'
bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_windows_gpu_cpu.txt 'klquant_whole_infer'
```
运行相应指令后,在`test_tipc/output`文件夹下自动会保存运行日志。如'lite_train_lite_infer'模式下,会运行训练+inference的链条,因此,在`test_tipc/output`文件夹有以下文件: 运行相应指令后,在`test_tipc/output`文件夹下自动会保存运行日志。如'lite_train_lite_infer'模式下,会运行训练+inference的链条,因此,在`test_tipc/output`文件夹有以下文件:
``` ```
test_tipc/output/ test_tipc/output/model_name/lite_train_lite_infer/
|- results_python.log # 运行指令状态的日志 |- results_python.log # 运行指令状态的日志
|- norm_train_gpus_0_autocast_null/ # GPU 0号卡上正常训练的训练日志和模型保存文件夹 |- norm_train_gpus_0_autocast_null/ # GPU 0号卡上正常训练的训练日志和模型保存文件夹
|- pact_train_gpus_0_autocast_null/ # GPU 0号卡上量化训练的训练日志和模型保存文件夹
...... ......
|- python_infer_cpu_usemkldnn_True_threads_1_batchsize_1.log # CPU上开启Mkldnn线程数设置为1,测试batch_size=1条件下的预测运行日志 |- python_infer_cpu_usemkldnn_False_threads_6_precision_fp32_batchsize_1.log # CPU上关闭Mkldnn线程数设置为6,测试batch_size=1条件下的fp32精度预测运行日志
|- python_infer_gpu_usetrt_True_precision_fp16_batchsize_1.log # GPU上开启TensorRT,测试batch_size=1的半精度预测日志 |- python_infer_gpu_usetrt_False_precision_fp32_batchsize_1.log # GPU上关闭TensorRT,测试batch_size=1的fp32精度预测日志
...... ......
``` ```
......
...@@ -21,7 +21,10 @@ model_name=$(func_parser_value "${lines[1]}") ...@@ -21,7 +21,10 @@ model_name=$(func_parser_value "${lines[1]}")
trainer_list=$(func_parser_value "${lines[14]}") trainer_list=$(func_parser_value "${lines[14]}")
if [ ${MODE} = "benchmark_train" ];then if [ ${MODE} = "benchmark_train" ];then
pip install -r requirements.txt python_name_list=$(func_parser_value "${lines[2]}")
array=(${python_name_list})
python_name=${array[0]}
${python_name} -m pip install -r requirements.txt
if [[ ${model_name} =~ "ch_ppocr_mobile_v2_0_det" || ${model_name} =~ "det_mv3_db_v2_0" ]];then if [[ ${model_name} =~ "ch_ppocr_mobile_v2_0_det" || ${model_name} =~ "det_mv3_db_v2_0" ]];then
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/pretrained/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/pretrained/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate
rm -rf ./train_data/icdar2015 rm -rf ./train_data/icdar2015
...@@ -29,6 +32,13 @@ if [ ${MODE} = "benchmark_train" ];then ...@@ -29,6 +32,13 @@ if [ ${MODE} = "benchmark_train" ];then
cd ./train_data/ && tar xf icdar2015_benckmark.tar cd ./train_data/ && tar xf icdar2015_benckmark.tar
ln -s ./icdar2015_benckmark ./icdar2015 ln -s ./icdar2015_benckmark ./icdar2015
cd ../ cd ../
if [[ ${model_name} =~ "ch_ppocr_mobile_v2_0_det" ]];then
# expand gt.txt 2 times
cd ./train_data/icdar2015/text_localization
for i in `seq 2`;do cp train_icdar2015_label.txt dup$i.txt;done
cat dup* > train_icdar2015_label.txt && rm -rf dup*
cd ../../../
fi
fi fi
if [[ ${model_name} =~ "ch_ppocr_server_v2_0_det" || ${model_name} =~ "ch_PP-OCRv3_det" ]];then if [[ ${model_name} =~ "ch_ppocr_server_v2_0_det" || ${model_name} =~ "ch_PP-OCRv3_det" ]];then
rm -rf ./train_data/icdar2015 rm -rf ./train_data/icdar2015
...@@ -97,6 +107,15 @@ if [ ${MODE} = "benchmark_train" ];then ...@@ -97,6 +107,15 @@ if [ ${MODE} = "benchmark_train" ];then
ln -s ./pubtabnet_benckmark ./pubtabnet ln -s ./pubtabnet_benckmark ./pubtabnet
cd ../ cd ../
fi fi
if [[ ${model_name} == "slanet" ]];then
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_train.tar --no-check-certificate
cd ./pretrain_models/ && tar xf en_ppstructure_mobile_v2.0_SLANet_train.tar && cd ../
rm -rf ./train_data/pubtabnet
wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/pubtabnet_benckmark.tar --no-check-certificate
cd ./train_data/ && tar xf pubtabnet_benckmark.tar
ln -s ./pubtabnet_benckmark ./pubtabnet
cd ../
fi
if [[ ${model_name} == "det_r50_dcn_fce_ctw_v2_0" ]]; then if [[ ${model_name} == "det_r50_dcn_fce_ctw_v2_0" ]]; then
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/contribution/det_r50_dcn_fce_ctw_v2.0_train.tar --no-check-certificate wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/contribution/det_r50_dcn_fce_ctw_v2.0_train.tar --no-check-certificate
cd ./pretrain_models/ && tar xf det_r50_dcn_fce_ctw_v2.0_train.tar && cd ../ cd ./pretrain_models/ && tar xf det_r50_dcn_fce_ctw_v2.0_train.tar && cd ../
...@@ -107,7 +126,8 @@ if [ ${MODE} = "benchmark_train" ];then ...@@ -107,7 +126,8 @@ if [ ${MODE} = "benchmark_train" ];then
cd ../ cd ../
fi fi
if [ ${model_name} == "layoutxlm_ser" ] || [ ${model_name} == "vi_layoutxlm_ser" ]; then if [ ${model_name} == "layoutxlm_ser" ] || [ ${model_name} == "vi_layoutxlm_ser" ]; then
pip install -r ppstructure/kie/requirements.txt ${python_name} -m pip install -r ppstructure/kie/requirements.txt
${python_name} -m pip install opencv-python -U
wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate
cd ./train_data/ && tar xf XFUND.tar cd ./train_data/ && tar xf XFUND.tar
# expand gt.txt 10 times # expand gt.txt 10 times
...@@ -121,6 +141,11 @@ if [ ${MODE} = "benchmark_train" ];then ...@@ -121,6 +141,11 @@ if [ ${MODE} = "benchmark_train" ];then
fi fi
if [ ${MODE} = "lite_train_lite_infer" ];then if [ ${MODE} = "lite_train_lite_infer" ];then
python_name_list=$(func_parser_value "${lines[2]}")
array=(${python_name_list})
python_name=${array[0]}
${python_name} -m pip install -r requirements.txt
${python_name} -m pip install https://paddleocr.bj.bcebos.com/libs/auto_log-1.2.0-py3-none-any.whl
# pretrain lite train data # pretrain lite train data
wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar --no-check-certificate wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar --no-check-certificate
...@@ -211,6 +236,10 @@ if [ ${MODE} = "lite_train_lite_infer" ];then ...@@ -211,6 +236,10 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
if [ ${model_name} == "ch_ppocr_mobile_v2_0_rec_FPGM" ]; then if [ ${model_name} == "ch_ppocr_mobile_v2_0_rec_FPGM" ]; then
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar --no-check-certificate wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar --no-check-certificate
cd ./pretrain_models/ && tar xf ch_ppocr_mobile_v2.0_rec_train.tar && cd ../ cd ./pretrain_models/ && tar xf ch_ppocr_mobile_v2.0_rec_train.tar && cd ../
${python_name} -m pip install paddleslim
fi
if [ ${model_name} == "ch_ppocr_mobile_v2_0_det_FPGM" ]; then
${python_name} -m pip install paddleslim
fi fi
if [ ${model_name} == "det_mv3_east_v2_0" ]; then if [ ${model_name} == "det_mv3_east_v2_0" ]; then
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar --no-check-certificate wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar --no-check-certificate
...@@ -228,12 +257,28 @@ if [ ${MODE} = "lite_train_lite_infer" ];then ...@@ -228,12 +257,28 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/rec_r32_gaspin_bilstm_att_train.tar --no-check-certificate wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/rec_r32_gaspin_bilstm_att_train.tar --no-check-certificate
cd ./pretrain_models/ && tar xf rec_r32_gaspin_bilstm_att_train.tar && cd ../ cd ./pretrain_models/ && tar xf rec_r32_gaspin_bilstm_att_train.tar && cd ../
fi fi
if [ ${model_name} == "layoutxlm_ser" ] || [ ${model_name} == "vi_layoutxlm_ser" ]; then if [ ${model_name} == "layoutxlm_ser" ]; then
pip install -r ppstructure/kie/requirements.txt ${python_name} -m pip install -r ppstructure/kie/requirements.txt
${python_name} -m pip install opencv-python -U
wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate
cd ./train_data/ && tar xf XFUND.tar
cd ../
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/pplayout/ser_LayoutXLM_xfun_zh.tar --no-check-certificate
cd ./pretrain_models/ && tar xf ser_LayoutXLM_xfun_zh.tar && cd ../
fi
if [ ${model_name} == "vi_layoutxlm_ser" ]; then
${python_name} -m pip install -r ppstructure/kie/requirements.txt
${python_name} -m pip install opencv-python -U
wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate
cd ./train_data/ && tar xf XFUND.tar cd ./train_data/ && tar xf XFUND.tar
cd ../ cd ../
fi fi
if [ ${model_name} == "det_r18_ct" ]; then
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/pretrained/ResNet18_vd_pretrained.pdparams --no-check-certificate
wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/ct_tipc/total_text_lite2.tar --no-check-certificate
cd ./train_data && tar xf total_text_lite2.tar && ln -s total_text_lite2 total_text && cd ../
fi
elif [ ${MODE} = "whole_train_whole_infer" ];then elif [ ${MODE} = "whole_train_whole_infer" ];then
wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate
...@@ -302,9 +347,18 @@ elif [ ${MODE} = "lite_train_whole_infer" ];then ...@@ -302,9 +347,18 @@ elif [ ${MODE} = "lite_train_whole_infer" ];then
cd ./inference/ && tar xf en_ppocr_mobile_v2.0_table_det_infer.tar && tar xf en_ppocr_mobile_v2.0_table_rec_infer.tar && cd ../ cd ./inference/ && tar xf en_ppocr_mobile_v2.0_table_det_infer.tar && tar xf en_ppocr_mobile_v2.0_table_rec_infer.tar && cd ../
fi fi
elif [ ${MODE} = "whole_infer" ];then elif [ ${MODE} = "whole_infer" ];then
python_name_list=$(func_parser_value "${lines[2]}")
array=(${python_name_list})
python_name=${array[0]}
${python_name} -m pip install paddleslim
${python_name} -m pip install -r requirements.txt
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar --no-check-certificate wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar --no-check-certificate
wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/rec_inference.tar --no-check-certificate wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/rec_inference.tar --no-check-certificate
cd ./inference && tar xf rec_inference.tar && tar xf ch_det_data_50.tar && cd ../ cd ./inference && tar xf rec_inference.tar && tar xf ch_det_data_50.tar && cd ../
wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate
cd ./train_data/ && tar xf XFUND.tar && cd ../
head -n 2 train_data/XFUND/zh_val/val.json > train_data/XFUND/zh_val/val_lite.json
mv train_data/XFUND/zh_val/val_lite.json train_data/XFUND/zh_val/val.json
if [ ${model_name} = "ch_ppocr_mobile_v2_0_det" ]; then if [ ${model_name} = "ch_ppocr_mobile_v2_0_det" ]; then
eval_model_name="ch_ppocr_mobile_v2.0_det_train" eval_model_name="ch_ppocr_mobile_v2.0_det_train"
rm -rf ./train_data/icdar2015 rm -rf ./train_data/icdar2015
...@@ -470,6 +524,12 @@ elif [ ${MODE} = "whole_infer" ];then ...@@ -470,6 +524,12 @@ elif [ ${MODE} = "whole_infer" ];then
wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar --no-check-certificate wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar --no-check-certificate
cd ./inference/ && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_det_infer.tar && tar xf en_ppocr_mobile_v2.0_table_rec_infer.tar && cd ../ cd ./inference/ && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_det_infer.tar && tar xf en_ppocr_mobile_v2.0_table_rec_infer.tar && cd ../
fi fi
if [[ ${model_name} =~ "layoutxlm_ser" ]]; then
${python_name} -m pip install -r ppstructure/kie/requirements.txt
${python_name} -m pip install opencv-python -U
wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/pplayout/ser_LayoutXLM_xfun_zh_infer.tar --no-check-certificate
cd ./inference/ && tar xf ser_LayoutXLM_xfun_zh_infer.tar && cd ../
fi
fi fi
if [[ ${model_name} =~ "KL" ]]; then if [[ ${model_name} =~ "KL" ]]; then
...@@ -522,6 +582,12 @@ if [[ ${model_name} =~ "KL" ]]; then ...@@ -522,6 +582,12 @@ if [[ ${model_name} =~ "KL" ]]; then
cd ./inference/ && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_det_infer.tar && tar xf en_ppocr_mobile_v2.0_table_rec_infer.tar && cd ../ cd ./inference/ && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_det_infer.tar && tar xf en_ppocr_mobile_v2.0_table_rec_infer.tar && cd ../
cd ./train_data/ && tar xf pubtabnet.tar && cd ../ cd ./train_data/ && tar xf pubtabnet.tar && cd ../
fi fi
if [[ ${model_name} =~ "layoutxlm_ser_KL" ]]; then
wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate
cd ./train_data/ && tar xf XFUND.tar && cd ../
wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/pplayout/ser_LayoutXLM_xfun_zh_infer.tar --no-check-certificate
cd ./inference/ && tar xf ser_LayoutXLM_xfun_zh_infer.tar && cd ../
fi
fi fi
if [ ${MODE} = "cpp_infer" ];then if [ ${MODE} = "cpp_infer" ];then
...@@ -626,6 +692,12 @@ if [ ${MODE} = "cpp_infer" ];then ...@@ -626,6 +692,12 @@ if [ ${MODE} = "cpp_infer" ];then
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar --no-check-certificate wget -nc -P ./inference https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar --no-check-certificate
cd ./inference && tar xf ch_PP-OCRv3_det_infer.tar && tar xf ch_PP-OCRv3_rec_infer.tar && tar xf ch_det_data_50.tar && cd ../ cd ./inference && tar xf ch_PP-OCRv3_det_infer.tar && tar xf ch_PP-OCRv3_rec_infer.tar && tar xf ch_det_data_50.tar && cd ../
fi fi
elif [ ${model_name} = "en_table_structure_KL" ];then
wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar --no-check-certificate
wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar --no-check-certificate
wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar --no-check-certificate
cd ./inference/ && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_det_infer.tar && tar xf en_ppocr_mobile_v2.0_table_rec_infer.tar && cd ../
fi
fi fi
if [ ${MODE} = "serving_infer" ];then if [ ${MODE} = "serving_infer" ];then
...@@ -637,6 +709,7 @@ if [ ${MODE} = "serving_infer" ];then ...@@ -637,6 +709,7 @@ if [ ${MODE} = "serving_infer" ];then
${python_name} -m pip install paddle-serving-server-gpu ${python_name} -m pip install paddle-serving-server-gpu
${python_name} -m pip install paddle_serving_client ${python_name} -m pip install paddle_serving_client
${python_name} -m pip install paddle-serving-app ${python_name} -m pip install paddle-serving-app
${python_name} -m pip install https://paddleocr.bj.bcebos.com/libs/auto_log-1.2.0-py3-none-any.whl
# wget model # wget model
if [ ${model_name} == "ch_ppocr_mobile_v2_0_det_KL" ] || [ ${model_name} == "ch_ppocr_mobile_v2.0_rec_KL" ] ; then if [ ${model_name} == "ch_ppocr_mobile_v2_0_det_KL" ] || [ ${model_name} == "ch_ppocr_mobile_v2.0_rec_KL" ] ; then
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/tipc_fake_model/ch_ppocr_mobile_v2.0_det_klquant_infer.tar --no-check-certificate wget -nc -P ./inference https://paddleocr.bj.bcebos.com/tipc_fake_model/ch_ppocr_mobile_v2.0_det_klquant_infer.tar --no-check-certificate
...@@ -688,8 +761,7 @@ fi ...@@ -688,8 +761,7 @@ fi
if [ ${MODE} = "paddle2onnx_infer" ];then if [ ${MODE} = "paddle2onnx_infer" ];then
# prepare serving env # prepare serving env
python_name=$(func_parser_value "${lines[2]}") python_name=$(func_parser_value "${lines[2]}")
${python_name} -m pip install paddle2onnx ${python_name} -m pip install paddle2onnx onnxruntime onnx
${python_name} -m pip install onnxruntime
# wget model # wget model
if [[ ${model_name} =~ "ch_ppocr_mobile_v2_0" ]]; then if [[ ${model_name} =~ "ch_ppocr_mobile_v2_0" ]]; then
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar --no-check-certificate wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar --no-check-certificate
......
...@@ -63,7 +63,7 @@ function func_paddle2onnx(){ ...@@ -63,7 +63,7 @@ function func_paddle2onnx(){
set_opset_version=$(func_set_params "${opset_version_key}" "${opset_version_value}") set_opset_version=$(func_set_params "${opset_version_key}" "${opset_version_value}")
set_enable_onnx_checker=$(func_set_params "${enable_onnx_checker_key}" "${enable_onnx_checker_value}") set_enable_onnx_checker=$(func_set_params "${enable_onnx_checker_key}" "${enable_onnx_checker_value}")
trans_det_log="${LOG_PATH}/trans_model_det.log" trans_det_log="${LOG_PATH}/trans_model_det.log"
trans_model_cmd="${padlle2onnx_cmd} ${set_dirname} ${set_model_filename} ${set_params_filename} ${set_save_model} ${set_opset_version} ${set_enable_onnx_checker} > ${trans_det_log} 2>&1 " trans_model_cmd="${padlle2onnx_cmd} ${set_dirname} ${set_model_filename} ${set_params_filename} ${set_save_model} ${set_opset_version} ${set_enable_onnx_checker} --enable_dev_version=False > ${trans_det_log} 2>&1 "
eval $trans_model_cmd eval $trans_model_cmd
last_status=${PIPESTATUS[0]} last_status=${PIPESTATUS[0]}
status_check $last_status "${trans_model_cmd}" "${status_log}" "${model_name}" "${trans_det_log}" status_check $last_status "${trans_model_cmd}" "${status_log}" "${model_name}" "${trans_det_log}"
...@@ -75,7 +75,7 @@ function func_paddle2onnx(){ ...@@ -75,7 +75,7 @@ function func_paddle2onnx(){
set_opset_version=$(func_set_params "${opset_version_key}" "${opset_version_value}") set_opset_version=$(func_set_params "${opset_version_key}" "${opset_version_value}")
set_enable_onnx_checker=$(func_set_params "${enable_onnx_checker_key}" "${enable_onnx_checker_value}") set_enable_onnx_checker=$(func_set_params "${enable_onnx_checker_key}" "${enable_onnx_checker_value}")
trans_rec_log="${LOG_PATH}/trans_model_rec.log" trans_rec_log="${LOG_PATH}/trans_model_rec.log"
trans_model_cmd="${padlle2onnx_cmd} ${set_dirname} ${set_model_filename} ${set_params_filename} ${set_save_model} ${set_opset_version} ${set_enable_onnx_checker} > ${trans_rec_log} 2>&1 " trans_model_cmd="${padlle2onnx_cmd} ${set_dirname} ${set_model_filename} ${set_params_filename} ${set_save_model} ${set_opset_version} ${set_enable_onnx_checker} --enable_dev_version=False > ${trans_rec_log} 2>&1 "
eval $trans_model_cmd eval $trans_model_cmd
last_status=${PIPESTATUS[0]} last_status=${PIPESTATUS[0]}
status_check $last_status "${trans_model_cmd}" "${status_log}" "${model_name}" "${trans_rec_log}" status_check $last_status "${trans_model_cmd}" "${status_log}" "${model_name}" "${trans_rec_log}"
...@@ -88,7 +88,7 @@ function func_paddle2onnx(){ ...@@ -88,7 +88,7 @@ function func_paddle2onnx(){
set_opset_version=$(func_set_params "${opset_version_key}" "${opset_version_value}") set_opset_version=$(func_set_params "${opset_version_key}" "${opset_version_value}")
set_enable_onnx_checker=$(func_set_params "${enable_onnx_checker_key}" "${enable_onnx_checker_value}") set_enable_onnx_checker=$(func_set_params "${enable_onnx_checker_key}" "${enable_onnx_checker_value}")
trans_det_log="${LOG_PATH}/trans_model_det.log" trans_det_log="${LOG_PATH}/trans_model_det.log"
trans_model_cmd="${padlle2onnx_cmd} ${set_dirname} ${set_model_filename} ${set_params_filename} ${set_save_model} ${set_opset_version} ${set_enable_onnx_checker} > ${trans_det_log} 2>&1 " trans_model_cmd="${padlle2onnx_cmd} ${set_dirname} ${set_model_filename} ${set_params_filename} ${set_save_model} ${set_opset_version} ${set_enable_onnx_checker} --enable_dev_version=False > ${trans_det_log} 2>&1 "
eval $trans_model_cmd eval $trans_model_cmd
last_status=${PIPESTATUS[0]} last_status=${PIPESTATUS[0]}
status_check $last_status "${trans_model_cmd}" "${status_log}" "${model_name}" "${trans_det_log}" status_check $last_status "${trans_model_cmd}" "${status_log}" "${model_name}" "${trans_det_log}"
...@@ -101,7 +101,7 @@ function func_paddle2onnx(){ ...@@ -101,7 +101,7 @@ function func_paddle2onnx(){
set_opset_version=$(func_set_params "${opset_version_key}" "${opset_version_value}") set_opset_version=$(func_set_params "${opset_version_key}" "${opset_version_value}")
set_enable_onnx_checker=$(func_set_params "${enable_onnx_checker_key}" "${enable_onnx_checker_value}") set_enable_onnx_checker=$(func_set_params "${enable_onnx_checker_key}" "${enable_onnx_checker_value}")
trans_rec_log="${LOG_PATH}/trans_model_rec.log" trans_rec_log="${LOG_PATH}/trans_model_rec.log"
trans_model_cmd="${padlle2onnx_cmd} ${set_dirname} ${set_model_filename} ${set_params_filename} ${set_save_model} ${set_opset_version} ${set_enable_onnx_checker} > ${trans_rec_log} 2>&1 " trans_model_cmd="${padlle2onnx_cmd} ${set_dirname} ${set_model_filename} ${set_params_filename} ${set_save_model} ${set_opset_version} ${set_enable_onnx_checker} --enable_dev_version=False > ${trans_rec_log} 2>&1 "
eval $trans_model_cmd eval $trans_model_cmd
last_status=${PIPESTATUS[0]} last_status=${PIPESTATUS[0]}
status_check $last_status "${trans_model_cmd}" "${status_log}" "${model_name}" "${trans_rec_log}" status_check $last_status "${trans_model_cmd}" "${status_log}" "${model_name}" "${trans_rec_log}"
......
#!/bin/bash
source test_tipc/common_func.sh

# Portable `readlink -f` replacement: resolve an absolute path via perl/Cwd.
function readlinkf() {
    perl -MCwd -e 'print Cwd::abs_path shift' "$1";
}

# Extract the 3rd whitespace-separated token from a TIPC config line
# (the trainer config .yml path in a "python tools/train.py -c <yml>" value).
function func_parser_config() {
    strs=$1
    IFS=" "
    array=(${strs})
    tmp=${array[2]}
    echo ${tmp}
}

BASEDIR=$(dirname "$0")
REPO_ROOT_PATH=$(readlinkf ${BASEDIR}/../)

FILENAME=$1

# disable mkldnn on non x86_64 env
arch=$(uname -i)
if [ "$arch" != 'x86_64' ]; then
    sed -i 's/--enable_mkldnn:True|False/--enable_mkldnn:False/g' $FILENAME
    sed -i 's/--enable_mkldnn:True/--enable_mkldnn:False/g' $FILENAME
fi

# change gpu to npu in tipc txt configs
sed -i 's/use_gpu/use_npu/g' $FILENAME

# disable benchmark as AutoLog required nvidia-smi command
sed -i 's/--benchmark:True/--benchmark:False/g' $FILENAME

dataline=`cat $FILENAME`

# parser params
IFS=$'\n'
lines=(${dataline})

# replace training config file: rewrite use_gpu -> use_npu inside every
# referenced trainer .yml as well, not just the tipc txt config.
grep -n 'tools/.*yml' $FILENAME | cut -d ":" -f 1 \
| while read line_num ; do
    train_cmd=$(func_parser_value "${lines[line_num-1]}")
    trainer_config=$(func_parser_config ${train_cmd})
    sed -i 's/use_gpu/use_npu/g' "$REPO_ROOT_PATH/$trainer_config"
done

# change gpu to npu in execution script
sed -i 's/\"gpu\"/\"npu\"/g' test_tipc/test_train_inference_python.sh

# pass parameters to test_train_inference_python.sh
# NOTE: double quotes so the echo below reports the actual expanded command
# (single quotes printed the literal string "${cmd}").
cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} $2"
echo -e "\033[1;32m Started to run command: ${cmd}! \033[0m"
eval $cmd
#!/bin/bash
source test_tipc/common_func.sh

# Portable `readlink -f` replacement: resolve an absolute path via perl/Cwd.
function readlinkf() {
    perl -MCwd -e 'print Cwd::abs_path shift' "$1";
}

# Extract the 3rd whitespace-separated token from a TIPC config line
# (the trainer config .yml path in a "python tools/train.py -c <yml>" value).
function func_parser_config() {
    strs=$1
    IFS=" "
    array=(${strs})
    tmp=${array[2]}
    echo ${tmp}
}

BASEDIR=$(dirname "$0")
REPO_ROOT_PATH=$(readlinkf ${BASEDIR}/../)

FILENAME=$1

# disable mkldnn on non x86_64 env
arch=$(uname -i)
if [ "$arch" != 'x86_64' ]; then
    sed -i 's/--enable_mkldnn:True|False/--enable_mkldnn:False/g' $FILENAME
    sed -i 's/--enable_mkldnn:True/--enable_mkldnn:False/g' $FILENAME
fi

# change gpu to xpu in tipc txt configs
sed -i 's/use_gpu/use_xpu/g' $FILENAME

# disable benchmark as AutoLog required nvidia-smi command
sed -i 's/--benchmark:True/--benchmark:False/g' $FILENAME

dataline=`cat $FILENAME`

# parser params
IFS=$'\n'
lines=(${dataline})

# replace training config file: rewrite use_gpu -> use_xpu inside every
# referenced trainer .yml as well, not just the tipc txt config.
grep -n 'tools/.*yml' $FILENAME | cut -d ":" -f 1 \
| while read line_num ; do
    train_cmd=$(func_parser_value "${lines[line_num-1]}")
    trainer_config=$(func_parser_config ${train_cmd})
    sed -i 's/use_gpu/use_xpu/g' "$REPO_ROOT_PATH/$trainer_config"
done

# change gpu to xpu in execution script
sed -i 's/\"gpu\"/\"xpu\"/g' test_tipc/test_train_inference_python.sh

# pass parameters to test_train_inference_python.sh
# NOTE: double quotes so the echo below reports the actual expanded command
# (single quotes printed the literal string "${cmd}").
cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} $2"
echo -e "\033[1;32m Started to run command: ${cmd}! \033[0m"
eval $cmd
...@@ -127,6 +127,9 @@ class TextDetector(object): ...@@ -127,6 +127,9 @@ class TextDetector(object):
postprocess_params["beta"] = args.beta postprocess_params["beta"] = args.beta
postprocess_params["fourier_degree"] = args.fourier_degree postprocess_params["fourier_degree"] = args.fourier_degree
postprocess_params["box_type"] = args.det_fce_box_type postprocess_params["box_type"] = args.det_fce_box_type
elif self.det_algorithm == "CT":
pre_process_list[0] = {'ScaleAlignedShort': {'short_size': 640}}
postprocess_params['name'] = 'CTPostProcess'
else: else:
logger.info("unknown det_algorithm:{}".format(self.det_algorithm)) logger.info("unknown det_algorithm:{}".format(self.det_algorithm))
sys.exit(0) sys.exit(0)
...@@ -253,6 +256,9 @@ class TextDetector(object): ...@@ -253,6 +256,9 @@ class TextDetector(object):
elif self.det_algorithm == 'FCE': elif self.det_algorithm == 'FCE':
for i, output in enumerate(outputs): for i, output in enumerate(outputs):
preds['level_{}'.format(i)] = output preds['level_{}'.format(i)] = output
elif self.det_algorithm == "CT":
preds['maps'] = outputs[0]
preds['score'] = outputs[1]
else: else:
raise NotImplementedError raise NotImplementedError
...@@ -260,7 +266,7 @@ class TextDetector(object): ...@@ -260,7 +266,7 @@ class TextDetector(object):
post_result = self.postprocess_op(preds, shape_list) post_result = self.postprocess_op(preds, shape_list)
dt_boxes = post_result[0]['points'] dt_boxes = post_result[0]['points']
if (self.det_algorithm == "SAST" and self.det_sast_polygon) or ( if (self.det_algorithm == "SAST" and self.det_sast_polygon) or (
self.det_algorithm in ["PSE", "FCE"] and self.det_algorithm in ["PSE", "FCE", "CT"] and
self.postprocess_op.box_type == 'poly'): self.postprocess_op.box_type == 'poly'):
dt_boxes = self.filter_tag_det_res_only_clip(dt_boxes, ori_im.shape) dt_boxes = self.filter_tag_det_res_only_clip(dt_boxes, ori_im.shape)
else: else:
......
...@@ -23,6 +23,7 @@ from PIL import Image, ImageDraw, ImageFont ...@@ -23,6 +23,7 @@ from PIL import Image, ImageDraw, ImageFont
import math import math
from paddle import inference from paddle import inference
import time import time
import random
from ppocr.utils.logging import get_logger from ppocr.utils.logging import get_logger
...@@ -35,6 +36,7 @@ def init_args(): ...@@ -35,6 +36,7 @@ def init_args():
# params for prediction engine # params for prediction engine
parser.add_argument("--use_gpu", type=str2bool, default=True) parser.add_argument("--use_gpu", type=str2bool, default=True)
parser.add_argument("--use_xpu", type=str2bool, default=False) parser.add_argument("--use_xpu", type=str2bool, default=False)
parser.add_argument("--use_npu", type=str2bool, default=False)
parser.add_argument("--ir_optim", type=str2bool, default=True) parser.add_argument("--ir_optim", type=str2bool, default=True)
parser.add_argument("--use_tensorrt", type=str2bool, default=False) parser.add_argument("--use_tensorrt", type=str2bool, default=False)
parser.add_argument("--min_subgraph_size", type=int, default=15) parser.add_argument("--min_subgraph_size", type=int, default=15)
...@@ -226,24 +228,25 @@ def create_predictor(args, mode, logger): ...@@ -226,24 +228,25 @@ def create_predictor(args, mode, logger):
use_calib_mode=False) use_calib_mode=False)
# collect shape # collect shape
if args.shape_info_filename is not None: trt_shape_f = f"{os.path.dirname(args.shape_info_filename)}/{mode}_{os.path.basename(args.shape_info_filename)}"
if not os.path.exists(args.shape_info_filename): if trt_shape_f is not None:
config.collect_shape_range_info( if not os.path.exists(trt_shape_f):
args.shape_info_filename) config.collect_shape_range_info(trt_shape_f)
logger.info( logger.info(
f"collect dynamic shape info into : {args.shape_info_filename}" f"collect dynamic shape info into : {trt_shape_f}"
) )
else: else:
logger.info( logger.info(
f"dynamic shape info file( {args.shape_info_filename} ) already exists, not need to generate again." f"dynamic shape info file( {trt_shape_f} ) already exists, not need to generate again."
) )
config.enable_tuned_tensorrt_dynamic_shape( config.enable_tuned_tensorrt_dynamic_shape(trt_shape_f, True)
args.shape_info_filename, True)
else: else:
logger.info( logger.info(
f"when using tensorrt, dynamic shape is a suggested option, you can use '--shape_info_filename=shape.txt' for offline dygnamic shape tuning" f"when using tensorrt, dynamic shape is a suggested option, you can use '--shape_info_filename=shape.txt' for offline dygnamic shape tuning"
) )
elif args.use_npu:
config.enable_npu()
elif args.use_xpu: elif args.use_xpu:
config.enable_xpu(10 * 1024 * 1024) config.enable_xpu(10 * 1024 * 1024)
else: else:
...@@ -397,56 +400,81 @@ def draw_ocr(image, ...@@ -397,56 +400,81 @@ def draw_ocr(image,
def draw_ocr_box_txt(image, def draw_ocr_box_txt(image,
boxes, boxes,
txts, txts=None,
scores=None, scores=None,
drop_score=0.5, drop_score=0.5,
font_path="./doc/simfang.ttf"): font_path="./doc/fonts/simfang.ttf"):
h, w = image.height, image.width h, w = image.height, image.width
img_left = image.copy() img_left = image.copy()
img_right = Image.new('RGB', (w, h), (255, 255, 255)) img_right = np.ones((h, w, 3), dtype=np.uint8) * 255
import random
random.seed(0) random.seed(0)
draw_left = ImageDraw.Draw(img_left) draw_left = ImageDraw.Draw(img_left)
draw_right = ImageDraw.Draw(img_right) if txts is None or len(txts) != len(boxes):
txts = [None] * len(boxes)
for idx, (box, txt) in enumerate(zip(boxes, txts)): for idx, (box, txt) in enumerate(zip(boxes, txts)):
if scores is not None and scores[idx] < drop_score: if scores is not None and scores[idx] < drop_score:
continue continue
color = (random.randint(0, 255), random.randint(0, 255), color = (random.randint(0, 255), random.randint(0, 255),
random.randint(0, 255)) random.randint(0, 255))
draw_left.polygon(box, fill=color) draw_left.polygon(box, fill=color)
draw_right.polygon( img_right_text = draw_box_txt_fine((w, h), box, txt, font_path)
[ pts = np.array(box, np.int32).reshape((-1, 1, 2))
box[0][0], box[0][1], box[1][0], box[1][1], box[2][0], cv2.polylines(img_right_text, [pts], True, color, 1)
box[2][1], box[3][0], box[3][1] img_right = cv2.bitwise_and(img_right, img_right_text)
],
outline=color)
box_height = math.sqrt((box[0][0] - box[3][0])**2 + (box[0][1] - box[3][
1])**2)
box_width = math.sqrt((box[0][0] - box[1][0])**2 + (box[0][1] - box[1][
1])**2)
if box_height > 2 * box_width:
font_size = max(int(box_width * 0.9), 10)
font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
cur_y = box[0][1]
for c in txt:
char_size = font.getsize(c)
draw_right.text(
(box[0][0] + 3, cur_y), c, fill=(0, 0, 0), font=font)
cur_y += char_size[1]
else:
font_size = max(int(box_height * 0.8), 10)
font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
draw_right.text(
[box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font)
img_left = Image.blend(image, img_left, 0.5) img_left = Image.blend(image, img_left, 0.5)
img_show = Image.new('RGB', (w * 2, h), (255, 255, 255)) img_show = Image.new('RGB', (w * 2, h), (255, 255, 255))
img_show.paste(img_left, (0, 0, w, h)) img_show.paste(img_left, (0, 0, w, h))
img_show.paste(img_right, (w, 0, w * 2, h)) img_show.paste(Image.fromarray(img_right), (w, 0, w * 2, h))
return np.array(img_show) return np.array(img_show)
def draw_box_txt_fine(img_size, box, txt, font_path="./doc/fonts/simfang.ttf"):
    """Render ``txt`` onto a white canvas so it lies inside quadrilateral ``box``.

    The text is drawn on an axis-aligned white patch whose sides match the
    box edge lengths (tall, narrow boxes are drawn sideways and rotated
    back), then perspective-warped so its corners land on the box corners.

    Args:
        img_size: (width, height) of the output canvas.
        box: four corner points ordered top-left, top-right, bottom-right,
            bottom-left.
        txt: text to render; a falsy value leaves the patch blank.
        font_path: path to the TrueType font file.

    Returns:
        uint8 image of shape (img_size[1], img_size[0], 3), white background
        with the warped text.
    """
    # Edge lengths of the quadrilateral: left edge gives the height,
    # top edge gives the width.
    height = int(
        math.sqrt((box[0][0] - box[3][0])**2 + (box[0][1] - box[3][1])**2))
    width = int(
        math.sqrt((box[0][0] - box[1][0])**2 + (box[0][1] - box[1][1])**2))

    # A box much taller than wide holds vertical text: lay it out on a
    # transposed patch and rotate the patch back afterwards.
    vertical = height > 2 * width and height > 30
    patch_size = (height, width) if vertical else (width, height)
    patch = Image.new('RGB', patch_size, (255, 255, 255))
    if txt:
        font = create_font(txt, patch_size, font_path)
        ImageDraw.Draw(patch).text([0, 0], txt, fill=(0, 0, 0), font=font)
    if vertical:
        patch = patch.transpose(Image.ROTATE_270)

    # Map the upright patch corners onto the quadrilateral corners.
    src_pts = np.float32(
        [[0, 0], [width, 0], [width, height], [0, height]])
    dst_pts = np.array(box, dtype=np.float32)
    warp = cv2.getPerspectiveTransform(src_pts, dst_pts)

    return cv2.warpPerspective(
        np.array(patch, dtype=np.uint8),
        warp,
        img_size,
        flags=cv2.INTER_NEAREST,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(255, 255, 255))
def create_font(txt, sz, font_path="./doc/fonts/simfang.ttf"):
    """Return a TrueType font sized so that ``txt`` fits inside box ``sz``.

    Args:
        txt: the text that will be rendered.
        sz: (width, height) of the target box in pixels.
        font_path: path to the TrueType font file.

    Returns:
        An ``ImageFont`` whose rendered width of ``txt`` does not exceed
        ``sz[0]``.
    """
    # Start just under the box height, then shrink proportionally if the
    # rendered text would overflow the box width.
    size = int(sz[1] * 0.99)
    # NOTE(review): ImageFont.getsize was removed in Pillow >= 10 — confirm
    # the pinned Pillow version supports it.
    face = ImageFont.truetype(font_path, size, encoding="utf-8")
    text_width = face.getsize(txt)[0]
    if text_width > sz[0]:
        size = int(size * sz[0] / text_width)
        face = ImageFont.truetype(font_path, size, encoding="utf-8")
    return face
def str_count(s): def str_count(s):
""" """
Count the number of Chinese characters, Count the number of Chinese characters,
......
...@@ -37,6 +37,46 @@ from ppocr.postprocess import build_post_process ...@@ -37,6 +37,46 @@ from ppocr.postprocess import build_post_process
from ppocr.utils.save_load import load_model from ppocr.utils.save_load import load_model
from ppocr.utils.utility import get_image_file_list from ppocr.utils.utility import get_image_file_list
import tools.program as program import tools.program as program
from PIL import Image, ImageDraw, ImageFont
import math
def draw_e2e_res_for_chinese(image,
                             boxes,
                             txts,
                             config,
                             img_name,
                             font_path="./doc/simfang.ttf"):
    """Visualize end-to-end results (boxes + recognized text) for CJK fonts.

    Produces a side-by-side image — left: the input blended with filled,
    randomly-colored polygons; right: the polygon outlines with their
    recognized text — and writes it under
    ``<dirname(Global.save_res_path)>/e2e_results/<basename(img_name)>``.

    Args:
        image: PIL Image in RGB (converted by the caller from BGR).
        boxes: iterable of polygons, each an array-like of (x, y) points.
        txts: recognized strings, one per polygon.
        config: loaded config dict; only ``config['Global']['save_res_path']``
            is read, to derive the output directory.
        img_name: source image path; its basename names the output file.
        font_path: TrueType font used to draw the text on the right panel.
    """
    h, w = image.height, image.width
    img_left = image.copy()
    img_right = Image.new('RGB', (w, h), (255, 255, 255))

    import random

    # Fixed seed so the same image always gets the same polygon colors.
    random.seed(0)
    draw_left = ImageDraw.Draw(img_left)
    draw_right = ImageDraw.Draw(img_right)
    for idx, (box, txt) in enumerate(zip(boxes, txts)):
        # PIL's polygon() wants a flat sequence of (x, y) tuples.
        box = np.array(box)
        box = [tuple(x) for x in box]
        color = (random.randint(0, 255), random.randint(0, 255),
                 random.randint(0, 255))
        draw_left.polygon(box, fill=color)
        draw_right.polygon(box, outline=color)
        font = ImageFont.truetype(font_path, 15, encoding="utf-8")
        # Anchor the text at the polygon's first (top-left) point.
        draw_right.text([box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font)

    # 50/50 blend keeps the original image visible under the colored fills.
    img_left = Image.blend(image, img_left, 0.5)
    img_show = Image.new('RGB', (w * 2, h), (255, 255, 255))
    img_show.paste(img_left, (0, 0, w, h))
    img_show.paste(img_right, (w, 0, w * 2, h))

    save_e2e_path = os.path.dirname(config['Global'][
        'save_res_path']) + "/e2e_results/"
    if not os.path.exists(save_e2e_path):
        os.makedirs(save_e2e_path)
    save_path = os.path.join(save_e2e_path, os.path.basename(img_name))
    # Reverse the channel axis: the PIL image is RGB, cv2.imwrite expects BGR.
    cv2.imwrite(save_path, np.array(img_show)[:, :, ::-1])
    logger.info("The e2e Image saved in {}".format(save_path))
def draw_e2e_res(dt_boxes, strs, config, img, img_name): def draw_e2e_res(dt_boxes, strs, config, img, img_name):
...@@ -113,7 +153,19 @@ def main(): ...@@ -113,7 +153,19 @@ def main():
otstr = file + "\t" + json.dumps(dt_boxes_json) + "\n" otstr = file + "\t" + json.dumps(dt_boxes_json) + "\n"
fout.write(otstr.encode()) fout.write(otstr.encode())
src_img = cv2.imread(file) src_img = cv2.imread(file)
if global_config['infer_visual_type'] == 'EN':
draw_e2e_res(points, strs, config, src_img, file) draw_e2e_res(points, strs, config, src_img, file)
elif global_config['infer_visual_type'] == 'CN':
src_img = Image.fromarray(
cv2.cvtColor(src_img, cv2.COLOR_BGR2RGB))
draw_e2e_res_for_chinese(
src_img,
points,
strs,
config,
file,
font_path="./doc/fonts/simfang.ttf")
logger.info("success!") logger.info("success!")
......
...@@ -114,7 +114,7 @@ def merge_config(config, opts): ...@@ -114,7 +114,7 @@ def merge_config(config, opts):
return config return config
def check_device(use_gpu, use_xpu=False): def check_device(use_gpu, use_xpu=False, use_npu=False):
""" """
Log error and exit when set use_gpu=true in paddlepaddle Log error and exit when set use_gpu=true in paddlepaddle
cpu version. cpu version.
...@@ -134,24 +134,8 @@ def check_device(use_gpu, use_xpu=False): ...@@ -134,24 +134,8 @@ def check_device(use_gpu, use_xpu=False):
if use_xpu and not paddle.device.is_compiled_with_xpu(): if use_xpu and not paddle.device.is_compiled_with_xpu():
print(err.format("use_xpu", "xpu", "xpu", "use_xpu")) print(err.format("use_xpu", "xpu", "xpu", "use_xpu"))
sys.exit(1) sys.exit(1)
except Exception as e: if use_npu and not paddle.device.is_compiled_with_npu():
pass print(err.format("use_npu", "npu", "npu", "use_npu"))
def check_xpu(use_xpu):
"""
Log error and exit when set use_xpu=true in paddlepaddle
cpu/gpu version.
"""
err = "Config use_xpu cannot be set as true while you are " \
"using paddlepaddle cpu/gpu version ! \nPlease try: \n" \
"\t1. Install paddlepaddle-xpu to run model on XPU \n" \
"\t2. Set use_xpu as false in config file to run " \
"model on CPU/GPU"
try:
if use_xpu and not paddle.is_compiled_with_xpu():
print(err)
sys.exit(1) sys.exit(1)
except Exception as e: except Exception as e:
pass pass
...@@ -279,7 +263,9 @@ def train(config, ...@@ -279,7 +263,9 @@ def train(config,
model_average = True model_average = True
# use amp # use amp
if scaler: if scaler:
with paddle.amp.auto_cast(level=amp_level, custom_black_list=amp_custom_black_list): with paddle.amp.auto_cast(
level=amp_level,
custom_black_list=amp_custom_black_list):
if model_type == 'table' or extra_input: if model_type == 'table' or extra_input:
preds = model(images, data=batch[1:]) preds = model(images, data=batch[1:])
elif model_type in ["kie"]: elif model_type in ["kie"]:
...@@ -479,7 +465,7 @@ def eval(model, ...@@ -479,7 +465,7 @@ def eval(model,
extra_input=False, extra_input=False,
scaler=None, scaler=None,
amp_level='O2', amp_level='O2',
amp_custom_black_list = []): amp_custom_black_list=[]):
model.eval() model.eval()
with paddle.no_grad(): with paddle.no_grad():
total_frame = 0.0 total_frame = 0.0
...@@ -500,7 +486,9 @@ def eval(model, ...@@ -500,7 +486,9 @@ def eval(model,
# use amp # use amp
if scaler: if scaler:
with paddle.amp.auto_cast(level=amp_level, custom_black_list=amp_custom_black_list): with paddle.amp.auto_cast(
level=amp_level,
custom_black_list=amp_custom_black_list):
if model_type == 'table' or extra_input: if model_type == 'table' or extra_input:
preds = model(images, data=batch[1:]) preds = model(images, data=batch[1:])
elif model_type in ["kie"]: elif model_type in ["kie"]:
...@@ -627,14 +615,9 @@ def preprocess(is_train=False): ...@@ -627,14 +615,9 @@ def preprocess(is_train=False):
logger = get_logger(log_file=log_file) logger = get_logger(log_file=log_file)
# check if set use_gpu=True in paddlepaddle cpu version # check if set use_gpu=True in paddlepaddle cpu version
use_gpu = config['Global']['use_gpu'] use_gpu = config['Global'].get('use_gpu', False)
use_xpu = config['Global'].get('use_xpu', False) use_xpu = config['Global'].get('use_xpu', False)
use_npu = config['Global'].get('use_npu', False)
# check if set use_xpu=True in paddlepaddle cpu/gpu version
use_xpu = False
if 'use_xpu' in config['Global']:
use_xpu = config['Global']['use_xpu']
check_xpu(use_xpu)
alg = config['Architecture']['algorithm'] alg = config['Architecture']['algorithm']
assert alg in [ assert alg in [
...@@ -642,15 +625,17 @@ def preprocess(is_train=False): ...@@ -642,15 +625,17 @@ def preprocess(is_train=False):
'CLS', 'PGNet', 'Distillation', 'NRTR', 'TableAttn', 'SAR', 'PSE', 'CLS', 'PGNet', 'Distillation', 'NRTR', 'TableAttn', 'SAR', 'PSE',
'SEED', 'SDMGR', 'LayoutXLM', 'LayoutLM', 'LayoutLMv2', 'PREN', 'FCE', 'SEED', 'SDMGR', 'LayoutXLM', 'LayoutLM', 'LayoutLMv2', 'PREN', 'FCE',
'SVTR', 'ViTSTR', 'ABINet', 'DB++', 'TableMaster', 'SPIN', 'VisionLAN', 'SVTR', 'ViTSTR', 'ABINet', 'DB++', 'TableMaster', 'SPIN', 'VisionLAN',
'Gestalt', 'SLANet', 'RobustScanner' 'Gestalt', 'SLANet', 'RobustScanner', 'CT'
] ]
if use_xpu: if use_xpu:
device = 'xpu:{0}'.format(os.getenv('FLAGS_selected_xpus', 0)) device = 'xpu:{0}'.format(os.getenv('FLAGS_selected_xpus', 0))
elif use_npu:
device = 'npu:{0}'.format(os.getenv('FLAGS_selected_npus', 0))
else: else:
device = 'gpu:{}'.format(dist.ParallelEnv() device = 'gpu:{}'.format(dist.ParallelEnv()
.dev_id) if use_gpu else 'cpu' .dev_id) if use_gpu else 'cpu'
check_device(use_gpu, use_xpu) check_device(use_gpu, use_xpu, use_npu)
device = paddle.set_device(device) device = paddle.set_device(device)
......
...@@ -119,6 +119,7 @@ def main(config, device, logger, vdl_writer): ...@@ -119,6 +119,7 @@ def main(config, device, logger, vdl_writer):
config['Loss']['ignore_index'] = char_num - 1 config['Loss']['ignore_index'] = char_num - 1
model = build_model(config['Architecture']) model = build_model(config['Architecture'])
use_sync_bn = config["Global"].get("use_sync_bn", False) use_sync_bn = config["Global"].get("use_sync_bn", False)
if use_sync_bn: if use_sync_bn:
model = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(model) model = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(model)
...@@ -146,7 +147,7 @@ def main(config, device, logger, vdl_writer): ...@@ -146,7 +147,7 @@ def main(config, device, logger, vdl_writer):
use_amp = config["Global"].get("use_amp", False) use_amp = config["Global"].get("use_amp", False)
amp_level = config["Global"].get("amp_level", 'O2') amp_level = config["Global"].get("amp_level", 'O2')
amp_custom_black_list = config['Global'].get('amp_custom_black_list',[]) amp_custom_black_list = config['Global'].get('amp_custom_black_list', [])
if use_amp: if use_amp:
AMP_RELATED_FLAGS_SETTING = { AMP_RELATED_FLAGS_SETTING = {
'FLAGS_cudnn_batchnorm_spatial_persistent': 1, 'FLAGS_cudnn_batchnorm_spatial_persistent': 1,
...@@ -161,7 +162,10 @@ def main(config, device, logger, vdl_writer): ...@@ -161,7 +162,10 @@ def main(config, device, logger, vdl_writer):
use_dynamic_loss_scaling=use_dynamic_loss_scaling) use_dynamic_loss_scaling=use_dynamic_loss_scaling)
if amp_level == "O2": if amp_level == "O2":
model, optimizer = paddle.amp.decorate( model, optimizer = paddle.amp.decorate(
models=model, optimizers=optimizer, level=amp_level, master_weight=True) models=model,
optimizers=optimizer,
level=amp_level,
master_weight=True)
else: else:
scaler = None scaler = None
...@@ -174,7 +178,8 @@ def main(config, device, logger, vdl_writer): ...@@ -174,7 +178,8 @@ def main(config, device, logger, vdl_writer):
# start train # start train
program.train(config, train_dataloader, valid_dataloader, device, model, program.train(config, train_dataloader, valid_dataloader, device, model,
loss_class, optimizer, lr_scheduler, post_process_class, loss_class, optimizer, lr_scheduler, post_process_class,
eval_class, pre_best_model_dict, logger, vdl_writer, scaler,amp_level, amp_custom_black_list) eval_class, pre_best_model_dict, logger, vdl_writer, scaler,
amp_level, amp_custom_black_list)
def test_reader(config, device, logger): def test_reader(config, device, logger):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册