diff --git a/PPOCRLabel/PPOCRLabel.py b/PPOCRLabel/PPOCRLabel.py
index c17db91a5b5cd9d3cbb4b5bf6c87afd745d0870d..390c2b159575bf1c60387e42b5be3d917ba845f7 100644
--- a/PPOCRLabel/PPOCRLabel.py
+++ b/PPOCRLabel/PPOCRLabel.py
@@ -2449,13 +2449,6 @@ class MainWindow(QMainWindow):
export PPLabel and CSV to JSON (PubTabNet)
'''
import pandas as pd
- from libs.dataPartitionDialog import DataPartitionDialog
-
- # data partition user input
- partitionDialog = DataPartitionDialog(parent=self)
- partitionDialog.exec()
- if partitionDialog.getStatus() == False:
- return
# automatically save annotations
self.saveFilestate()
@@ -2478,28 +2471,19 @@ class MainWindow(QMainWindow):
labeldict[file] = eval(label)
else:
labeldict[file] = []
+
+ # read table recognition output
+ TableRec_excel_dir = os.path.join(
+ self.lastOpenDir, 'tableRec_excel_output')
- train_split, val_split, test_split = partitionDialog.getDataPartition()
- # check validate
- if train_split + val_split + test_split > 100:
- msg = "The sum of training, validation and testing data should be less than 100%"
- QMessageBox.information(self, "Information", msg)
- return
- print(train_split, val_split, test_split)
- train_split, val_split, test_split = float(train_split) / 100., float(val_split) / 100., float(test_split) / 100.
- train_id = int(len(labeldict) * train_split)
- val_id = int(len(labeldict) * (train_split + val_split))
- print('Data partition: train:', train_id,
- 'validation:', val_id - train_id,
- 'test:', len(labeldict) - val_id)
-
- TableRec_excel_dir = os.path.join(self.lastOpenDir, 'tableRec_excel_output')
- json_results = []
- imgid = 0
+ # save txt
+ fid = open(
+ "{}/gt.txt".format(self.lastOpenDir), "w", encoding='utf-8')
for image_path in labeldict.keys():
# load csv annotations
filename, _ = os.path.splitext(os.path.basename(image_path))
- csv_path = os.path.join(TableRec_excel_dir, filename + '.xlsx')
+ csv_path = os.path.join(
+ TableRec_excel_dir, filename + '.xlsx')
if not os.path.exists(csv_path):
continue
@@ -2518,28 +2502,31 @@ class MainWindow(QMainWindow):
cells = []
for anno in labeldict[image_path]:
tokens = list(anno['transcription'])
- obb = anno['points']
- hbb = OBB2HBB(np.array(obb)).tolist()
- cells.append({'tokens': tokens, 'bbox': hbb})
-
- # data split
- if imgid < train_id:
- split = 'train'
- elif imgid < val_id:
- split = 'val'
- else:
- split = 'test'
-
- # save dict
- html = {'structure': {'tokens': token_list}, 'cell': cells}
- json_results.append({'filename': os.path.basename(image_path), 'split': split, 'imgid': imgid, 'html': html})
- imgid += 1
-
- # save json
- with open("{}/annotation.json".format(self.lastOpenDir), "w", encoding='utf-8') as fid:
- fid.write(json.dumps(json_results, ensure_ascii=False))
-
- msg = 'JSON sucessfully saved in {}/annotation.json'.format(self.lastOpenDir)
+ cells.append({
+ 'tokens': tokens,
+ 'bbox': anno['points']
+ })
+
+ # build the annotation info
+ html = {
+ 'structure': {
+ 'tokens': token_list
+ },
+ 'cells': cells
+ }
+ d = {
+ 'filename': os.path.basename(image_path),
+ 'html': html
+ }
+ # rebuild the HTML
+ d['gt'] = rebuild_html_from_ppstructure_label(d)
+ fid.write('{}\n'.format(
+ json.dumps(
+ d, ensure_ascii=False)))
+
+ # finish writing the PP-Structure label file
+ fid.close()
+ msg = 'JSON successfully saved in {}/gt.txt'.format(self.lastOpenDir)
QMessageBox.information(self, "Information", msg)
def autolcm(self):
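Each line the rewritten exporter appends to `gt.txt` is a self-contained JSON object (JSON Lines), holding the filename, the table structure plus cells, and the rebuilt HTML string. A minimal sketch of reading the file back, assuming only the field names visible in the hunk above; the path is a placeholder:

```python
import json

# Parse the PP-Structure style gt.txt written by the export above.
# Each line is one JSON object, so the file is JSON Lines, not one JSON array.
with open("gt.txt", encoding="utf-8") as f:
    for line in f:
        d = json.loads(line)
        print(d["filename"])                     # image file name
        print(d["html"]["structure"]["tokens"])  # table structure tokens
        print(d["html"]["cells"])                # [{'tokens': [...], 'bbox': [...]}, ...]
        print(d["gt"])                           # rebuilt HTML ground truth
```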
diff --git a/PPOCRLabel/README.md b/PPOCRLabel/README.md
index 3bdc336827adb87f52e9baa2c012304595b2c656..089a63fd55bb8c127104e7c404852ba52c3ac88c 100644
--- a/PPOCRLabel/README.md
+++ b/PPOCRLabel/README.md
@@ -1,10 +1,14 @@
English | [简体中文](README_ch.md)
-# PPOCRLabel
+# PPOCRLabelv2
-PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field, with built-in PP-OCR model to automatically detect and re-recognize data. It is written in python3 and pyqt5, supporting rectangular box, table and multi-point annotation modes. Annotations can be directly used for the training of PP-OCR detection and recognition models.
+PPOCRLabelv2 is a semi-automatic graphic annotation tool suitable for the OCR field, with a built-in PP-OCR model that automatically detects and re-recognizes data. It is written in Python3 and PyQt5, supporting rectangular box, table, irregular text and key information annotation modes. Annotations can be used directly for training PP-OCR detection and recognition models.
-
+| regular text annotation | table annotation |
+| :-------------------------------------------------: | :--------------------------------------------: |
+| *(image)* | *(image)* |
+| **irregular text annotation** | **key information annotation** |
+| *(image)* | *(image)* |
### Recent Update
diff --git a/PPOCRLabel/README_ch.md b/PPOCRLabel/README_ch.md
index 107f902a68bd68b30d286e8dd88b29752f0c6ad0..3ea684a3f09a6084403fa0b91e2511b7fd790f4b 100644
--- a/PPOCRLabel/README_ch.md
+++ b/PPOCRLabel/README_ch.md
@@ -1,10 +1,14 @@
[English](README.md) | 简体中文
-# PPOCRLabel
+# PPOCRLabelv2
PPOCRLabel是一款适用于OCR领域的半自动化图形标注工具,内置PP-OCR模型对数据自动标注和重新识别。使用Python3和PyQT5编写,支持矩形框标注和四点标注模式,导出格式可直接用于PaddleOCR检测和识别模型的训练。
-
+| 常规标注 | 表格标注 |
+| :-------------------------------------------------: | :--------------------------------------------: |
+| *(图片)* | *(图片)* |
+| **不规则文本标注** | **关键信息标注** |
+| *(图片)* | *(图片)* |
#### 近期更新
- 2022.05:**新增表格标注**,使用方法见下方`2.2 表格标注`(by [whjdark](https://github.com/peterh0323); [Evezerest](https://github.com/Evezerest))
diff --git a/PPOCRLabel/libs/dataPartitionDialog.py b/PPOCRLabel/libs/dataPartitionDialog.py
deleted file mode 100644
index 33bd491552fe773bd07020d82f7ea9bab76e7557..0000000000000000000000000000000000000000
--- a/PPOCRLabel/libs/dataPartitionDialog.py
+++ /dev/null
@@ -1,113 +0,0 @@
-try:
- from PyQt5.QtGui import *
- from PyQt5.QtCore import *
- from PyQt5.QtWidgets import *
-except ImportError:
- from PyQt4.QtGui import *
- from PyQt4.QtCore import *
-
-from libs.utils import newIcon
-
-import time
-import datetime
-import json
-import cv2
-import numpy as np
-
-
-BB = QDialogButtonBox
-
-class DataPartitionDialog(QDialog):
- def __init__(self, parent=None):
- super().__init__()
- self.parnet = parent
- self.title = 'DATA PARTITION'
-
- self.train_ratio = 70
- self.val_ratio = 15
- self.test_ratio = 15
-
- self.initUI()
-
- def initUI(self):
- self.setWindowTitle(self.title)
- self.setWindowModality(Qt.ApplicationModal)
-
- self.flag_accept = True
-
- if self.parnet.lang == 'ch':
- msg = "导出JSON前请保存所有图像的标注且关闭EXCEL!"
- else:
- msg = "Please save all the annotations and close the EXCEL before exporting JSON!"
-
- info_msg = QLabel(msg, self)
- info_msg.setWordWrap(True)
- info_msg.setStyleSheet("color: red")
- info_msg.setFont(QFont('Arial', 12))
-
- train_lbl = QLabel('Train split: ', self)
- train_lbl.setFont(QFont('Arial', 15))
- val_lbl = QLabel('Valid split: ', self)
- val_lbl.setFont(QFont('Arial', 15))
- test_lbl = QLabel('Test split: ', self)
- test_lbl.setFont(QFont('Arial', 15))
-
- self.train_input = QLineEdit(self)
- self.train_input.setFont(QFont('Arial', 15))
- self.val_input = QLineEdit(self)
- self.val_input.setFont(QFont('Arial', 15))
- self.test_input = QLineEdit(self)
- self.test_input.setFont(QFont('Arial', 15))
-
- self.train_input.setText(str(self.train_ratio))
- self.val_input.setText(str(self.val_ratio))
- self.test_input.setText(str(self.test_ratio))
-
- validator = QIntValidator(0, 100)
- self.train_input.setValidator(validator)
- self.val_input.setValidator(validator)
- self.test_input.setValidator(validator)
-
- gridlayout = QGridLayout()
- gridlayout.addWidget(info_msg, 0, 0, 1, 2)
- gridlayout.addWidget(train_lbl, 1, 0)
- gridlayout.addWidget(val_lbl, 2, 0)
- gridlayout.addWidget(test_lbl, 3, 0)
- gridlayout.addWidget(self.train_input, 1, 1)
- gridlayout.addWidget(self.val_input, 2, 1)
- gridlayout.addWidget(self.test_input, 3, 1)
-
- bb = BB(BB.Ok | BB.Cancel, Qt.Horizontal, self)
- bb.button(BB.Ok).setIcon(newIcon('done'))
- bb.button(BB.Cancel).setIcon(newIcon('undo'))
- bb.accepted.connect(self.validate)
- bb.rejected.connect(self.cancel)
- gridlayout.addWidget(bb, 4, 0, 1, 2)
-
- self.setLayout(gridlayout)
-
- self.show()
-
- def validate(self):
- self.flag_accept = True
- self.accept()
-
- def cancel(self):
- self.flag_accept = False
- self.reject()
-
- def getStatus(self):
- return self.flag_accept
-
- def getDataPartition(self):
- self.train_ratio = int(self.train_input.text())
- self.val_ratio = int(self.val_input.text())
- self.test_ratio = int(self.test_input.text())
-
- return self.train_ratio, self.val_ratio, self.test_ratio
-
- def closeEvent(self, event):
- self.flag_accept = False
- self.reject()
-
-
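With the partition dialog removed, the export no longer splits data into train/val/test. If a split is still wanted, it can be done offline on the exported `gt.txt`; a minimal sketch, assuming 70/15/15 ratios and output file names that are not part of this change:

```python
import random

# Hypothetical offline replacement for the deleted partition dialog:
# shuffle the exported label file and cut it into three parts.
with open("gt.txt", encoding="utf-8") as f:
    lines = f.readlines()

random.seed(0)  # deterministic split
random.shuffle(lines)

n = len(lines)
cut1, cut2 = int(n * 0.70), int(n * 0.85)  # 70/15/15, assumed ratios
for name, part in [("gt_train.txt", lines[:cut1]),
                   ("gt_val.txt", lines[cut1:cut2]),
                   ("gt_test.txt", lines[cut2:])]:
    with open(name, "w", encoding="utf-8") as f:
        f.writelines(part)
```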
diff --git a/PPOCRLabel/libs/utils.py b/PPOCRLabel/libs/utils.py
index e397f139e0cf34de4fd517f920dd3fef12cc2cd7..1bd46ab4dac65f4e63e4ac4b2af5a8d295d89671 100644
--- a/PPOCRLabel/libs/utils.py
+++ b/PPOCRLabel/libs/utils.py
@@ -176,18 +176,6 @@ def boxPad(box, imgShape, pad : int) -> np.array:
return box
-def OBB2HBB(obb) -> np.array:
- """
- Convert Oriented Bounding Box to Horizontal Bounding Box.
- """
- hbb = np.zeros(4, dtype=np.int32)
- hbb[0] = min(obb[:, 0])
- hbb[1] = min(obb[:, 1])
- hbb[2] = max(obb[:, 0])
- hbb[3] = max(obb[:, 1])
- return hbb
-
-
def expand_list(merged, html_list):
'''
Fill blanks according to merged cells
@@ -232,6 +220,26 @@ def convert_token(html_list):
return token_list
+def rebuild_html_from_ppstructure_label(label_info):
+ from html import escape
+ html_code = label_info['html']['structure']['tokens'].copy()
+ to_insert = [
+ i for i, tag in enumerate(html_code) if tag in ('<td>', '>')
+ ]
+ for i, cell in zip(to_insert[::-1], label_info['html']['cells'][::-1]):
+ if cell['tokens']:
+ cell = [
+ escape(token) if len(token) == 1 else token
+ for token in cell['tokens']
+ ]
+ cell = ''.join(cell)
+ html_code.insert(i + 1, cell)
+ html_code = ''.join(html_code)
+ html_code = '<html><body><table>{}</table></body></html>'.format(html_code)
+ return html_code
+
+
def stepsInfo(lang='en'):
if lang == 'ch':
msg = "1. 安装与运行:使用上述命令安装与运行程序。\n" \
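A small worked example of the new helper; the input dict is invented for illustration, and the import path assumes the script runs from the PPOCRLabel directory, as the tool itself does:

```python
from libs.utils import rebuild_html_from_ppstructure_label

# Invented one-row, two-cell table label in the exporter's format.
label_info = {
    'html': {
        'structure': {'tokens': ['<tr>', '<td>', '</td>', '<td>', '</td>', '</tr>']},
        'cells': [{'tokens': ['A', '1']}, {'tokens': ['B', '2']}],
    }
}
# Cell text is inserted right after each '<td>' (or span-ending '>') token,
# back to front so earlier indices stay valid, then wrapped into a full page.
print(rebuild_html_from_ppstructure_label(label_info))
# <html><body><table><tr><td>A1</td><td>B2</td></tr></table></body></html>
```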
diff --git a/PPOCRLabel/setup.py b/PPOCRLabel/setup.py
index 1ec54df11a75b8a7ad8f023ca4a5b24ef5343d71..1750f84b8259a237fb6bb1b5eb9dc33e29441bc1 100644
--- a/PPOCRLabel/setup.py
+++ b/PPOCRLabel/setup.py
@@ -33,7 +33,7 @@ setup(
package_dir={'PPOCRLabel': ''},
include_package_data=True,
entry_points={"console_scripts": ["PPOCRLabel= PPOCRLabel.PPOCRLabel:main"]},
- version='1.0.2',
+ version='2.1.1',
install_requires=requirements,
license='Apache License 2.0',
description='PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field, with built-in PPOCR model to automatically detect and re-recognize data. It is written in python3 and pyqt5, supporting rectangular box annotation and four-point annotation modes. Annotations can be directly used for the training of PPOCR detection and recognition models',
diff --git a/README.md b/README.md
index 75828c3589a78e33a8c4feb15a771c115a33e5e7..62cc8536da3e7cd6d49aea19b85e19cc2537d642 100644
--- a/README.md
+++ b/README.md
@@ -123,7 +123,7 @@ PaddleOCR support a variety of cutting-edge algorithms related to OCR, and devel
- [Inference and Deployment](./deploy/README.md)
- [Python Inference](./ppstructure/docs/inference_en.md)
- [C++ Inference](./deploy/cpp_infer/readme.md)
- - [Serving](./deploy/pdserving/README.md)
+ - [Serving](./deploy/hubserving/readme_en.md)
- [Academic Algorithms](./doc/doc_en/algorithm_overview_en.md)
- [Text detection](./doc/doc_en/algorithm_overview_en.md)
- [Text recognition](./doc/doc_en/algorithm_overview_en.md)
diff --git a/README_ch.md b/README_ch.md
index 8ffa7a3755970374e1559d3c771bd82c02010a61..24a925f6c8092f28b58452e761ac74b0a5f3d2c3 100755
--- a/README_ch.md
+++ b/README_ch.md
@@ -135,7 +135,7 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力
- [推理部署](./deploy/README_ch.md)
- [基于Python预测引擎推理](./ppstructure/docs/inference.md)
- [基于C++预测引擎推理](./deploy/cpp_infer/readme_ch.md)
- - [服务化部署](./deploy/pdserving/README_CN.md)
+ - [服务化部署](./deploy/hubserving/readme.md)
- [前沿算法与模型🚀](./doc/doc_ch/algorithm_overview.md)
- [文本检测算法](./doc/doc_ch/algorithm_overview.md)
- [文本识别算法](./doc/doc_ch/algorithm_overview.md)
diff --git "a/applications/\345\217\221\347\245\250\345\205\263\351\224\256\344\277\241\346\201\257\346\212\275\345\217\226.md" "b/applications/\345\217\221\347\245\250\345\205\263\351\224\256\344\277\241\346\201\257\346\212\275\345\217\226.md"
index 14a6a1c8f1dd2350767afa162063b06791e79dd4..82f5b8d48600c6bebb4d3183ee801305d305d531 100644
--- "a/applications/\345\217\221\347\245\250\345\205\263\351\224\256\344\277\241\346\201\257\346\212\275\345\217\226.md"
+++ "b/applications/\345\217\221\347\245\250\345\205\263\351\224\256\344\277\241\346\201\257\346\212\275\345\217\226.md"
@@ -30,7 +30,7 @@ cd PaddleOCR
# 安装PaddleOCR的依赖
pip install -r requirements.txt
# 安装关键信息抽取任务的依赖
-pip install -r ./ppstructure/vqa/requirements.txt
+pip install -r ./ppstructure/kie/requirements.txt
```
## 4. 关键信息抽取
@@ -94,7 +94,7 @@ VI-LayoutXLM的配置为[ser_vi_layoutxlm_xfund_zh_udml.yml](../configs/kie/vi_l
```yml
Architecture:
- model_type: &model_type "vqa"
+ model_type: &model_type "kie"
name: DistillationModel
algorithm: Distillation
Models:
@@ -177,7 +177,7 @@ python3 tools/eval.py -c ./fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.
使用下面的命令进行预测。
```bash
-python3 tools/infer_vqa_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/XFUND/zh_val/val.json Global.infer_mode=False
+python3 tools/infer_kie_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/XFUND/zh_val/val.json Global.infer_mode=False
```
预测结果会保存在配置文件中的`Global.save_res_path`目录中。
@@ -195,7 +195,7 @@ python3 tools/infer_vqa_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architect
```bash
-python3 tools/infer_vqa_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/imgs/b25.jpg Global.infer_mode=True
+python3 tools/infer_kie_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/imgs/b25.jpg Global.infer_mode=True
```
结果如下所示。
@@ -211,7 +211,7 @@ python3 tools/infer_vqa_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architect
如果希望构建基于你在垂类场景训练得到的OCR检测与识别模型,可以使用下面的方法传入检测与识别的inference 模型路径,即可完成OCR文本检测与识别以及SER的串联过程。
```bash
-python3 tools/infer_vqa_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/imgs/b25.jpg Global.infer_mode=True Global.kie_rec_model_dir="your_rec_model" Global.kie_det_model_dir="your_det_model"
+python3 tools/infer_kie_token_ser.py -c fapiao/ser_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/imgs/b25.jpg Global.infer_mode=True Global.kie_rec_model_dir="your_rec_model" Global.kie_det_model_dir="your_det_model"
```
### 4.4 关系抽取(Relation Extraction)
@@ -316,7 +316,7 @@ python3 tools/eval.py -c ./fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.c
# -o 后面的字段是RE任务的配置
# -c_ser 后面的是SER任务的配置文件
# -c_ser 后面的字段是SER任务的配置
-python3 tools/infer_vqa_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/re_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/val.json Global.infer_mode=False -c_ser fapiao/ser_vi_layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy
+python3 tools/infer_kie_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/re_vi_layoutxlm_fapiao_trained/best_accuracy Global.infer_img=./train_data/zzsfp/val.json Global.infer_mode=False -c_ser fapiao/ser_vi_layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_trained/best_accuracy
```
预测结果会保存在配置文件中的`Global.save_res_path`目录中。
@@ -333,11 +333,11 @@ python3 tools/infer_vqa_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Archite
如果希望使用OCR引擎结果得到的结果进行推理,则可以使用下面的命令进行推理。
```bash
-python3 tools/infer_vqa_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/re_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/val.json Global.infer_mode=True -c_ser fapiao/ser_vi_layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy
+python3 tools/infer_kie_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/re_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/val.json Global.infer_mode=True -c_ser fapiao/ser_vi_layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy
```
如果希望构建基于你在垂类场景训练得到的OCR检测与识别模型,可以使用下面的方法传入,即可完成SER + RE的串联过程。
```bash
-python3 tools/infer_vqa_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/re_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/val.json Global.infer_mode=True -c_ser fapiao/ser_vi_layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.kie_rec_model_dir="your_rec_model" Global.kie_det_model_dir="your_det_model"
+python3 tools/infer_kie_token_ser_re.py -c fapiao/re_vi_layoutxlm.yml -o Architecture.Backbone.checkpoints=fapiao/models/re_vi_layoutxlm_fapiao_udml/best_accuracy Global.infer_img=./train_data/zzsfp/val.json Global.infer_mode=True -c_ser fapiao/ser_vi_layoutxlm.yml -o_ser Architecture.Backbone.checkpoints=fapiao/models/ser_vi_layoutxlm_fapiao_udml/best_accuracy Global.kie_rec_model_dir="your_rec_model" Global.kie_det_model_dir="your_det_model"
```
diff --git a/configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml b/configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml
index acf438950a43af3356c7ab0aadf956fdf226814e..0c6ab2a0d1d9733d647dc40a7b182fe201866a78 100644
--- a/configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml
+++ b/configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml
@@ -191,7 +191,6 @@ Eval:
channel_first: False
- DetLabelEncode: # Class handling label
- DetResizeForTest:
-# image_shape: [736, 1280]
- NormalizeImage:
scale: 1./255.
mean: [0.485, 0.456, 0.406]
diff --git a/configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml b/configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml
index ef58befd694e26704c734d7fd072ebc3370c8554..000d95e892cb8e6dcceeb7c22264c28934d1000c 100644
--- a/configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml
+++ b/configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml
@@ -24,6 +24,7 @@ Architecture:
model_type: det
Models:
Student:
+ pretrained:
model_type: det
algorithm: DB
Transform: null
@@ -40,6 +41,7 @@ Architecture:
name: DBHead
k: 50
Student2:
+ pretrained:
model_type: det
algorithm: DB
Transform: null
@@ -91,14 +93,11 @@ Loss:
- ["Student", "Student2"]
maps_name: "thrink_maps"
weight: 1.0
- # act: None
model_name_pairs: ["Student", "Student2"]
key: maps
- DistillationDBLoss:
weight: 1.0
model_name_list: ["Student", "Student2"]
- # key: maps
- # name: DBLoss
balance_loss: true
main_loss_type: DiceLoss
alpha: 5
@@ -197,6 +196,7 @@ Train:
drop_last: false
batch_size_per_card: 8
num_workers: 4
+
Eval:
dataset:
name: SimpleDataSet
@@ -204,31 +204,21 @@ Eval:
label_file_list:
- ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
transforms:
- - DecodeImage:
- img_mode: BGR
- channel_first: false
- - DetLabelEncode: null
- - DetResizeForTest: null
- - NormalizeImage:
- scale: 1./255.
- mean:
- - 0.485
- - 0.456
- - 0.406
- std:
- - 0.229
- - 0.224
- - 0.225
- order: hwc
- - ToCHWImage: null
- - KeepKeys:
- keep_keys:
- - image
- - shape
- - polys
- - ignore_tags
+ - DecodeImage: # load image
+ img_mode: BGR
+ channel_first: False
+ - DetLabelEncode: # Class handling label
+ - DetResizeForTest:
+ - NormalizeImage:
+ scale: 1./255.
+ mean: [0.485, 0.456, 0.406]
+ std: [0.229, 0.224, 0.225]
+ order: 'hwc'
+ - ToCHWImage:
+ - KeepKeys:
+ keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
loader:
- shuffle: false
- drop_last: false
- batch_size_per_card: 1
- num_workers: 2
+ shuffle: False
+ drop_last: False
+ batch_size_per_card: 1 # must be 1
+ num_workers: 2
\ No newline at end of file
diff --git a/configs/det/det_r18_vd_ct.yml b/configs/det/det_r18_vd_ct.yml
new file mode 100644
index 0000000000000000000000000000000000000000..42922dfd22c0e49d20d50534c76fedae16b27a4a
--- /dev/null
+++ b/configs/det/det_r18_vd_ct.yml
@@ -0,0 +1,107 @@
+Global:
+ use_gpu: true
+ epoch_num: 600
+ log_smooth_window: 20
+ print_batch_step: 10
+ save_model_dir: ./output/det_ct/
+ save_epoch_step: 10
+ # evaluation is run every 1000 iterations
+ eval_batch_step: [0,1000]
+ cal_metric_during_train: False
+ pretrained_model: ./pretrain_models/ResNet18_vd_pretrained.pdparams
+ checkpoints:
+ save_inference_dir:
+ use_visualdl: False
+ infer_img: doc/imgs_en/img623.jpg
+ save_res_path: ./output/det_ct/predicts_ct.txt
+
+Architecture:
+ model_type: det
+ algorithm: CT
+ Transform:
+ Backbone:
+ name: ResNet_vd
+ layers: 18
+ Neck:
+ name: CTFPN
+ Head:
+ name: CT_Head
+ in_channels: 512
+ hidden_dim: 128
+ num_classes: 3
+
+Loss:
+ name: CTLoss
+
+Optimizer:
+ name: Adam
+ lr: #PolynomialDecay
+ name: Linear
+ learning_rate: 0.001
+ end_lr: 0.
+ epochs: 600
+ step_each_epoch: 1254
+ power: 0.9
+
+PostProcess:
+ name: CTPostProcess
+ box_type: poly
+
+Metric:
+ name: CTMetric
+ main_indicator: f_score
+
+Train:
+ dataset:
+ name: SimpleDataSet
+ data_dir: ./train_data/total_text/train
+ label_file_list:
+ - ./train_data/total_text/train/train.txt
+ ratio_list: [1.0]
+ transforms:
+ - DecodeImage:
+ img_mode: RGB
+ channel_first: False
+ - CTLabelEncode: # Class handling label
+ - RandomScale:
+ - MakeShrink:
+ - GroupRandomHorizontalFlip:
+ - GroupRandomRotate:
+ - GroupRandomCropPadding:
+ - MakeCentripetalShift:
+ - ColorJitter:
+ brightness: 0.125
+ saturation: 0.5
+ - ToCHWImage:
+ - NormalizeImage:
+ - KeepKeys:
+ keep_keys: ['image', 'gt_kernel', 'training_mask', 'gt_instance', 'gt_kernel_instance', 'training_mask_distance', 'gt_distance'] # the order of the dataloader list
+ loader:
+ shuffle: True
+ drop_last: True
+ batch_size_per_card: 4
+ num_workers: 8
+
+Eval:
+ dataset:
+ name: SimpleDataSet
+ data_dir: ./train_data/total_text/test
+ label_file_list:
+ - ./train_data/total_text/test/test.txt
+ ratio_list: [1.0]
+ transforms:
+ - DecodeImage:
+ img_mode: RGB
+ channel_first: False
+ - CTLabelEncode: # Class handling label
+ - ScaleAlignedShort:
+ - NormalizeImage:
+ order: 'hwc'
+ - ToCHWImage:
+ - KeepKeys:
+ keep_keys: ['image', 'shape', 'polys', 'texts'] # the order of the dataloader list
+ loader:
+ shuffle: False
+ drop_last: False
+ batch_size_per_card: 1
+ num_workers: 2
diff --git a/configs/e2e/e2e_r50_vd_pg.yml b/configs/e2e/e2e_r50_vd_pg.yml
index c4c5226e796a42db723ce78ef65473e357c25dc6..4642f544868f720d413f7f5242740705bc9fd0a5 100644
--- a/configs/e2e/e2e_r50_vd_pg.yml
+++ b/configs/e2e/e2e_r50_vd_pg.yml
@@ -13,6 +13,7 @@ Global:
save_inference_dir:
use_visualdl: False
infer_img:
+ infer_visual_type: EN # two mode: EN is for english datasets, CN is for chinese datasets
valid_set: totaltext # two mode: totaltext valid curved words, partvgg valid non-curved words
save_res_path: ./output/pgnet_r50_vd_totaltext/predicts_pgnet.txt
character_dict_path: ppocr/utils/ic15_dict.txt
@@ -32,6 +33,7 @@ Architecture:
name: PGFPN
Head:
name: PGHead
+ character_dict_path: ppocr/utils/ic15_dict.txt # the same as Global:character_dict_path
Loss:
name: PGLoss
@@ -45,16 +47,18 @@ Optimizer:
beta1: 0.9
beta2: 0.999
lr:
+ name: Cosine
learning_rate: 0.001
+ warmup_epoch: 50
regularizer:
name: 'L2'
- factor: 0
-
+ factor: 0.0001
PostProcess:
name: PGPostProcess
score_thresh: 0.5
mode: fast # fast or slow two ways
+ point_gather_mode: align # same as PGProcessTrain: point_gather_mode
Metric:
name: E2EMetric
@@ -76,9 +80,12 @@ Train:
- E2ELabelEncodeTrain:
- PGProcessTrain:
batch_size: 14 # same as loader: batch_size_per_card
+ use_resize: True
+ use_random_crop: False
min_crop_size: 24
min_text_size: 4
max_text_size: 512
+ point_gather_mode: align # two mode: align and none, align mode is better than none mode
- KeepKeys:
keep_keys: [ 'images', 'tcl_maps', 'tcl_label_maps', 'border_maps','direction_maps', 'training_masks', 'label_list', 'pos_list', 'pos_mask' ] # dataloader will return list in this order
loader:
diff --git a/configs/table/SLANet.yml b/configs/table/SLANet.yml
index 384c95852e815f9780328f63cbbd52fa0ef3deb4..a896614556e36f77bd784218b6c2f29914219dbe 100644
--- a/configs/table/SLANet.yml
+++ b/configs/table/SLANet.yml
@@ -12,7 +12,7 @@ Global:
checkpoints:
save_inference_dir: ./output/SLANet/infer
use_visualdl: False
- infer_img: doc/table/table.jpg
+ infer_img: ppstructure/docs/table/table.jpg
# for data or label process
character_dict_path: ppocr/utils/dict/table_structure_dict.txt
character_type: en
diff --git a/configs/table/SLANet_ch.yml b/configs/table/SLANet_ch.yml
index 997ff0a77b5ea824957abc1d32a7ba7f70abc12c..3b1e5c6bd9dd4cd2a084d557a1285983a56bdf2a 100644
--- a/configs/table/SLANet_ch.yml
+++ b/configs/table/SLANet_ch.yml
@@ -12,7 +12,7 @@ Global:
checkpoints:
save_inference_dir: ./output/SLANet_ch/infer
use_visualdl: False
- infer_img: doc/table/table.jpg
+ infer_img: ppstructure/docs/table/table.jpg
# for data or label process
character_dict_path: ppocr/utils/dict/table_structure_dict_ch.txt
character_type: en
@@ -107,7 +107,7 @@ Train:
Eval:
dataset:
name: PubTabDataSet
- data_dir: train_data/table/val/
+ data_dir: train_data/table/val/
label_file_list: [train_data/table/val.txt]
transforms:
- DecodeImage:
diff --git a/deploy/cpp_infer/src/ocr_cls.cpp b/deploy/cpp_infer/src/ocr_cls.cpp
index 674630bf1e7e04841e027a7320d62af4a453ffc8..92d83600cea04419db231c0097caa53ed6fec58b 100644
--- a/deploy/cpp_infer/src/ocr_cls.cpp
+++ b/deploy/cpp_infer/src/ocr_cls.cpp
@@ -112,6 +112,11 @@ void Classifier::LoadModel(const std::string &model_dir) {
precision = paddle_infer::Config::Precision::kInt8;
}
config.EnableTensorRtEngine(1 << 20, 10, 3, precision, false, false);
+ if (!Utility::PathExists("./trt_cls_shape.txt")){
+ config.CollectShapeRangeInfo("./trt_cls_shape.txt");
+ } else {
+ config.EnableTunedTensorRtDynamicShape("./trt_cls_shape.txt", true);
+ }
}
} else {
config.DisableGpu();
diff --git a/deploy/cpp_infer/src/ocr_det.cpp b/deploy/cpp_infer/src/ocr_det.cpp
index 56de195186a0d4d6c8b2482eb57c106347485928..030d5c2f359bba522662324d84c6ef1cc0bc83b8 100644
--- a/deploy/cpp_infer/src/ocr_det.cpp
+++ b/deploy/cpp_infer/src/ocr_det.cpp
@@ -32,49 +32,13 @@ void DBDetector::LoadModel(const std::string &model_dir) {
if (this->precision_ == "int8") {
precision = paddle_infer::Config::Precision::kInt8;
}
- config.EnableTensorRtEngine(1 << 20, 1, 20, precision, false, false);
- std::map<std::string, std::vector<int>> min_input_shape = {
- {"x", {1, 3, 50, 50}},
- {"conv2d_92.tmp_0", {1, 120, 20, 20}},
- {"conv2d_91.tmp_0", {1, 24, 10, 10}},
- {"conv2d_59.tmp_0", {1, 96, 20, 20}},
- {"nearest_interp_v2_1.tmp_0", {1, 256, 10, 10}},
- {"nearest_interp_v2_2.tmp_0", {1, 256, 20, 20}},
- {"conv2d_124.tmp_0", {1, 256, 20, 20}},
- {"nearest_interp_v2_3.tmp_0", {1, 64, 20, 20}},
- {"nearest_interp_v2_4.tmp_0", {1, 64, 20, 20}},
- {"nearest_interp_v2_5.tmp_0", {1, 64, 20, 20}},
- {"elementwise_add_7", {1, 56, 2, 2}},
- {"nearest_interp_v2_0.tmp_0", {1, 256, 2, 2}}};
- std::map<std::string, std::vector<int>> max_input_shape = {
- {"x", {1, 3, 1536, 1536}},
- {"conv2d_92.tmp_0", {1, 120, 400, 400}},
- {"conv2d_91.tmp_0", {1, 24, 200, 200}},
- {"conv2d_59.tmp_0", {1, 96, 400, 400}},
- {"nearest_interp_v2_1.tmp_0", {1, 256, 200, 200}},
- {"nearest_interp_v2_2.tmp_0", {1, 256, 400, 400}},
- {"conv2d_124.tmp_0", {1, 256, 400, 400}},
- {"nearest_interp_v2_3.tmp_0", {1, 64, 400, 400}},
- {"nearest_interp_v2_4.tmp_0", {1, 64, 400, 400}},
- {"nearest_interp_v2_5.tmp_0", {1, 64, 400, 400}},
- {"elementwise_add_7", {1, 56, 400, 400}},
- {"nearest_interp_v2_0.tmp_0", {1, 256, 400, 400}}};
- std::map<std::string, std::vector<int>> opt_input_shape = {
- {"x", {1, 3, 640, 640}},
- {"conv2d_92.tmp_0", {1, 120, 160, 160}},
- {"conv2d_91.tmp_0", {1, 24, 80, 80}},
- {"conv2d_59.tmp_0", {1, 96, 160, 160}},
- {"nearest_interp_v2_1.tmp_0", {1, 256, 80, 80}},
- {"nearest_interp_v2_2.tmp_0", {1, 256, 160, 160}},
- {"conv2d_124.tmp_0", {1, 256, 160, 160}},
- {"nearest_interp_v2_3.tmp_0", {1, 64, 160, 160}},
- {"nearest_interp_v2_4.tmp_0", {1, 64, 160, 160}},
- {"nearest_interp_v2_5.tmp_0", {1, 64, 160, 160}},
- {"elementwise_add_7", {1, 56, 40, 40}},
- {"nearest_interp_v2_0.tmp_0", {1, 256, 40, 40}}};
-
- config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
- opt_input_shape);
+ config.EnableTensorRtEngine(1 << 30, 1, 20, precision, false, false);
+ if (!Utility::PathExists("./trt_det_shape.txt")){
+ config.CollectShapeRangeInfo("./trt_det_shape.txt");
+ } else {
+ config.EnableTunedTensorRtDynamicShape("./trt_det_shape.txt", true);
+ }
+
}
} else {
config.DisableGpu();
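The same collect-then-reuse pattern is exposed by the Paddle Inference Python API. A hedged sketch of the equivalent flow — model paths are placeholders, and the API names come from `paddle.inference`, not from this patch:

```python
import os
import paddle.inference as paddle_infer

config = paddle_infer.Config("inference.pdmodel", "inference.pdiparams")  # placeholder paths
config.enable_use_gpu(500, 0)
config.enable_tensorrt_engine(
    workspace_size=1 << 30,
    max_batch_size=1,
    min_subgraph_size=20,
    precision_mode=paddle_infer.PrecisionType.Float32,
    use_static=False,
    use_calib_mode=False)

shape_file = "./trt_det_shape.txt"
if not os.path.exists(shape_file):
    # first run: record the tensor shape ranges seen during inference
    config.collect_shape_range_info(shape_file)
else:
    # later runs: build TensorRT engines from the tuned shape file
    config.enable_tuned_tensorrt_dynamic_shape(shape_file, True)

predictor = paddle_infer.create_predictor(config)
```

This removes the hard-coded per-tensor min/max/opt shape tables that the deleted C++ block had to keep in sync with the model graph.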
diff --git a/deploy/cpp_infer/src/ocr_rec.cpp b/deploy/cpp_infer/src/ocr_rec.cpp
index 0f90ddfab4872f97829da081e64cb7437e72493a..088cb942ba5ac4b09c9e8d1731a3b20d40967edf 100644
--- a/deploy/cpp_infer/src/ocr_rec.cpp
+++ b/deploy/cpp_infer/src/ocr_rec.cpp
@@ -147,20 +147,12 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
if (this->precision_ == "int8") {
precision = paddle_infer::Config::Precision::kInt8;
}
- config.EnableTensorRtEngine(1 << 20, 10, 15, precision, false, false);
- int imgH = this->rec_image_shape_[1];
- int imgW = this->rec_image_shape_[2];
- std::map<std::string, std::vector<int>> min_input_shape = {
- {"x", {1, 3, imgH, 10}}, {"lstm_0.tmp_0", {10, 1, 96}}};
- std::map<std::string, std::vector<int>> max_input_shape = {
- {"x", {this->rec_batch_num_, 3, imgH, 2500}},
- {"lstm_0.tmp_0", {1000, 1, 96}}};
- std::map<std::string, std::vector<int>> opt_input_shape = {
- {"x", {this->rec_batch_num_, 3, imgH, imgW}},
- {"lstm_0.tmp_0", {25, 1, 96}}};
-
- config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
- opt_input_shape);
+ if (!Utility::PathExists("./trt_rec_shape.txt")){
+ config.CollectShapeRangeInfo("./trt_rec_shape.txt");
+ } else {
+ config.EnableTunedTensorRtDynamicShape("./trt_rec_shape.txt", true);
+ }
+
}
} else {
config.DisableGpu();
diff --git a/deploy/lite/config.txt b/deploy/lite/config.txt
index dda0d2b0320544d3a82f59b0672c086c64d83d3d..404249323b6cb5de345438056a9a10abd64b38bc 100644
--- a/deploy/lite/config.txt
+++ b/deploy/lite/config.txt
@@ -5,4 +5,4 @@ det_db_unclip_ratio 1.6
det_db_use_dilate 0
det_use_polygon_score 1
use_direction_classify 1
-rec_image_height 32
\ No newline at end of file
+rec_image_height 48
\ No newline at end of file
diff --git a/deploy/lite/readme.md b/deploy/lite/readme.md
index a1bef8120e52dd91db0fda4ac2a4d91cc2800818..fc91cbfa7d69f6a8c1086243e4df3f820bd78339 100644
--- a/deploy/lite/readme.md
+++ b/deploy/lite/readme.md
@@ -99,6 +99,8 @@ The following table also provides a series of models that can be deployed on mob
|Version|Introduction|Model size|Detection model|Text Direction model|Recognition model|Paddle-Lite branch|
|---|---|---|---|---|---|---|
+|PP-OCRv3|extra-lightweight chinese OCR optimized model|16.2M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.nb)|v2.10|
+|PP-OCRv3(slim)|extra-lightweight chinese OCR optimized model|5.9M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.nb)|v2.10|
|PP-OCRv2|extra-lightweight chinese OCR optimized model|11M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_infer_opt.nb)|v2.10|
|PP-OCRv2(slim)|extra-lightweight chinese OCR optimized model|4.6M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_slim_opt.nb)|v2.10|
@@ -134,17 +136,16 @@ Introduction to paddle_lite_opt parameters:
The following takes the ultra-lightweight Chinese model of PaddleOCR as an example to introduce the use of the compiled opt file to complete the conversion of the inference model to the Paddle-Lite optimized model
```
-# 【[Recommendation] Download the Chinese and English inference model of PP-OCRv2
-wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_quant_infer.tar && tar xf ch_PP-OCRv2_det_slim_quant_infer.tar
-wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_quant_infer.tar && tar xf ch_PP-OCRv2_rec_slim_quant_infer.tar
+# [Recommendation] Download the Chinese and English inference models of PP-OCRv3
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar && tar xf ch_PP-OCRv3_det_slim_infer.tar
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar && tar xf ch_PP-OCRv3_rec_slim_infer.tar
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_cls_slim_infer.tar && tar xf ch_ppocr_mobile_v2.0_cls_slim_infer.tar
# Convert detection model
-./opt --model_file=./ch_PP-OCRv2_det_slim_quant_infer/inference.pdmodel --param_file=./ch_PP-OCRv2_det_slim_quant_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv2_det_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
+paddle_lite_opt --model_file=./ch_PP-OCRv3_det_slim_infer/inference.pdmodel --param_file=./ch_PP-OCRv3_det_slim_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv3_det_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
# Convert recognition model
-./opt --model_file=./ch_PP-OCRv2_rec_slim_quant_infer/inference.pdmodel --param_file=./ch_PP-OCRv2_rec_slim_quant_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv2_rec_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
+paddle_lite_opt --model_file=./ch_PP-OCRv3_rec_slim_infer/inference.pdmodel --param_file=./ch_PP-OCRv3_rec_slim_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv3_rec_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
# Convert angle classifier model
-./opt --model_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdmodel --param_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdiparams --optimize_out=./ch_ppocr_mobile_v2.0_cls_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
-
+paddle_lite_opt --model_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdmodel --param_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdiparams --optimize_out=./ch_ppocr_mobile_v2.0_cls_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
```
After the conversion is successful, there will be more files ending with `.nb` in the inference model directory, which is the successfully converted model file.
@@ -197,15 +198,15 @@ Some preparatory work is required first.
cp ../../../cxx/lib/libpaddle_light_api_shared.so ./debug/
```
-Prepare the test image, taking PaddleOCR/doc/imgs/11.jpg as an example, copy the image file to the demo/cxx/ocr/debug/ folder. Prepare the model files optimized by the lite opt tool, ch_det_mv3_db_opt.nb, ch_rec_mv3_crnn_opt.nb, and place them under the demo/cxx/ocr/debug/ folder.
+Prepare the test image: taking PaddleOCR/doc/imgs/11.jpg as an example, copy it to the demo/cxx/ocr/debug/ folder. Prepare the model files optimized by the lite opt tool, ch_PP-OCRv3_det_slim_opt.nb and ch_PP-OCRv3_rec_slim_opt.nb, and place them under the demo/cxx/ocr/debug/ folder.
The structure of the OCR demo is as follows after the above command is executed:
```
demo/cxx/ocr/
|-- debug/
-| |--ch_PP-OCRv2_det_slim_opt.nb Detection model
-| |--ch_PP-OCRv2_rec_slim_opt.nb Recognition model
+| |--ch_PP-OCRv3_det_slim_opt.nb Detection model
+| |--ch_PP-OCRv3_rec_slim_opt.nb Recognition model
| |--ch_ppocr_mobile_v2.0_cls_slim_opt.nb Text direction classification model
| |--11.jpg Image for OCR
| |--ppocr_keys_v1.txt Dictionary file
@@ -240,7 +241,7 @@ det_db_thresh 0.3 # Used to filter the binarized image of DB prediction,
det_db_box_thresh 0.5 # DDB post-processing filter box threshold, if there is a missing box detected, it can be reduced as appropriate
det_db_unclip_ratio 1.6 # Indicates the compactness of the text box, the smaller the value, the closer the text box to the text
use_direction_classify 0 # Whether to use the direction classifier, 0 means not to use, 1 means to use
-rec_image_height 32 # The height of the input image of the recognition model, the PP-OCRv3 model needs to be set to 48, and the PP-OCRv2 model needs to be set to 32
+rec_image_height 48 # The height of the input image of the recognition model, the PP-OCRv3 model needs to be set to 48, and the PP-OCRv2 model needs to be set to 32
```
5. Run Model on phone
@@ -260,14 +261,14 @@ After the above steps are completed, you can use adb to push the file to the pho
export LD_LIBRARY_PATH=${PWD}:$LD_LIBRARY_PATH
# The use of ocr_db_crnn is:
# ./ocr_db_crnn Mode Detection model file Orientation classifier model file Recognition model file Hardware Precision Threads Batchsize Test image path Dictionary file path
- ./ocr_db_crnn system ch_PP-OCRv2_det_slim_opt.nb ch_PP-OCRv2_rec_slim_opt.nb ch_ppocr_mobile_v2.0_cls_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt ppocr_keys_v1.txt True
+ ./ocr_db_crnn system ch_PP-OCRv3_det_slim_opt.nb ch_PP-OCRv3_rec_slim_opt.nb ch_ppocr_mobile_v2.0_cls_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt ppocr_keys_v1.txt True
# precision can be INT8 for quantitative model or FP32 for normal model.
# Only using detection model
-./ocr_db_crnn det ch_PP-OCRv2_det_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt
+./ocr_db_crnn det ch_PP-OCRv3_det_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt
# Only using recognition model
-./ocr_db_crnn rec ch_PP-OCRv2_rec_slim_opt.nb arm8 INT8 10 1 word_1.jpg ppocr_keys_v1.txt config.txt
+./ocr_db_crnn rec ch_PP-OCRv3_rec_slim_opt.nb arm8 INT8 10 1 word_1.jpg ppocr_keys_v1.txt config.txt
```
If you modify the code, you need to recompile and push to the phone.
diff --git a/deploy/lite/readme_ch.md b/deploy/lite/readme_ch.md
index 0793827fe647c470944fc36e2b243c8f7e704e99..78e2510917e0fd85c4a724ec74eccb0b7cfc6118 100644
--- a/deploy/lite/readme_ch.md
+++ b/deploy/lite/readme_ch.md
@@ -97,6 +97,8 @@ Paddle-Lite 提供了多种策略来自动优化原始的模型,其中包括
|模型版本|模型简介|模型大小|检测模型|文本方向分类模型|识别模型|Paddle-Lite版本|
|---|---|---|---|---|---|---|
+|PP-OCRv3|蒸馏版超轻量中文OCR移动端模型|16.2M|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_infer_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.nb)|v2.10|
+|PP-OCRv3(slim)|蒸馏版超轻量中文OCR移动端模型|5.9M|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.nb)|v2.10|
|PP-OCRv2|蒸馏版超轻量中文OCR移动端模型|11M|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_infer_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_infer_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_infer_opt.nb)|v2.10|
|PP-OCRv2(slim)|蒸馏版超轻量中文OCR移动端模型|4.6M|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[下载地址](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_slim_opt.nb)|v2.10|
@@ -131,16 +133,16 @@ paddle_lite_opt 参数介绍:
下面以PaddleOCR的超轻量中文模型为例,介绍使用编译好的opt文件完成inference模型到Paddle-Lite优化模型的转换。
```
-# 【推荐】 下载 PP-OCRv2版本的中英文 inference模型
-wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_quant_infer.tar && tar xf ch_PP-OCRv2_det_slim_quant_infer.tar
-wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_quant_infer.tar && tar xf ch_PP-OCRv2_rec_slim_quant_infer.tar
+# 【推荐】 下载 PP-OCRv3版本的中英文 inference模型
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar && tar xf ch_PP-OCRv3_det_slim_infer.tar
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar && tar xf ch_PP-OCRv3_rec_slim_infer.tar
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_cls_slim_infer.tar && tar xf ch_ppocr_mobile_v2.0_cls_slim_infer.tar
# 转换检测模型
-./opt --model_file=./ch_PP-OCRv2_det_slim_quant_infer/inference.pdmodel --param_file=./ch_PP-OCRv2_det_slim_quant_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv2_det_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
+paddle_lite_opt --model_file=./ch_PP-OCRv3_det_slim_infer/inference.pdmodel --param_file=./ch_PP-OCRv3_det_slim_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv3_det_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
# 转换识别模型
-./opt --model_file=./ch_PP-OCRv2_rec_slim_quant_infer/inference.pdmodel --param_file=./ch_PP-OCRv2_rec_slim_quant_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv2_rec_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
+paddle_lite_opt --model_file=./ch_PP-OCRv3_rec_slim_infer/inference.pdmodel --param_file=./ch_PP-OCRv3_rec_slim_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv3_rec_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
# 转换方向分类器模型
-./opt --model_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdmodel --param_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdiparams --optimize_out=./ch_ppocr_mobile_v2.0_cls_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
+paddle_lite_opt --model_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdmodel --param_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdiparams --optimize_out=./ch_ppocr_mobile_v2.0_cls_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
```
@@ -194,15 +196,15 @@ wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_cls
```
准备测试图像,以`PaddleOCR/doc/imgs/11.jpg`为例,将测试的图像复制到`demo/cxx/ocr/debug/`文件夹下。
- 准备lite opt工具优化后的模型文件,比如使用`ch_PP-OCRv2_det_slim_opt.ch_PP-OCRv2_rec_slim_rec.nb, ch_ppocr_mobile_v2.0_cls_slim_opt.nb`,模型文件放置在`demo/cxx/ocr/debug/`文件夹下。
+ 准备lite opt工具优化后的模型文件,比如使用`ch_PP-OCRv3_det_slim_opt.nb, ch_PP-OCRv3_rec_slim_opt.nb, ch_ppocr_mobile_v2.0_cls_slim_opt.nb`,模型文件放置在`demo/cxx/ocr/debug/`文件夹下。
执行完成后,ocr文件夹下将有如下文件格式:
```
demo/cxx/ocr/
|-- debug/
-| |--ch_PP-OCRv2_det_slim_opt.nb 优化后的检测模型文件
-| |--ch_PP-OCRv2_rec_slim_opt.nb 优化后的识别模型文件
+| |--ch_PP-OCRv3_det_slim_opt.nb 优化后的检测模型文件
+| |--ch_PP-OCRv3_rec_slim_opt.nb 优化后的识别模型文件
| |--ch_ppocr_mobile_v2.0_cls_slim_opt.nb 优化后的文字方向分类器模型文件
| |--11.jpg 待测试图像
| |--ppocr_keys_v1.txt 中文字典文件
@@ -239,7 +241,7 @@ det_db_thresh 0.3 # 用于过滤DB预测的二值化图像,设置为0.
det_db_box_thresh 0.5 # 检测器后处理过滤box的阈值,如果检测存在漏框情况,可酌情减小
det_db_unclip_ratio 1.6 # 表示文本框的紧致程度,越小则文本框更靠近文本
use_direction_classify 0 # 是否使用方向分类器,0表示不使用,1表示使用
-rec_image_height 32 # 识别模型输入图像的高度,PP-OCRv3模型设置为48,PP-OCRv2模型需要设置为32
+rec_image_height 48 # 识别模型输入图像的高度,PP-OCRv3模型设置为48,PP-OCRv2模型需要设置为32
```
5. 启动调试
@@ -259,13 +261,13 @@ rec_image_height 32 # 识别模型输入图像的高度,PP-OCRv3模型
export LD_LIBRARY_PATH=${PWD}:$LD_LIBRARY_PATH
# 开始使用,ocr_db_crnn可执行文件的使用方式为:
# ./ocr_db_crnn 预测模式 检测模型文件 方向分类器模型文件 识别模型文件 运行硬件 运行精度 线程数 batchsize 测试图像路径 参数配置路径 字典文件路径 是否使用benchmark参数
- ./ocr_db_crnn system ch_PP-OCRv2_det_slim_opt.nb ch_PP-OCRv2_rec_slim_opt.nb ch_ppocr_mobile_v2.0_cls_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt ppocr_keys_v1.txt True
+ ./ocr_db_crnn system ch_PP-OCRv3_det_slim_opt.nb ch_PP-OCRv3_rec_slim_opt.nb ch_ppocr_mobile_v2.0_cls_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt ppocr_keys_v1.txt True
# 仅使用文本检测模型,使用方式如下:
-./ocr_db_crnn det ch_PP-OCRv2_det_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt
+./ocr_db_crnn det ch_PP-OCRv3_det_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt
# 仅使用文本识别模型,使用方式如下:
-./ocr_db_crnn rec ch_PP-OCRv2_rec_slim_opt.nb arm8 INT8 10 1 word_1.jpg ppocr_keys_v1.txt config.txt
+./ocr_db_crnn rec ch_PP-OCRv3_rec_slim_opt.nb arm8 INT8 10 1 word_1.jpg ppocr_keys_v1.txt config.txt
```
如果对代码做了修改,则需要重新编译并push到手机上。
diff --git a/deploy/slim/quantization/README.md b/deploy/slim/quantization/README.md
index 4c1d784b99aade614d78b4bd6fb20afef15f0f6f..7f1ff7ae22e78cded28f1689d66a5e41dd8950a2 100644
--- a/deploy/slim/quantization/README.md
+++ b/deploy/slim/quantization/README.md
@@ -22,7 +22,7 @@
### 1. 安装PaddleSlim
```bash
-pip3 install paddleslim==2.2.2
+pip3 install paddleslim==2.3.2
```
### 2. 准备训练好的模型
@@ -33,17 +33,7 @@ PaddleOCR提供了一系列训练好的[模型](../../../doc/doc_ch/models_list.
量化训练包括离线量化训练和在线量化训练,在线量化训练效果更好,需加载预训练模型,在定义好量化策略后即可对模型进行量化。
-量化训练的代码位于slim/quantization/quant.py 中,比如训练检测模型,训练指令如下:
-```bash
-python deploy/slim/quantization/quant.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model='your trained model' Global.save_model_dir=./output/quant_model
-
-# 比如下载提供的训练模型
-wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar
-tar -xf ch_ppocr_mobile_v2.0_det_train.tar
-python deploy/slim/quantization/quant.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./ch_ppocr_mobile_v2.0_det_train/best_accuracy Global.save_model_dir=./output/quant_model
-```
-
-模型蒸馏和模型量化可以同时使用,以PPOCRv3检测模型为例:
+量化训练的代码位于slim/quantization/quant.py 中。以PP-OCRv3检测模型为例,训练指令如下:
```
# 下载检测预训练模型:
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar
@@ -58,7 +48,7 @@ python deploy/slim/quantization/quant.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_
在得到量化训练保存的模型后,我们可以将其导出为inference_model,用于预测部署:
```bash
-python deploy/slim/quantization/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_inference_dir=./output/quant_inference_model
+python deploy/slim/quantization/export_model.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_inference_dir=./output/quant_inference_model
```
### 5. 量化模型部署
diff --git a/deploy/slim/quantization/README_en.md b/deploy/slim/quantization/README_en.md
index c6796ae9dc256496308e432023c45ef1026c3d92..f82c3d844e292ee76b95624f7632ed40301e5a4c 100644
--- a/deploy/slim/quantization/README_en.md
+++ b/deploy/slim/quantization/README_en.md
@@ -25,7 +25,7 @@ After training, if you want to further compress the model size and accelerate th
### 1. Install PaddleSlim
```bash
-pip3 install paddleslim==2.2.2
+pip3 install paddleslim==2.3.2
```
@@ -39,18 +39,7 @@ Quantization training includes offline quantization training and online quantiza
Online quantization training is more effective. It is necessary to load the pre-trained model.
After the quantization strategy is defined, the model can be quantified.
-The code for quantization training is located in `slim/quantization/quant.py`. For example, to train a detection model, the training instructions are as follows:
-```bash
-python deploy/slim/quantization/quant.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model='your trained model' Global.save_model_dir=./output/quant_model
-
-# download provided model
-wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar
-tar -xf ch_ppocr_mobile_v2.0_det_train.tar
-python deploy/slim/quantization/quant.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./ch_ppocr_mobile_v2.0_det_train/best_accuracy Global.save_model_dir=./output/quant_model
-```
-
-
-Model distillation and model quantization can be used at the same time, taking the PPOCRv3 detection model as an example:
+The code for quantization training is located in `slim/quantization/quant.py`. For example, the training command for the slim PP-OCRv3 detection model is as follows:
```
# download provided model
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar
@@ -66,7 +55,7 @@ If you want to quantify the text recognition model, you can modify the configura
Once we got the model after pruning and fine-tuning, we can export it as an inference model for the deployment of predictive tasks:
```bash
-python deploy/slim/quantization/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_inference_dir=./output/quant_inference_model
+python deploy/slim/quantization/export_model.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_inference_dir=./output/quant_inference_model
```
### 5. Deploy
diff --git a/deploy/slim/quantization/export_model.py b/deploy/slim/quantization/export_model.py
index fd1c3e5e109667fa74f5ade18b78f634e4d325db..bd132b625181cab853961efd2e2c38c411e9edf4 100755
--- a/deploy/slim/quantization/export_model.py
+++ b/deploy/slim/quantization/export_model.py
@@ -151,17 +151,24 @@ def main():
arch_config = config["Architecture"]
- arch_config = config["Architecture"]
+ if arch_config["algorithm"] == "SVTR" and arch_config["Head"][
+ "name"] != 'MultiHead':
+ input_shape = config["Eval"]["dataset"]["transforms"][-2][
+ 'SVTRRecResizeImg']['image_shape']
+ else:
+ input_shape = None
if arch_config["algorithm"] in ["Distillation", ]: # distillation model
archs = list(arch_config["Models"].values())
for idx, name in enumerate(model.model_name_list):
sub_model_save_path = os.path.join(save_path, name, "inference")
export_single_model(model.model_list[idx], archs[idx],
- sub_model_save_path, logger, quanter)
+ sub_model_save_path, logger, input_shape,
+ quanter)
else:
save_path = os.path.join(save_path, "inference")
- export_single_model(model, arch_config, save_path, logger, quanter)
+ export_single_model(model, arch_config, save_path, logger, input_shape,
+ quanter)
if __name__ == "__main__":
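The new `input_shape` lookup assumes `SVTRRecResizeImg` sits second-to-last in the Eval transforms list. A minimal dict mirroring the YAML layout it expects, with invented shape values:

```python
# Invented config fragment showing the layout the [-2] lookup relies on.
config = {
    "Eval": {
        "dataset": {
            "transforms": [
                {"DecodeImage": {"img_mode": "BGR", "channel_first": False}},
                {"SVTRRecResizeImg": {"image_shape": [3, 64, 256]}},  # [-2]
                {"KeepKeys": {"keep_keys": ["image", "label"]}},      # [-1]
            ]
        }
    }
}
input_shape = config["Eval"]["dataset"]["transforms"][-2]["SVTRRecResizeImg"]["image_shape"]
assert input_shape == [3, 64, 256]
```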
diff --git a/deploy/slim/quantization/quant.py b/deploy/slim/quantization/quant.py
index 64521b5e06df61cf656da4087e6cd49f82adfadd..ef2c3e28f94e8b72d1aa7822fc88ecfd5c406b89 100755
--- a/deploy/slim/quantization/quant.py
+++ b/deploy/slim/quantization/quant.py
@@ -158,8 +158,7 @@ def main(config, device, logger, vdl_writer):
pre_best_model_dict = dict()
# load fp32 model to begin quantization
- if config["Global"]["pretrained_model"] is not None:
- pre_best_model_dict = load_model(config, model)
+ pre_best_model_dict = load_model(config, model, None, config['Architecture']["model_type"])
freeze_params = False
if config['Architecture']["algorithm"] in ["Distillation"]:
@@ -184,8 +183,7 @@ def main(config, device, logger, vdl_writer):
model=model)
# resume PACT training process
- if config["Global"]["checkpoints"] is not None:
- pre_best_model_dict = load_model(config, model, optimizer)
+ pre_best_model_dict = load_model(config, model, optimizer, config['Architecture']["model_type"])
# build metric
eval_class = build_metric(config['Metric'])
diff --git a/deploy/slim/quantization/quant_kl.py b/deploy/slim/quantization/quant_kl.py
index cc3a455b971937fbb2e401b87112475341bd41f3..73e1a957e8606fd7cc8269e96eec1e274484db06 100755
--- a/deploy/slim/quantization/quant_kl.py
+++ b/deploy/slim/quantization/quant_kl.py
@@ -97,6 +97,17 @@ def sample_generator(loader):
return __reader__
+def sample_generator_layoutxlm_ser(loader):
+ def __reader__():
+ for indx, data in enumerate(loader):
+ input_ids = np.array(data[0])
+ bbox = np.array(data[1])
+ attention_mask = np.array(data[2])
+ token_type_ids = np.array(data[3])
+ images = np.array(data[4])
+ yield [input_ids, bbox, attention_mask, token_type_ids, images]
+
+ return __reader__
def main(config, device, logger, vdl_writer):
# init dist environment
@@ -107,16 +118,18 @@ def main(config, device, logger, vdl_writer):
# build dataloader
config['Train']['loader']['num_workers'] = 0
+ is_layoutxlm_ser = config['Architecture']['model_type'] == 'kie' and config['Architecture']['Backbone']['name'] == 'LayoutXLMForSer'
train_dataloader = build_dataloader(config, 'Train', device, logger)
if config['Eval']:
config['Eval']['loader']['num_workers'] = 0
valid_dataloader = build_dataloader(config, 'Eval', device, logger)
+ if is_layoutxlm_ser:
+ train_dataloader = valid_dataloader
else:
valid_dataloader = None
paddle.enable_static()
- place = paddle.CPUPlace()
- exe = paddle.static.Executor(place)
+ exe = paddle.static.Executor(device)
if 'inference_model' in global_config.keys(): # , 'inference_model'):
inference_model_dir = global_config['inference_model']
@@ -127,6 +140,11 @@ def main(config, device, logger, vdl_writer):
raise ValueError(
"Please set inference model dir in Global.inference_model or Global.pretrained_model for post-quantazition"
)
+
+ if is_layoutxlm_ser:
+ generator = sample_generator_layoutxlm_ser(train_dataloader)
+ else:
+ generator = sample_generator(train_dataloader)
paddleslim.quant.quant_post_static(
executor=exe,
@@ -134,7 +152,7 @@ def main(config, device, logger, vdl_writer):
model_filename='inference.pdmodel',
params_filename='inference.pdiparams',
quantize_model_path=global_config['save_inference_dir'],
- sample_generator=sample_generator(train_dataloader),
+ sample_generator=generator,
save_model_filename='inference.pdmodel',
save_params_filename='inference.pdiparams',
batch_size=1,
diff --git a/doc/doc_ch/algorithm_det_ct.md b/doc/doc_ch/algorithm_det_ct.md
new file mode 100644
index 0000000000000000000000000000000000000000..ea3522b7bf3c2dc17ef4f645bc47738477f07cf1
--- /dev/null
+++ b/doc/doc_ch/algorithm_det_ct.md
@@ -0,0 +1,95 @@
+# CT
+
+- [1. 算法简介](#1)
+- [2. 环境配置](#2)
+- [3. 模型训练、评估、预测](#3)
+ - [3.1 训练](#3-1)
+ - [3.2 评估](#3-2)
+ - [3.3 预测](#3-3)
+- [4. 推理部署](#4)
+ - [4.1 Python推理](#4-1)
+ - [4.2 C++推理](#4-2)
+ - [4.3 Serving服务化部署](#4-3)
+ - [4.4 更多推理部署](#4-4)
+- [5. FAQ](#5)
+
+
+## 1. 算法简介
+
+论文信息:
+> [CentripetalText: An Efficient Text Instance Representation for Scene Text Detection](https://arxiv.org/abs/2107.05945)
+> Tao Sheng, Jie Chen, Zhouhui Lian
+> NeurIPS, 2021
+
+
+在Total-Text文本检测公开数据集上,算法复现效果如下:
+
+|模型|骨干网络|配置文件|precision|recall|Hmean|下载链接|
+| --- | --- | --- | --- | --- | --- | --- |
+|CT|ResNet18_vd|[configs/det/det_r18_vd_ct.yml](../../configs/det/det_r18_vd_ct.yml)|88.68%|81.70%|85.05%|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r18_ct_train.tar)|
+
+
+
+## 2. 环境配置
+请先参考[《运行环境准备》](./environment.md)配置PaddleOCR运行环境,参考[《项目克隆》](./clone.md)克隆项目代码。
+
+
+
+## 3. 模型训练、评估、预测
+
+CT模型使用Total-Text文本检测公开数据集训练得到,数据集下载可参考 [Total-Text-Dataset](https://github.com/cs-chan/Total-Text-Dataset/tree/master/Dataset), 我们将标签文件转成了paddleocr格式,转换好的标签文件下载参考[train.txt](https://paddleocr.bj.bcebos.com/dataset/ct_tipc/train.txt), [test.txt](https://paddleocr.bj.bcebos.com/dataset/ct_tipc/test.txt)。
+
+请参考[文本检测训练教程](./detection.md)。PaddleOCR对代码进行了模块化,训练不同的检测模型只需要**更换配置文件**即可。
+
+
+
+## 4. 推理部署
+
+
+### 4.1 Python推理
+首先将CT文本检测训练过程中保存的模型,转换成inference model。以基于Resnet18_vd骨干网络,在Total-Text英文数据集训练的模型为例( [模型下载地址](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r18_ct_train.tar) ),可以使用如下命令进行转换:
+
+```shell
+python3 tools/export_model.py -c configs/det/det_r18_vd_ct.yml -o Global.pretrained_model=./det_r18_ct_train/best_accuracy Global.save_inference_dir=./inference/det_ct
+```
+
+CT文本检测模型推理,可以执行如下命令:
+
+```shell
+python3 tools/infer/predict_det.py --image_dir="./doc/imgs_en/img623.jpg" --det_model_dir="./inference/det_ct/" --det_algorithm="CT"
+```
+
+可视化文本检测结果默认保存到`./inference_results`文件夹里面,结果文件的名称前缀为'det_res'。结果示例如下:
+
+(检测结果可视化示例图)
+
+### 4.2 C++推理
+
+暂不支持
+
+
+### 4.3 Serving服务化部署
+
+暂不支持
+
+
+### 4.4 更多推理部署
+
+暂不支持
+
+
+## 5. FAQ
+
+
+## 引用
+
+```bibtex
+@inproceedings{sheng2021centripetaltext,
+ title={CentripetalText: An Efficient Text Instance Representation for Scene Text Detection},
+ author={Tao Sheng and Jie Chen and Zhouhui Lian},
+ booktitle={Thirty-Fifth Conference on Neural Information Processing Systems},
+ year={2021}
+}
+```
diff --git a/doc/doc_ch/whl.md b/doc/doc_ch/whl.md
index 511e0421f1e249e340f2002a900b59633e31880e..315329464f15aa1127e34a38d3407a9c81dbc627 100644
--- a/doc/doc_ch/whl.md
+++ b/doc/doc_ch/whl.md
@@ -390,6 +390,7 @@ im_show.save('result.jpg')
| det_db_thresh | DB模型输出预测图的二值化阈值 | 0.3 |
| det_db_box_thresh | DB模型输出框的阈值,低于此值的预测框会被丢弃 | 0.5 |
| det_db_unclip_ratio | DB模型输出框扩大的比例 | 2 |
+| det_db_score_mode | How the score of a detection box is computed, either 'fast' or 'slow'. For curved text, 'slow' is recommended: it yields higher box scores, so boxes are less likely to be filtered out | 'fast' |
| det_east_score_thresh | EAST模型输出预测图的二值化阈值 | 0.8 |
| det_east_cover_thresh | EAST模型输出框的阈值,低于此值的预测框会被丢弃 | 0.1 |
| det_east_nms_thresh | EAST模型输出框NMS的阈值 | 0.2 |
diff --git a/doc/doc_en/algorithm_det_ct_en.md b/doc/doc_en/algorithm_det_ct_en.md
new file mode 100644
index 0000000000000000000000000000000000000000..d56b3fc6b3353bacb1f26fba3873ba5276b10c8b
--- /dev/null
+++ b/doc/doc_en/algorithm_det_ct_en.md
@@ -0,0 +1,96 @@
+# CT
+
+- [1. Introduction](#1)
+- [2. Environment](#2)
+- [3. Model Training / Evaluation / Prediction](#3)
+ - [3.1 Training](#3-1)
+ - [3.2 Evaluation](#3-2)
+ - [3.3 Prediction](#3-3)
+- [4. Inference and Deployment](#4)
+ - [4.1 Python Inference](#4-1)
+ - [4.2 C++ Inference](#4-2)
+ - [4.3 Serving](#4-3)
+ - [4.4 More](#4-4)
+- [5. FAQ](#5)
+
+
+## 1. Introduction
+
+Paper:
+> [CentripetalText: An Efficient Text Instance Representation for Scene Text Detection](https://arxiv.org/abs/2107.05945)
+> Tao Sheng, Jie Chen, Zhouhui Lian
+> NeurIPS, 2021
+
+
+On the Total-Text dataset, the reproduced text detection results are as follows:
+
+|Model|Backbone|Configuration|Precision|Recall|Hmean|Download|
+| --- | --- | --- | --- | --- | --- | --- |
+|CT|ResNet18_vd|[configs/det/det_r18_vd_ct.yml](../../configs/det/det_r18_vd_ct.yml)|88.68%|81.70%|85.05%|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r18_ct_train.tar)|
+
+
+
+## 2. Environment
+Please prepare your environment referring to [prepare the environment](./environment_en.md) and [clone the repo](./clone_en.md).
+
+
+
+## 3. Model Training / Evaluation / Prediction
+
+
+The CT model above is trained on the Total-Text public text detection dataset, which can be downloaded from [Total-Text-Dataset](https://github.com/cs-chan/Total-Text-Dataset/tree/master/Dataset). Annotations converted to PaddleOCR format are available at [train.txt](https://paddleocr.bj.bcebos.com/dataset/ct_tipc/train.txt) and [test.txt](https://paddleocr.bj.bcebos.com/dataset/ct_tipc/test.txt).
+
+
+Please refer to [text detection training tutorial](./detection_en.md). PaddleOCR has modularized the code structure, so that you only need to **replace the configuration file** to train different detection models.
+
+
+## 4. Inference and Deployment
+
+
+### 4.1 Python Inference
+First, convert the model saved during CT text detection training into an inference model. Taking the model trained on the Total-Text English dataset with the ResNet18_vd backbone as an example ([model download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r18_ct_train.tar)), you can convert it with the following command:
+
+```shell
+python3 tools/export_model.py -c configs/det/det_r18_vd_ct.yml -o Global.pretrained_model=./det_r18_ct_train/best_accuracy Global.save_inference_dir=./inference/det_ct
+```
+
+For CT text detection model inference, execute the following command:
+
+```shell
+python3 tools/infer/predict_det.py --image_dir="./doc/imgs_en/img623.jpg" --det_model_dir="./inference/det_ct/" --det_algorithm="CT"
+```
+
+The visualized text detection results are saved to the `./inference_results` folder by default, and the name of the result file is prefixed with 'det_res'. Examples of results are as follows:
+
+
+
+
+
+### 4.2 C++ Inference
+
+Not supported
+
+
+### 4.3 Serving
+
+Not supported
+
+
+### 4.4 More
+
+Not supported
+
+
+## 5. FAQ
+
+
+## Citation
+
+```bibtex
+@inproceedings{sheng2021centripetaltext,
+ title={CentripetalText: An Efficient Text Instance Representation for Scene Text Detection},
+ author={Tao Sheng and Jie Chen and Zhouhui Lian},
+ booktitle={Thirty-Fifth Conference on Neural Information Processing Systems},
+ year={2021}
+}
+```
diff --git a/doc/doc_en/whl_en.md b/doc/doc_en/whl_en.md
index d81e5532cf1db0193abf61b972420bdc3bacfd0b..da2dff67c16b4a9a0a653934b1f1df64cb6e9707 100644
--- a/doc/doc_en/whl_en.md
+++ b/doc/doc_en/whl_en.md
@@ -342,6 +342,7 @@ im_show.save('result.jpg')
| det_db_thresh | Binarization threshold value of DB output map | 0.3 |
| det_db_box_thresh | The threshold value of the DB output box. Boxes score lower than this value will be discarded | 0.5 |
| det_db_unclip_ratio | The expanded ratio of DB output box | 2 |
+| det_db_score_mode | Controls how the score of a detection box is calculated, either 'fast' or 'slow'. If the text to be detected is curved, 'slow' is recommended: it yields higher box scores, so curved boxes are less likely to be filtered out | 'fast' |
| det_east_score_thresh | Binarization threshold value of EAST output map | 0.8 |
| det_east_cover_thresh | The threshold value of the EAST output box. Boxes score lower than this value will be discarded | 0.1 |
| det_east_nms_thresh | The NMS threshold value of EAST model output box | 0.2 |
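
A minimal usage sketch of the new `det_db_score_mode` option through the whl API; the image path is illustrative:

```python
from paddleocr import PaddleOCR

# 'slow' scores the detected polygon region rather than its bounding
# rectangle, so curved-text boxes score higher and survive filtering
ocr = PaddleOCR(det_db_score_mode='slow', lang='en')
result = ocr.ocr('doc/imgs_en/img623.jpg', rec=False)
```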
diff --git a/doc/imgs_results/det_res_img623_ct.jpg b/doc/imgs_results/det_res_img623_ct.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2c5f57d96cca896c70d9e0d33ba80a0177a8aeb9
Binary files /dev/null and b/doc/imgs_results/det_res_img623_ct.jpg differ
diff --git a/paddleocr.py b/paddleocr.py
index 0b7aed36279081f50208f75272fc54c5081929a7..fa732fc110dc7873f8d89b2ca2a21817a1e6d20d 100644
--- a/paddleocr.py
+++ b/paddleocr.py
@@ -414,6 +414,33 @@ def get_model_config(type, version, model_type, lang):
return model_urls[version][model_type][lang]
+def img_decode(content: bytes):
+ np_arr = np.frombuffer(content, dtype=np.uint8)
+ return cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
+
+
+def check_img(img):
+ if isinstance(img, bytes):
+ img = img_decode(img)
+ if isinstance(img, str):
+ # download net image
+ if is_link(img):
+ download_with_progressbar(img, 'tmp.jpg')
+ img = 'tmp.jpg'
+ image_file = img
+ img, flag, _ = check_and_read(image_file)
+ if not flag:
+ with open(image_file, 'rb') as f:
+ img = img_decode(f.read())
+ if img is None:
+ logger.error("error in loading image:{}".format(image_file))
+ return None
+ if isinstance(img, np.ndarray) and len(img.shape) == 2:
+ img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+
+ return img
+
+
class PaddleOCR(predict_system.TextSystem):
def __init__(self, **kwargs):
"""
@@ -482,7 +509,7 @@ class PaddleOCR(predict_system.TextSystem):
rec: use text recognition or not. If false, only det will be exec. Default is True
cls: use angle classifier or not. Default is True. If true, the text with rotation of 180 degrees can be recognized. If no text is rotated by 180 degrees, use cls=False to get better performance. Text with rotation of 90 or 270 degrees can be recognized even if cls=False.
"""
- assert isinstance(img, (np.ndarray, list, str))
+ assert isinstance(img, (np.ndarray, list, str, bytes))
if isinstance(img, list) and det == True:
logger.error('When input a list of images, det must be false')
exit(0)
@@ -491,22 +518,8 @@ class PaddleOCR(predict_system.TextSystem):
'Since the angle classifier is not initialized, the angle classifier will not be used during the forward process'
)
- if isinstance(img, str):
- # download net image
- if img.startswith('http'):
- download_with_progressbar(img, 'tmp.jpg')
- img = 'tmp.jpg'
- image_file = img
- img, flag, _ = check_and_read(image_file)
- if not flag:
- with open(image_file, 'rb') as f:
- np_arr = np.frombuffer(f.read(), dtype=np.uint8)
- img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
- if img is None:
- logger.error("error in loading image:{}".format(image_file))
- return None
- if isinstance(img, np.ndarray) and len(img.shape) == 2:
- img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+ img = check_img(img)
+
if det and rec:
dt_boxes, rec_res, _ = self.__call__(img, cls)
return [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]
@@ -585,23 +598,7 @@ class PPStructure(StructureSystem):
super().__init__(params)
def __call__(self, img, return_ocr_result_in_table=False, img_idx=0):
- if isinstance(img, str):
- # download net image
- if img.startswith('http'):
- download_with_progressbar(img, 'tmp.jpg')
- img = 'tmp.jpg'
- image_file = img
- img, flag, _ = check_and_read(image_file)
- if not flag:
- with open(image_file, 'rb') as f:
- np_arr = np.frombuffer(f.read(), dtype=np.uint8)
- img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
- if img is None:
- logger.error("error in loading image:{}".format(image_file))
- return None
- if isinstance(img, np.ndarray) and len(img.shape) == 2:
- img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
-
+ img = check_img(img)
res, _ = super().__call__(
img, return_ocr_result_in_table, img_idx=img_idx)
return res
@@ -644,7 +641,7 @@ def main():
if not flag_pdf:
if img is None:
- logger.error("error in loading image:{}".format(image_file))
+ logger.error("error in loading image:{}".format(img_path))
continue
img_paths = [[img_path, img]]
else:
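
With `check_img` factored out, all entry points accept the same input types; in particular, raw `bytes` now work end to end. A short sketch (the paths and URL are illustrative):

```python
from paddleocr import PaddleOCR

ocr = PaddleOCR(lang='en')

result = ocr.ocr('doc/imgs_en/img623.jpg')       # local path, as before
result = ocr.ocr('https://example.com/img.jpg')  # URL, downloaded to tmp.jpg
with open('doc/imgs_en/img623.jpg', 'rb') as f:
    result = ocr.ocr(f.read())                   # raw bytes, decoded by img_decode
```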
diff --git a/ppocr/data/imaug/__init__.py b/ppocr/data/imaug/__init__.py
index 102f48fcc19e59d9f8ffb0ad496f54cc64864f7d..863988cccfa9d9f2c865a444410d4245687f49ee 100644
--- a/ppocr/data/imaug/__init__.py
+++ b/ppocr/data/imaug/__init__.py
@@ -43,6 +43,7 @@ from .vqa import *
from .fce_aug import *
from .fce_targets import FCENetTargets
+from .ct_process import *
def transform(data, ops=None):
diff --git a/ppocr/data/imaug/ct_process.py b/ppocr/data/imaug/ct_process.py
new file mode 100644
index 0000000000000000000000000000000000000000..59715090036e1020800950b02b9ea06ab5c8d4c2
--- /dev/null
+++ b/ppocr/data/imaug/ct_process.py
@@ -0,0 +1,355 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import cv2
+import random
+import pyclipper
+import paddle
+
+import numpy as np
+import Polygon as plg
+import scipy.io as scio
+
+from PIL import Image
+import paddle.vision.transforms as transforms
+
+
+class RandomScale():
+ def __init__(self, short_size=640, **kwargs):
+ self.short_size = short_size
+
+ def scale_aligned(self, img, scale):
+ oh, ow = img.shape[0:2]
+ h = int(oh * scale + 0.5)
+ w = int(ow * scale + 0.5)
+ if h % 32 != 0:
+ h = h + (32 - h % 32)
+ if w % 32 != 0:
+ w = w + (32 - w % 32)
+ img = cv2.resize(img, dsize=(w, h))
+ factor_h = h / oh
+ factor_w = w / ow
+ return img, factor_h, factor_w
+
+ def __call__(self, data):
+ img = data['image']
+
+ h, w = img.shape[0:2]
+ random_scale = np.array([0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3])
+ scale = (np.random.choice(random_scale) * self.short_size) / min(h, w)
+ img, factor_h, factor_w = self.scale_aligned(img, scale)
+
+ data['scale_factor'] = (factor_w, factor_h)
+ data['image'] = img
+ return data
+
+
+class MakeShrink():
+ def __init__(self, kernel_scale=0.7, **kwargs):
+ self.kernel_scale = kernel_scale
+
+ def dist(self, a, b):
+ return np.linalg.norm((a - b), ord=2, axis=0)
+
+ def perimeter(self, bbox):
+ peri = 0.0
+ for i in range(bbox.shape[0]):
+ peri += self.dist(bbox[i], bbox[(i + 1) % bbox.shape[0]])
+ return peri
+
+ def shrink(self, bboxes, rate, max_shr=20):
+ rate = rate * rate
+ shrinked_bboxes = []
+ for bbox in bboxes:
+ area = plg.Polygon(bbox).area()
+ peri = self.perimeter(bbox)
+
+ try:
+ pco = pyclipper.PyclipperOffset()
+ pco.AddPath(bbox, pyclipper.JT_ROUND,
+ pyclipper.ET_CLOSEDPOLYGON)
+ offset = min(
+ int(area * (1 - rate) / (peri + 0.001) + 0.5), max_shr)
+
+ shrinked_bbox = pco.Execute(-offset)
+ if len(shrinked_bbox) == 0:
+ shrinked_bboxes.append(bbox)
+ continue
+
+ shrinked_bbox = np.array(shrinked_bbox[0])
+ if shrinked_bbox.shape[0] <= 2:
+ shrinked_bboxes.append(bbox)
+ continue
+
+ shrinked_bboxes.append(shrinked_bbox)
+ except Exception as e:
+ shrinked_bboxes.append(bbox)
+
+ return shrinked_bboxes
+
+ def __call__(self, data):
+ img = data['image']
+ bboxes = data['polys']
+ words = data['texts']
+ scale_factor = data['scale_factor']
+
+ gt_instance = np.zeros(img.shape[0:2], dtype='uint8') # h,w
+ training_mask = np.ones(img.shape[0:2], dtype='uint8')
+ training_mask_distance = np.ones(img.shape[0:2], dtype='uint8')
+
+ for i in range(len(bboxes)):
+ bboxes[i] = np.reshape(bboxes[i] * (
+ [scale_factor[0], scale_factor[1]] * (bboxes[i].shape[0] // 2)),
+ (bboxes[i].shape[0] // 2, 2)).astype('int32')
+
+ for i in range(len(bboxes)):
+ #different value for different bbox
+ cv2.drawContours(gt_instance, [bboxes[i]], -1, i + 1, -1)
+
+ # set training mask to 0
+ cv2.drawContours(training_mask, [bboxes[i]], -1, 0, -1)
+
+ # for not accurate annotation, use training_mask_distance
+ if words[i] == '###' or words[i] == '???':
+ cv2.drawContours(training_mask_distance, [bboxes[i]], -1, 0, -1)
+
+ # make shrink
+ gt_kernel_instance = np.zeros(img.shape[0:2], dtype='uint8')
+ kernel_bboxes = self.shrink(bboxes, self.kernel_scale)
+ for i in range(len(bboxes)):
+ cv2.drawContours(gt_kernel_instance, [kernel_bboxes[i]], -1, i + 1,
+ -1)
+
+ # for training mask, kernel and background= 1, box region=0
+ if words[i] != '###' and words[i] != '???':
+ cv2.drawContours(training_mask, [kernel_bboxes[i]], -1, 1, -1)
+
+ gt_kernel = gt_kernel_instance.copy()
+ # for gt_kernel, kernel = 1
+ gt_kernel[gt_kernel > 0] = 1
+
+ # shrink 2 times
+ tmp1 = gt_kernel_instance.copy()
+ erode_kernel = np.ones((3, 3), np.uint8)
+ tmp1 = cv2.erode(tmp1, erode_kernel, iterations=1)
+ tmp2 = tmp1.copy()
+ tmp2 = cv2.erode(tmp2, erode_kernel, iterations=1)
+
+ # compute text region
+ gt_kernel_inner = tmp1 - tmp2
+
+ # gt_instance: text instance, bg=0, diff word use diff value
+ # training_mask: text instance mask, word=0,kernel and bg=1
+ # gt_kernel_instance: text kernel instance, bg=0, diff word use diff value
+ # gt_kernel: text_kernel, bg=0,diff word use same value
+ # gt_kernel_inner: text kernel reference
+ # training_mask_distance: word without anno = 0, else 1
+
+ data['image'] = [
+ img, gt_instance, training_mask, gt_kernel_instance, gt_kernel,
+ gt_kernel_inner, training_mask_distance
+ ]
+ return data
+
+
+class GroupRandomHorizontalFlip():
+ def __init__(self, p=0.5, **kwargs):
+ self.p = p
+
+ def __call__(self, data):
+ imgs = data['image']
+
+ if random.random() < self.p:
+ for i in range(len(imgs)):
+ imgs[i] = np.flip(imgs[i], axis=1).copy()
+ data['image'] = imgs
+ return data
+
+
+class GroupRandomRotate():
+ def __init__(self, **kwargs):
+ pass
+
+ def __call__(self, data):
+ imgs = data['image']
+
+ max_angle = 10
+ angle = random.random() * 2 * max_angle - max_angle
+ for i in range(len(imgs)):
+ img = imgs[i]
+ w, h = img.shape[:2]
+ rotation_matrix = cv2.getRotationMatrix2D((h / 2, w / 2), angle, 1)
+ img_rotation = cv2.warpAffine(
+ img, rotation_matrix, (h, w), flags=cv2.INTER_NEAREST)
+ imgs[i] = img_rotation
+
+ data['image'] = imgs
+ return data
+
+
+class GroupRandomCropPadding():
+ def __init__(self, target_size=(640, 640), **kwargs):
+ self.target_size = target_size
+
+ def __call__(self, data):
+ imgs = data['image']
+
+ h, w = imgs[0].shape[0:2]
+ t_w, t_h = self.target_size
+ p_w, p_h = self.target_size
+ if w == t_w and h == t_h:
+ return data
+
+ t_h = t_h if t_h < h else h
+ t_w = t_w if t_w < w else w
+
+ if random.random() > 3.0 / 8.0 and np.max(imgs[1]) > 0:
+ # make sure to crop the text region
+ tl = np.min(np.where(imgs[1] > 0), axis=1) - (t_h, t_w)
+ tl[tl < 0] = 0
+ br = np.max(np.where(imgs[1] > 0), axis=1) - (t_h, t_w)
+ br[br < 0] = 0
+ br[0] = min(br[0], h - t_h)
+ br[1] = min(br[1], w - t_w)
+
+ i = random.randint(tl[0], br[0]) if tl[0] < br[0] else 0
+ j = random.randint(tl[1], br[1]) if tl[1] < br[1] else 0
+ else:
+ i = random.randint(0, h - t_h) if h - t_h > 0 else 0
+ j = random.randint(0, w - t_w) if w - t_w > 0 else 0
+
+ n_imgs = []
+ for idx in range(len(imgs)):
+ if len(imgs[idx].shape) == 3:
+ s3_length = int(imgs[idx].shape[-1])
+ img = imgs[idx][i:i + t_h, j:j + t_w, :]
+ img_p = cv2.copyMakeBorder(
+ img,
+ 0,
+ p_h - t_h,
+ 0,
+ p_w - t_w,
+ borderType=cv2.BORDER_CONSTANT,
+ value=tuple(0 for i in range(s3_length)))
+ else:
+ img = imgs[idx][i:i + t_h, j:j + t_w]
+ img_p = cv2.copyMakeBorder(
+ img,
+ 0,
+ p_h - t_h,
+ 0,
+ p_w - t_w,
+ borderType=cv2.BORDER_CONSTANT,
+ value=(0, ))
+ n_imgs.append(img_p)
+
+ data['image'] = n_imgs
+ return data
+
+
+class MakeCentripetalShift():
+ def __init__(self, **kwargs):
+ pass
+
+ def jaccard(self, As, Bs):
+ A = As.shape[0] # small
+ B = Bs.shape[0] # large
+
+ dis = np.sqrt(
+ np.sum((As[:, np.newaxis, :].repeat(
+ B, axis=1) - Bs[np.newaxis, :, :].repeat(
+ A, axis=0))**2,
+ axis=-1))
+
+ ind = np.argmin(dis, axis=-1)
+
+ return ind
+
+ def __call__(self, data):
+ imgs = data['image']
+
+ img, gt_instance, training_mask, gt_kernel_instance, gt_kernel, gt_kernel_inner, training_mask_distance = \
+ imgs[0], imgs[1], imgs[2], imgs[3], imgs[4], imgs[5], imgs[6]
+
+ max_instance = np.max(gt_instance) # num bbox
+
+ # make centripetal shift
+ gt_distance = np.zeros((2, *img.shape[0:2]), dtype=np.float32)
+ for i in range(1, max_instance + 1):
+ # kernel_reference
+ ind = (gt_kernel_inner == i)
+
+ if np.sum(ind) == 0:
+ training_mask[gt_instance == i] = 0
+ training_mask_distance[gt_instance == i] = 0
+ continue
+
+ kpoints = np.array(np.where(ind)).transpose(
+ (1, 0))[:, ::-1].astype('float32')
+
+ ind = (gt_instance == i) * (gt_kernel_instance == 0)
+ if np.sum(ind) == 0:
+ continue
+ pixels = np.where(ind)
+
+ points = np.array(pixels).transpose(
+ (1, 0))[:, ::-1].astype('float32')
+
+ bbox_ind = self.jaccard(points, kpoints)
+
+ offset_gt = kpoints[bbox_ind] - points
+
+ gt_distance[:, pixels[0], pixels[1]] = offset_gt.T * 0.1
+
+ img = Image.fromarray(img)
+ img = img.convert('RGB')
+
+ data["image"] = img
+ data["gt_kernel"] = gt_kernel.astype("int64")
+ data["training_mask"] = training_mask.astype("int64")
+ data["gt_instance"] = gt_instance.astype("int64")
+ data["gt_kernel_instance"] = gt_kernel_instance.astype("int64")
+ data["training_mask_distance"] = training_mask_distance.astype("int64")
+ data["gt_distance"] = gt_distance.astype("float32")
+
+ return data
+
+
+class ScaleAlignedShort():
+ def __init__(self, short_size=640, **kwargs):
+ self.short_size = short_size
+
+ def __call__(self, data):
+ img = data['image']
+
+ org_img_shape = img.shape
+
+ h, w = img.shape[0:2]
+ scale = self.short_size * 1.0 / min(h, w)
+ h = int(h * scale + 0.5)
+ w = int(w * scale + 0.5)
+ if h % 32 != 0:
+ h = h + (32 - h % 32)
+ if w % 32 != 0:
+ w = w + (32 - w % 32)
+ img = cv2.resize(img, dsize=(w, h))
+
+ new_img_shape = img.shape
+ img_shape = np.array(org_img_shape + new_img_shape)
+
+ data['shape'] = img_shape
+ data['image'] = img
+
+ return data
\ No newline at end of file
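
A quick sketch of the two scaling transforms in isolation, assuming the repo and its dependencies (pyclipper, Polygon3) are importable; both align the output sides to multiples of 32:

```python
import numpy as np
from ppocr.data.imaug.ct_process import RandomScale, ScaleAlignedShort

img = (np.random.rand(401, 643, 3) * 255).astype(np.uint8)

# training: short side drawn from 0.7x-1.3x of short_size, then 32-aligned
data = RandomScale(short_size=640)({'image': img})
print(data['image'].shape, data['scale_factor'])

# eval: deterministic scaling; 'shape' records original + new (h, w, c)
data = ScaleAlignedShort(short_size=640)({'image': img})
print(data['image'].shape, data['shape'])
```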
diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py
index 59cb9b8a253cf04244ebf83511ab412174487a53..dbfb93176cc782bedc8f7b33367b59046c4abec8 100644
--- a/ppocr/data/imaug/label_ops.py
+++ b/ppocr/data/imaug/label_ops.py
@@ -1395,3 +1395,29 @@ class VLLabelEncode(BaseRecLabelEncode):
data['label_res'] = np.array(label_res)
data['label_sub'] = np.array(label_sub)
return data
+
+
+class CTLabelEncode(object):
+ def __init__(self, **kwargs):
+ pass
+
+ def __call__(self, data):
+ label = data['label']
+
+ label = json.loads(label)
+ nBox = len(label)
+ boxes, txts = [], []
+ for bno in range(0, nBox):
+ box = label[bno]['points']
+ box = np.array(box)
+
+ boxes.append(box)
+ txt = label[bno]['transcription']
+ txts.append(txt)
+
+ if len(boxes) == 0:
+ return None
+
+ data['polys'] = boxes
+ data['texts'] = txts
+ return data
\ No newline at end of file
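
The encoder consumes one JSON label line per image, in the standard PaddleOCR detection format. A small sketch with a made-up label:

```python
import json
from ppocr.data.imaug.label_ops import CTLabelEncode

# hypothetical single-image label in PaddleOCR detection format
label = json.dumps([{
    'transcription': 'PADDLE',
    'points': [[10, 10], [120, 12], [118, 48], [8, 45]]
}])
data = CTLabelEncode()({'label': label})
print(data['polys'][0].shape)  # (4, 2)
print(data['texts'])           # ['PADDLE']
```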
diff --git a/ppocr/data/imaug/operators.py b/ppocr/data/imaug/operators.py
index f8ed28929707eb750ad6e8499a73568cae3a8e6b..5e84b1aac9c54d8a8283468af6826ca917ba0384 100644
--- a/ppocr/data/imaug/operators.py
+++ b/ppocr/data/imaug/operators.py
@@ -225,6 +225,8 @@ class DetResizeForTest(object):
def __call__(self, data):
img = data['image']
src_h, src_w, _ = img.shape
+ if sum([src_h, src_w]) < 64:
+ img = self.image_padding(img)
if self.resize_type == 0:
# img, shape = self.resize_image_type0(img)
@@ -238,6 +240,12 @@ class DetResizeForTest(object):
data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w])
return data
+ def image_padding(self, im, value=0):
+ h, w, c = im.shape
+ im_pad = np.zeros((max(32, h), max(32, w), c), np.uint8) + value
+ im_pad[:h, :w, :] = im
+ return im_pad
+
def resize_image_type1(self, img):
resize_h, resize_w = self.image_shape
ori_h, ori_w = img.shape[:2] # (h, w, c)
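
The new guard only fires for tiny inputs (height plus width under 64 pixels), padding them up to at least 32x32 before the usual resize. A quick check, assuming the DB-style default parameters:

```python
import numpy as np
from ppocr.data.imaug.operators import DetResizeForTest

op = DetResizeForTest(limit_side_len=736, limit_type='min')
tiny = np.ones((20, 30, 3), np.uint8)  # 20 + 30 < 64 triggers the padding
out = op({'image': tiny})
print(out['image'].shape)
```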
diff --git a/ppocr/data/imaug/pg_process.py b/ppocr/data/imaug/pg_process.py
index 53031064c019ddce00c7546f898ac67a7f0459f9..f1e5f912b7a55dc3b9e883a9f4f8c5de482dcd5a 100644
--- a/ppocr/data/imaug/pg_process.py
+++ b/ppocr/data/imaug/pg_process.py
@@ -15,6 +15,8 @@
import math
import cv2
import numpy as np
+from skimage.morphology._skeletonize import thin
+from ppocr.utils.e2e_utils.extract_textpoint_fast import sort_and_expand_with_direction_v2
__all__ = ['PGProcessTrain']
@@ -26,17 +28,24 @@ class PGProcessTrain(object):
max_text_nums,
tcl_len,
batch_size=14,
+ use_resize=True,
+ use_random_crop=False,
min_crop_size=24,
min_text_size=4,
max_text_size=512,
+ point_gather_mode=None,
**kwargs):
self.tcl_len = tcl_len
self.max_text_length = max_text_length
self.max_text_nums = max_text_nums
self.batch_size = batch_size
- self.min_crop_size = min_crop_size
+ if use_random_crop is True:
+ self.min_crop_size = min_crop_size
+ self.use_random_crop = use_random_crop
self.min_text_size = min_text_size
self.max_text_size = max_text_size
+ self.use_resize = use_resize
+ self.point_gather_mode = point_gather_mode
self.Lexicon_Table = self.get_dict(character_dict_path)
self.pad_num = len(self.Lexicon_Table)
self.img_id = 0
@@ -282,6 +291,95 @@ class PGProcessTrain(object):
pos_m[:keep] = 1.0
return pos_l, pos_m
+ def fit_and_gather_tcl_points_v3(self,
+ min_area_quad,
+ poly,
+ max_h,
+ max_w,
+ fixed_point_num=64,
+ img_id=0,
+ reference_height=3):
+ """
+ Find the center point of poly as key_points, then fit and gather.
+ """
+ det_mask = np.zeros((int(max_h / self.ds_ratio),
+ int(max_w / self.ds_ratio))).astype(np.float32)
+
+ # score_big_map
+ cv2.fillPoly(det_mask,
+ np.round(poly / self.ds_ratio).astype(np.int32), 1.0)
+ det_mask = cv2.resize(
+ det_mask, dsize=None, fx=self.ds_ratio, fy=self.ds_ratio)
+ det_mask = np.array(det_mask > 1e-3, dtype='float32')
+
+ f_direction = self.f_direction
+ skeleton_map = thin(det_mask.astype(np.uint8))
+ instance_count, instance_label_map = cv2.connectedComponents(
+ skeleton_map.astype(np.uint8), connectivity=8)
+
+ ys, xs = np.where(instance_label_map == 1)
+ pos_list = list(zip(ys, xs))
+ if len(pos_list) < 3:
+ return None
+ pos_list_sorted = sort_and_expand_with_direction_v2(
+ pos_list, f_direction, det_mask)
+
+ pos_list_sorted = np.array(pos_list_sorted)
+ length = len(pos_list_sorted) - 1
+ insert_num = 0
+ for index in range(length):
+ stride_y = np.abs(pos_list_sorted[index + insert_num][0] -
+ pos_list_sorted[index + 1 + insert_num][0])
+ stride_x = np.abs(pos_list_sorted[index + insert_num][1] -
+ pos_list_sorted[index + 1 + insert_num][1])
+ max_points = int(max(stride_x, stride_y))
+
+ stride = (pos_list_sorted[index + insert_num] -
+ pos_list_sorted[index + 1 + insert_num]) / (max_points)
+ insert_num_temp = max_points - 1
+
+ for i in range(int(insert_num_temp)):
+ insert_value = pos_list_sorted[index + insert_num] - (i + 1
+ ) * stride
+ insert_index = index + i + 1 + insert_num
+ pos_list_sorted = np.insert(
+ pos_list_sorted, insert_index, insert_value, axis=0)
+ insert_num += insert_num_temp
+
+ pos_info = np.array(pos_list_sorted).reshape(-1, 2).astype(
+ np.float32) # xy-> yx
+
+ point_num = len(pos_info)
+ if point_num > fixed_point_num:
+ keep_ids = [
+ int((point_num * 1.0 / fixed_point_num) * x)
+ for x in range(fixed_point_num)
+ ]
+ pos_info = pos_info[keep_ids, :]
+
+ keep = int(min(len(pos_info), fixed_point_num))
+ reference_width = (np.abs(poly[0, 0, 0] - poly[-1, 1, 0]) +
+ np.abs(poly[0, 3, 0] - poly[-1, 2, 0])) // 2
+ if np.random.rand() < 1:
+ dh = (np.random.rand(keep) - 0.5) * reference_height
+ offset = np.random.rand() - 0.5
+ dw = np.array([[0, offset * reference_width * 0.2]])
+ random_float_h = np.array([1, 0]).reshape([1, 2]) * dh.reshape(
+ [keep, 1])
+ random_float_w = dw.repeat(keep, axis=0)
+ pos_info += random_float_h
+ pos_info += random_float_w
+ pos_info[:, 0] = np.clip(pos_info[:, 0], 0, max_h - 1)
+ pos_info[:, 1] = np.clip(pos_info[:, 1], 0, max_w - 1)
+
+ # padding to fixed length
+ pos_l = np.zeros((self.tcl_len, 3), dtype=np.int32)
+ pos_l[:, 0] = np.ones((self.tcl_len, )) * img_id
+ pos_m = np.zeros((self.tcl_len, 1), dtype=np.float32)
+ pos_l[:keep, 1:] = np.round(pos_info).astype(np.int32)
+ pos_m[:keep] = 1.0
+ return pos_l, pos_m
+
def generate_direction_map(self, poly_quads, n_char, direction_map):
"""
"""
@@ -334,6 +432,7 @@ class PGProcessTrain(object):
"""
Generate polygon.
"""
+ self.ds_ratio = ds_ratio
score_map_big = np.zeros(
(
h,
@@ -384,7 +483,6 @@ class PGProcessTrain(object):
text_label = text_strs[poly_idx]
text_label = self.prepare_text_label(text_label,
self.Lexicon_Table)
-
text_label_index_list = [[self.Lexicon_Table.index(c_)]
for c_ in text_label
if c_ in self.Lexicon_Table]
@@ -432,14 +530,30 @@ class PGProcessTrain(object):
# pos info
average_shrink_height = self.calculate_average_height(
stcl_quads)
- pos_l, pos_m = self.fit_and_gather_tcl_points_v2(
- min_area_quad,
- poly,
- max_h=h,
- max_w=w,
- fixed_point_num=64,
- img_id=self.img_id,
- reference_height=average_shrink_height)
+
+ if self.point_gather_mode == 'align':
+ self.f_direction = direction_map[:, :, :-1].copy()
+ pos_res = self.fit_and_gather_tcl_points_v3(
+ min_area_quad,
+ stcl_quads,
+ max_h=h,
+ max_w=w,
+ fixed_point_num=64,
+ img_id=self.img_id,
+ reference_height=average_shrink_height)
+ if pos_res is None:
+ continue
+ pos_l, pos_m = pos_res[0], pos_res[1]
+
+ else:
+ pos_l, pos_m = self.fit_and_gather_tcl_points_v2(
+ min_area_quad,
+ poly,
+ max_h=h,
+ max_w=w,
+ fixed_point_num=64,
+ img_id=self.img_id,
+ reference_height=average_shrink_height)
label_l = text_label_index_list
if len(text_label_index_list) < 2:
@@ -770,27 +884,41 @@ class PGProcessTrain(object):
text_polys[:, :, 0] *= asp_wx
text_polys[:, :, 1] *= asp_hy
- h, w, _ = im.shape
- if max(h, w) > 2048:
- rd_scale = 2048.0 / max(h, w)
- im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale)
- text_polys *= rd_scale
- h, w, _ = im.shape
- if min(h, w) < 16:
- return None
-
- # no background
- im, text_polys, text_tags, hv_tags, text_strs = self.crop_area(
- im,
- text_polys,
- text_tags,
- hv_tags,
- text_strs,
- crop_background=False)
+ if self.use_resize is True:
+ ori_h, ori_w, _ = im.shape
+ if max(ori_h, ori_w) < 200:
+ ratio = 200 / max(ori_h, ori_w)
+ im = cv2.resize(im, (int(ori_w * ratio), int(ori_h * ratio)))
+ text_polys[:, :, 0] *= ratio
+ text_polys[:, :, 1] *= ratio
+
+ if max(ori_h, ori_w) > 512:
+ ratio = 512 / max(ori_h, ori_w)
+ im = cv2.resize(im, (int(ori_w * ratio), int(ori_h * ratio)))
+ text_polys[:, :, 0] *= ratio
+ text_polys[:, :, 1] *= ratio
+ elif self.use_random_crop is True:
+ h, w, _ = im.shape
+ if max(h, w) > 2048:
+ rd_scale = 2048.0 / max(h, w)
+ im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale)
+ text_polys *= rd_scale
+ h, w, _ = im.shape
+ if min(h, w) < 16:
+ return None
+
+ # no background
+ im, text_polys, text_tags, hv_tags, text_strs = self.crop_area(
+ im,
+ text_polys,
+ text_tags,
+ hv_tags,
+ text_strs,
+ crop_background=False)
if text_polys.shape[0] == 0:
return None
- # # continue for all ignore case
+ # continue for all ignore case
if np.sum((text_tags * 1.0)) >= text_tags.size:
return None
new_h, new_w, _ = im.shape
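
In isolation, the new `use_resize` branch keeps the longest image side within roughly [200, 512] pixels and scales the polygons in lockstep; the two `if` tests are mutually exclusive since both compare against the original size. A standalone paraphrase of that branch:

```python
import cv2
import numpy as np

def resize_into_range(im, text_polys, lo=200, hi=512):
    # scale so that max(h, w) ends up within [lo, hi]
    h, w, _ = im.shape
    longest = max(h, w)
    if longest < lo:
        ratio = lo / longest
    elif longest > hi:
        ratio = hi / longest
    else:
        return im, text_polys
    im = cv2.resize(im, (int(w * ratio), int(h * ratio)))
    text_polys = text_polys * ratio
    return im, text_polys
```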
diff --git a/ppocr/data/imaug/rec_img_aug.py b/ppocr/data/imaug/rec_img_aug.py
index a5e0de8496559a40d42641a043848d5d43c98de1..89022d85ad8f24f61ef7725319ab46be01fe4d16 100644
--- a/ppocr/data/imaug/rec_img_aug.py
+++ b/ppocr/data/imaug/rec_img_aug.py
@@ -502,7 +502,7 @@ def resize_norm_img_chinese(img, image_shape):
max_wh_ratio = imgW * 1.0 / imgH
h, w = img.shape[0], img.shape[1]
ratio = w * 1.0 / h
- max_wh_ratio = max(max_wh_ratio, ratio)
+    max_wh_ratio = min(max(max_wh_ratio, ratio), max_wh_ratio)  # clamp to the default upper bound
imgW = int(imgH * max_wh_ratio)
if math.ceil(imgH * ratio) > imgW:
resized_w = imgW
diff --git a/ppocr/losses/__init__.py b/ppocr/losses/__init__.py
index 1a11778945c9d7b5f5519cd55473e8bf7790db2c..02525b3d50ad87509a6cba6fb2c1b00cb0add56e 100755
--- a/ppocr/losses/__init__.py
+++ b/ppocr/losses/__init__.py
@@ -25,6 +25,7 @@ from .det_east_loss import EASTLoss
from .det_sast_loss import SASTLoss
from .det_pse_loss import PSELoss
from .det_fce_loss import FCELoss
+from .det_ct_loss import CTLoss
# rec loss
from .rec_ctc_loss import CTCLoss
@@ -68,7 +69,7 @@ def build_loss(config):
'CELoss', 'TableAttentionLoss', 'SARLoss', 'AsterLoss', 'SDMGRLoss',
'VQASerTokenLayoutLMLoss', 'LossFromOutput', 'PRENLoss', 'MultiLoss',
'TableMasterLoss', 'SPINAttentionLoss', 'VLLoss', 'StrokeFocusLoss',
- 'SLALoss'
+ 'SLALoss', 'CTLoss'
]
config = copy.deepcopy(config)
module_name = config.pop('name')
diff --git a/ppocr/losses/basic_loss.py b/ppocr/losses/basic_loss.py
index da9faa08bc5ca35c5d65f7a7bfbbdd67192f052b..58410b4db2157074c2cb0f7db590c84021e10ace 100644
--- a/ppocr/losses/basic_loss.py
+++ b/ppocr/losses/basic_loss.py
@@ -60,19 +60,19 @@ class KLJSLoss(object):
], "mode can only be one of ['kl', 'KL', 'js', 'JS']"
self.mode = mode
- def __call__(self, p1, p2, reduction="mean"):
+ def __call__(self, p1, p2, reduction="mean", eps=1e-5):
if self.mode.lower() == 'kl':
loss = paddle.multiply(p2,
- paddle.log((p2 + 1e-5) / (p1 + 1e-5) + 1e-5))
- loss += paddle.multiply(
- p1, paddle.log((p1 + 1e-5) / (p2 + 1e-5) + 1e-5))
+ paddle.log((p2 + eps) / (p1 + eps) + eps))
+ loss += paddle.multiply(p1,
+ paddle.log((p1 + eps) / (p2 + eps) + eps))
loss *= 0.5
elif self.mode.lower() == "js":
loss = paddle.multiply(
- p2, paddle.log((2 * p2 + 1e-5) / (p1 + p2 + 1e-5) + 1e-5))
+ p2, paddle.log((2 * p2 + eps) / (p1 + p2 + eps) + eps))
loss += paddle.multiply(
- p1, paddle.log((2 * p1 + 1e-5) / (p1 + p2 + 1e-5) + 1e-5))
+ p1, paddle.log((2 * p1 + eps) / (p1 + p2 + eps) + eps))
loss *= 0.5
else:
raise ValueError(
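
With `eps` now a parameter, callers can tune the numerical floor instead of relying on the hard-coded 1e-5. A short usage sketch on dummy probability maps:

```python
import paddle
from ppocr.losses.basic_loss import KLJSLoss

p1 = paddle.rand([2, 32, 32])
p2 = paddle.rand([2, 32, 32])

js = KLJSLoss(mode='js')
loss = js(p1, p2, eps=1e-5)  # symmetric JS divergence with floor eps
```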
diff --git a/ppocr/losses/det_ct_loss.py b/ppocr/losses/det_ct_loss.py
new file mode 100755
index 0000000000000000000000000000000000000000..f48c95be4f84e2d8520363379b3061fa4245c105
--- /dev/null
+++ b/ppocr/losses/det_ct_loss.py
@@ -0,0 +1,276 @@
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This code is refer from:
+https://github.com/shengtao96/CentripetalText/tree/main/models/loss
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+import numpy as np
+
+
+def ohem_single(score, gt_text, training_mask):
+ # online hard example mining
+
+ pos_num = int(paddle.sum(gt_text > 0.5)) - int(
+ paddle.sum((gt_text > 0.5) & (training_mask <= 0.5)))
+
+ if pos_num == 0:
+ # selected_mask = gt_text.copy() * 0 # may be not good
+ selected_mask = training_mask
+ selected_mask = paddle.cast(
+ selected_mask.reshape(
+ (1, selected_mask.shape[0], selected_mask.shape[1])), "float32")
+ return selected_mask
+
+ neg_num = int(paddle.sum((gt_text <= 0.5) & (training_mask > 0.5)))
+ neg_num = int(min(pos_num * 3, neg_num))
+
+ if neg_num == 0:
+ selected_mask = training_mask
+ selected_mask = paddle.cast(
+ selected_mask.reshape(
+ (1, selected_mask.shape[0], selected_mask.shape[1])), "float32")
+ return selected_mask
+
+ # hard example
+ neg_score = score[(gt_text <= 0.5) & (training_mask > 0.5)]
+ neg_score_sorted = paddle.sort(-neg_score)
+ threshold = -neg_score_sorted[neg_num - 1]
+
+ selected_mask = ((score >= threshold) |
+ (gt_text > 0.5)) & (training_mask > 0.5)
+ selected_mask = paddle.cast(
+ selected_mask.reshape(
+ (1, selected_mask.shape[0], selected_mask.shape[1])), "float32")
+ return selected_mask
+
+
+def ohem_batch(scores, gt_texts, training_masks):
+ selected_masks = []
+ for i in range(scores.shape[0]):
+ selected_masks.append(
+ ohem_single(scores[i, :, :], gt_texts[i, :, :], training_masks[
+ i, :, :]))
+
+ selected_masks = paddle.cast(paddle.concat(selected_masks, 0), "float32")
+ return selected_masks
+
+
+def iou_single(a, b, mask, n_class):
+ EPS = 1e-6
+ valid = mask == 1
+ a = a[valid]
+ b = b[valid]
+ miou = []
+
+ # iou of each class
+ for i in range(n_class):
+ inter = paddle.cast(((a == i) & (b == i)), "float32")
+ union = paddle.cast(((a == i) | (b == i)), "float32")
+
+ miou.append(paddle.sum(inter) / (paddle.sum(union) + EPS))
+ miou = sum(miou) / len(miou)
+ return miou
+
+
+def iou(a, b, mask, n_class=2, reduce=True):
+ batch_size = a.shape[0]
+
+ a = a.reshape((batch_size, -1))
+ b = b.reshape((batch_size, -1))
+ mask = mask.reshape((batch_size, -1))
+
+ iou = paddle.zeros((batch_size, ), dtype="float32")
+ for i in range(batch_size):
+ iou[i] = iou_single(a[i], b[i], mask[i], n_class)
+
+ if reduce:
+ iou = paddle.mean(iou)
+ return iou
+
+
+class DiceLoss(nn.Layer):
+ def __init__(self, loss_weight=1.0):
+ super(DiceLoss, self).__init__()
+ self.loss_weight = loss_weight
+
+ def forward(self, input, target, mask, reduce=True):
+ batch_size = input.shape[0]
+ input = F.sigmoid(input) # scale to 0-1
+
+ input = input.reshape((batch_size, -1))
+ target = paddle.cast(target.reshape((batch_size, -1)), "float32")
+ mask = paddle.cast(mask.reshape((batch_size, -1)), "float32")
+
+ input = input * mask
+ target = target * mask
+
+ a = paddle.sum(input * target, axis=1)
+ b = paddle.sum(input * input, axis=1) + 0.001
+ c = paddle.sum(target * target, axis=1) + 0.001
+ d = (2 * a) / (b + c)
+ loss = 1 - d
+
+ loss = self.loss_weight * loss
+
+ if reduce:
+ loss = paddle.mean(loss)
+
+ return loss
+
+
+class SmoothL1Loss(nn.Layer):
+ def __init__(self, beta=1.0, loss_weight=1.0):
+ super(SmoothL1Loss, self).__init__()
+ self.beta = beta
+ self.loss_weight = loss_weight
+
+ np_coord = np.zeros(shape=[640, 640, 2], dtype=np.int64)
+ for i in range(640):
+ for j in range(640):
+ np_coord[i, j, 0] = j
+ np_coord[i, j, 1] = i
+ np_coord = np_coord.reshape((-1, 2))
+
+ self.coord = self.create_parameter(
+ shape=[640 * 640, 2],
+ dtype="int32", # NOTE: not support "int64" before paddle 2.3.1
+ default_initializer=nn.initializer.Assign(value=np_coord))
+ self.coord.stop_gradient = True
+
+ def forward_single(self, input, target, mask, beta=1.0, eps=1e-6):
+ batch_size = input.shape[0]
+
+ diff = paddle.abs(input - target) * mask.unsqueeze(1)
+ loss = paddle.where(diff < beta, 0.5 * diff * diff / beta,
+ diff - 0.5 * beta)
+ loss = paddle.cast(loss.reshape((batch_size, -1)), "float32")
+ mask = paddle.cast(mask.reshape((batch_size, -1)), "float32")
+ loss = paddle.sum(loss, axis=-1)
+ loss = loss / (mask.sum(axis=-1) + eps)
+
+ return loss
+
+ def select_single(self, distance, gt_instance, gt_kernel_instance,
+ training_mask):
+
+ with paddle.no_grad():
+ # paddle 2.3.1, paddle.slice not support:
+ # distance[:, self.coord[:, 1], self.coord[:, 0]]
+ select_distance_list = []
+ for i in range(2):
+ tmp1 = distance[i, :]
+ tmp2 = tmp1[self.coord[:, 1], self.coord[:, 0]]
+ select_distance_list.append(tmp2.unsqueeze(0))
+ select_distance = paddle.concat(select_distance_list, axis=0)
+
+ off_points = paddle.cast(
+ self.coord, "float32") + 10 * select_distance.transpose((1, 0))
+
+ off_points = paddle.cast(off_points, "int64")
+ off_points = paddle.clip(off_points, 0, distance.shape[-1] - 1)
+
+ selected_mask = (
+ gt_instance[self.coord[:, 1], self.coord[:, 0]] !=
+ gt_kernel_instance[off_points[:, 1], off_points[:, 0]])
+ selected_mask = paddle.cast(
+ selected_mask.reshape((1, -1, distance.shape[-1])), "int64")
+ selected_training_mask = selected_mask * training_mask
+
+ return selected_training_mask
+
+ def forward(self,
+ distances,
+ gt_instances,
+ gt_kernel_instances,
+ training_masks,
+ gt_distances,
+ reduce=True):
+
+ selected_training_masks = []
+ for i in range(distances.shape[0]):
+ selected_training_masks.append(
+ self.select_single(distances[i, :, :, :], gt_instances[i, :, :],
+ gt_kernel_instances[i, :, :], training_masks[
+ i, :, :]))
+ selected_training_masks = paddle.cast(
+ paddle.concat(selected_training_masks, 0), "float32")
+
+ loss = self.forward_single(distances, gt_distances,
+ selected_training_masks, self.beta)
+ loss = self.loss_weight * loss
+
+ with paddle.no_grad():
+ batch_size = distances.shape[0]
+ false_num = selected_training_masks.reshape((batch_size, -1))
+ false_num = false_num.sum(axis=-1)
+ total_num = paddle.cast(
+ training_masks.reshape((batch_size, -1)), "float32")
+ total_num = total_num.sum(axis=-1)
+ iou_text = (total_num - false_num) / (total_num + 1e-6)
+
+ if reduce:
+ loss = paddle.mean(loss)
+
+ return loss, iou_text
+
+
+class CTLoss(nn.Layer):
+ def __init__(self):
+ super(CTLoss, self).__init__()
+ self.kernel_loss = DiceLoss()
+ self.loc_loss = SmoothL1Loss(beta=0.1, loss_weight=0.05)
+
+ def forward(self, preds, batch):
+ imgs = batch[0]
+ out = preds['maps']
+ gt_kernels, training_masks, gt_instances, gt_kernel_instances, training_mask_distances, gt_distances = batch[
+ 1:]
+
+ kernels = out[:, 0, :, :]
+ distances = out[:, 1:, :, :]
+
+ # kernel loss
+ selected_masks = ohem_batch(kernels, gt_kernels, training_masks)
+
+ loss_kernel = self.kernel_loss(
+ kernels, gt_kernels, selected_masks, reduce=False)
+
+ iou_kernel = iou(paddle.cast((kernels > 0), "int64"),
+ gt_kernels,
+ training_masks,
+ reduce=False)
+ losses = dict(loss_kernels=loss_kernel, )
+
+ # loc loss
+ loss_loc, iou_text = self.loc_loss(
+ distances,
+ gt_instances,
+ gt_kernel_instances,
+ training_mask_distances,
+ gt_distances,
+ reduce=False)
+ losses.update(dict(loss_loc=loss_loc, ))
+
+ loss_all = loss_kernel + loss_loc
+ losses = {'loss': loss_all}
+
+ return losses
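
A dummy-shape smoke test of the combined loss, untested here and intended only to show the expected wiring; spatial sizes must be 640x640 because `SmoothL1Loss` hard-codes its coordinate grid to that size:

```python
import paddle
from ppocr.losses.det_ct_loss import CTLoss

loss_fn = CTLoss()
preds = {'maps': paddle.randn([1, 3, 640, 640])}  # 1 kernel + 2 offset channels
batch = [
    paddle.randn([1, 3, 640, 640]),             # images (unused by the loss)
    paddle.randint(0, 2, [1, 640, 640]),        # gt_kernels
    paddle.ones([1, 640, 640], dtype='int64'),  # training_masks
    paddle.randint(0, 3, [1, 640, 640]),        # gt_instances
    paddle.randint(0, 3, [1, 640, 640]),        # gt_kernel_instances
    paddle.ones([1, 640, 640], dtype='int64'),  # training_mask_distances
    paddle.randn([1, 2, 640, 640]),             # gt_distances
]
print(loss_fn(preds, batch)['loss'])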
diff --git a/ppocr/losses/e2e_pg_loss.py b/ppocr/losses/e2e_pg_loss.py
index 10a8ed0aa907123b155976ba498426604f23c2b0..aff67b7ce3c208bf9c7b1371e095eac8c70ce9df 100644
--- a/ppocr/losses/e2e_pg_loss.py
+++ b/ppocr/losses/e2e_pg_loss.py
@@ -89,12 +89,13 @@ class PGLoss(nn.Layer):
tcl_pos = paddle.reshape(tcl_pos, [-1, 3])
tcl_pos = paddle.cast(tcl_pos, dtype=int)
f_tcl_char = paddle.gather_nd(f_char, tcl_pos)
- f_tcl_char = paddle.reshape(f_tcl_char,
- [-1, 64, 37]) # len(Lexicon_Table)+1
- f_tcl_char_fg, f_tcl_char_bg = paddle.split(f_tcl_char, [36, 1], axis=2)
+ f_tcl_char = paddle.reshape(
+ f_tcl_char, [-1, 64, self.pad_num + 1]) # len(Lexicon_Table)+1
+ f_tcl_char_fg, f_tcl_char_bg = paddle.split(
+ f_tcl_char, [self.pad_num, 1], axis=2)
f_tcl_char_bg = f_tcl_char_bg * tcl_mask + (1.0 - tcl_mask) * 20.0
b, c, l = tcl_mask.shape
- tcl_mask_fg = paddle.expand(x=tcl_mask, shape=[b, c, 36 * l])
+ tcl_mask_fg = paddle.expand(x=tcl_mask, shape=[b, c, self.pad_num * l])
tcl_mask_fg.stop_gradient = True
f_tcl_char_fg = f_tcl_char_fg * tcl_mask_fg + (1.0 - tcl_mask_fg) * (
-20.0)
diff --git a/ppocr/metrics/__init__.py b/ppocr/metrics/__init__.py
index 853647c06cf0519a0e049e14c16a0d3e26f9845b..a39d0a464f3f96b44d23cec55768223ca41311fa 100644
--- a/ppocr/metrics/__init__.py
+++ b/ppocr/metrics/__init__.py
@@ -31,12 +31,14 @@ from .kie_metric import KIEMetric
from .vqa_token_ser_metric import VQASerTokenMetric
from .vqa_token_re_metric import VQAReTokenMetric
from .sr_metric import SRMetric
+from .ct_metric import CTMetric
+
def build_metric(config):
support_dict = [
"DetMetric", "DetFCEMetric", "RecMetric", "ClsMetric", "E2EMetric",
"DistillationMetric", "TableMetric", 'KIEMetric', 'VQASerTokenMetric',
- 'VQAReTokenMetric', 'SRMetric'
+ 'VQAReTokenMetric', 'SRMetric', 'CTMetric'
]
config = copy.deepcopy(config)
diff --git a/ppocr/metrics/ct_metric.py b/ppocr/metrics/ct_metric.py
new file mode 100644
index 0000000000000000000000000000000000000000..a7634230a23027a5dd5c32a7b8eb87ee4a229076
--- /dev/null
+++ b/ppocr/metrics/ct_metric.py
@@ -0,0 +1,52 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+from scipy import io
+import numpy as np
+
+from ppocr.utils.e2e_metric.Deteval import combine_results, get_score_C
+
+
+class CTMetric(object):
+ def __init__(self, main_indicator, delimiter='\t', **kwargs):
+ self.delimiter = delimiter
+ self.main_indicator = main_indicator
+ self.reset()
+
+ def reset(self):
+ self.results = [] # clear results
+
+ def __call__(self, preds, batch, **kwargs):
+        # NOTE: only batch_size=1 is supported for now, since label lengths
+        # differ across samples
+        assert len(
+            preds) == 1, "CentripetalText test now only supports batch_size=1."
+ label = batch[2]
+ text = batch[3]
+ pred = preds[0]['points']
+ result = get_score_C(label, text, pred)
+
+ self.results.append(result)
+
+ def get_metric(self):
+ """
+        Input format: y0,x0, ..., yn,xn. Each detection is separated by the end-of-line token ('\n').
+ """
+ metrics = combine_results(self.results, rec_flag=False)
+ self.reset()
+ return metrics
diff --git a/ppocr/modeling/heads/__init__.py b/ppocr/modeling/heads/__init__.py
index 0feda6c6e062fa314d97b8949d8545ed3305c22e..751757e5f176119688e2db47a68c514850b91823 100755
--- a/ppocr/modeling/heads/__init__.py
+++ b/ppocr/modeling/heads/__init__.py
@@ -23,6 +23,7 @@ def build_head(config):
from .det_pse_head import PSEHead
from .det_fce_head import FCEHead
from .e2e_pg_head import PGHead
+ from .det_ct_head import CT_Head
# rec head
from .rec_ctc_head import CTCHead
@@ -52,7 +53,7 @@ def build_head(config):
'ClsHead', 'AttentionHead', 'SRNHead', 'PGHead', 'Transformer',
'TableAttentionHead', 'SARHead', 'AsterHead', 'SDMGRHead', 'PRENHead',
'MultiHead', 'ABINetHead', 'TableMasterHead', 'SPINAttentionHead',
- 'VLHead', 'SLAHead', 'RobustScannerHead'
+ 'VLHead', 'SLAHead', 'RobustScannerHead', 'CT_Head'
]
#table head
diff --git a/ppocr/modeling/heads/det_ct_head.py b/ppocr/modeling/heads/det_ct_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..08e6719e8f0ade6887eb4ad7f44a2bc36ec132db
--- /dev/null
+++ b/ppocr/modeling/heads/det_ct_head.py
@@ -0,0 +1,69 @@
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+
+import math
+from paddle.nn.initializer import TruncatedNormal, Constant, Normal
+ones_ = Constant(value=1.)
+zeros_ = Constant(value=0.)
+
+
+class CT_Head(nn.Layer):
+ def __init__(self,
+ in_channels,
+ hidden_dim,
+ num_classes,
+ loss_kernel=None,
+ loss_loc=None):
+ super(CT_Head, self).__init__()
+ self.conv1 = nn.Conv2D(
+ in_channels, hidden_dim, kernel_size=3, stride=1, padding=1)
+ self.bn1 = nn.BatchNorm2D(hidden_dim)
+ self.relu1 = nn.ReLU()
+
+ self.conv2 = nn.Conv2D(
+ hidden_dim, num_classes, kernel_size=1, stride=1, padding=0)
+
+ for m in self.sublayers():
+ if isinstance(m, nn.Conv2D):
+ n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
+ normal_ = Normal(mean=0.0, std=math.sqrt(2. / n))
+ normal_(m.weight)
+ elif isinstance(m, nn.BatchNorm2D):
+ zeros_(m.bias)
+ ones_(m.weight)
+
+ def _upsample(self, x, scale=1):
+ return F.upsample(x, scale_factor=scale, mode='bilinear')
+
+ def forward(self, f, targets=None):
+ out = self.conv1(f)
+ out = self.relu1(self.bn1(out))
+ out = self.conv2(out)
+
+ if self.training:
+ out = self._upsample(out, scale=4)
+ return {'maps': out}
+ else:
+ score = F.sigmoid(out[:, 0, :, :])
+ return {'maps': out, 'score': score}
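
A forward-shape check of the head in eval mode; in training mode the maps are additionally upsampled 4x:

```python
import paddle
from ppocr.modeling.heads.det_ct_head import CT_Head

head = CT_Head(in_channels=512, hidden_dim=128, num_classes=3)
head.eval()
feat = paddle.randn([1, 512, 160, 160])  # e.g. CTFPN output for a 640x640 input
out = head(feat)
print(out['maps'].shape, out['score'].shape)  # [1, 3, 160, 160] [1, 160, 160]
```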
diff --git a/ppocr/modeling/heads/e2e_pg_head.py b/ppocr/modeling/heads/e2e_pg_head.py
index 274e1cdac5172f45590c9f7d7b50522c74db6750..514962ef97e503d331b6351c6d314070dfd8b15f 100644
--- a/ppocr/modeling/heads/e2e_pg_head.py
+++ b/ppocr/modeling/heads/e2e_pg_head.py
@@ -66,8 +66,17 @@ class PGHead(nn.Layer):
"""
"""
- def __init__(self, in_channels, **kwargs):
+ def __init__(self,
+ in_channels,
+ character_dict_path='ppocr/utils/ic15_dict.txt',
+ **kwargs):
super(PGHead, self).__init__()
+
+ # get character_length
+ with open(character_dict_path, "rb") as fin:
+ lines = fin.readlines()
+ character_length = len(lines) + 1
+
self.conv_f_score1 = ConvBNLayer(
in_channels=in_channels,
out_channels=64,
@@ -178,7 +187,7 @@ class PGHead(nn.Layer):
name="conv_f_char{}".format(5))
self.conv3 = nn.Conv2D(
in_channels=256,
- out_channels=37,
+ out_channels=character_length,
kernel_size=3,
stride=1,
padding=1,
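
The head's output width now follows the dictionary instead of the hard-coded 37, matching the `pad_num + 1` split introduced in `e2e_pg_loss.py` above. For the default English dict the arithmetic is:

```python
# ic15_dict.txt holds 36 characters (0-9, a-z); +1 adds the padding class,
# so conv3 gets 37 output channels, which PGLoss splits as [36, 1]
with open('ppocr/utils/ic15_dict.txt', 'rb') as fin:
    character_length = len(fin.readlines()) + 1
print(character_length)  # 37
```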
diff --git a/ppocr/modeling/heads/table_att_head.py b/ppocr/modeling/heads/table_att_head.py
index 00b434105bd9fe1f0d928c5f026dc5804b33fe23..d3c86e22b02e08c18d8d5cb193f2ffb8b07ad785 100644
--- a/ppocr/modeling/heads/table_att_head.py
+++ b/ppocr/modeling/heads/table_att_head.py
@@ -166,6 +166,7 @@ class SLAHead(nn.Layer):
self.max_text_length = max_text_length
self.emb = self._char_to_onehot
self.num_embeddings = out_channels
+ self.loc_reg_num = loc_reg_num
# structure
self.structure_attention_cell = AttentionGRUCell(
@@ -213,15 +214,17 @@ class SLAHead(nn.Layer):
fea = fea.transpose([0, 2, 1]) # (NTC)(batch, width, channels)
hidden = paddle.zeros((batch_size, self.hidden_size))
- structure_preds = []
- loc_preds = []
+        structure_preds = paddle.zeros(
+            (batch_size, self.max_text_length + 1, self.num_embeddings))
+        loc_preds = paddle.zeros(
+            (batch_size, self.max_text_length + 1, self.loc_reg_num))
+ structure_preds.stop_gradient = True
+ loc_preds.stop_gradient = True
if self.training and targets is not None:
structure = targets[0]
for i in range(self.max_text_length + 1):
hidden, structure_step, loc_step = self._decode(structure[:, i],
fea, hidden)
- structure_preds.append(structure_step)
- loc_preds.append(loc_step)
+ structure_preds[:, i, :] = structure_step
+ loc_preds[:, i, :] = loc_step
else:
pre_chars = paddle.zeros(shape=[batch_size], dtype="int32")
max_text_length = paddle.to_tensor(self.max_text_length)
@@ -231,10 +234,8 @@ class SLAHead(nn.Layer):
hidden, structure_step, loc_step = self._decode(pre_chars, fea,
hidden)
pre_chars = structure_step.argmax(axis=1, dtype="int32")
- structure_preds.append(structure_step)
- loc_preds.append(loc_step)
- structure_preds = paddle.stack(structure_preds, axis=1)
- loc_preds = paddle.stack(loc_preds, axis=1)
+ structure_preds[:, i, :] = structure_step
+ loc_preds[:, i, :] = loc_step
if not self.training:
structure_preds = F.softmax(structure_preds)
return {'structure_probs': structure_preds, 'loc_preds': loc_preds}
diff --git a/ppocr/modeling/necks/__init__.py b/ppocr/modeling/necks/__init__.py
index e3ae2d6ef27821f592645a4ba945d3feeaa8cf8a..c7e8dd068b4a68e56b066ca8fa629644a8f302c6 100644
--- a/ppocr/modeling/necks/__init__.py
+++ b/ppocr/modeling/necks/__init__.py
@@ -26,13 +26,15 @@ def build_neck(config):
from .fce_fpn import FCEFPN
from .pren_fpn import PRENFPN
from .csp_pan import CSPPAN
+ from .ct_fpn import CTFPN
support_dict = [
'FPN', 'FCEFPN', 'LKPAN', 'DBFPN', 'RSEFPN', 'EASTFPN', 'SASTFPN',
- 'SequenceEncoder', 'PGFPN', 'TableFPN', 'PRENFPN', 'CSPPAN'
+ 'SequenceEncoder', 'PGFPN', 'TableFPN', 'PRENFPN', 'CSPPAN', 'CTFPN'
]
module_name = config.pop('name')
assert module_name in support_dict, Exception('neck only support {}'.format(
support_dict))
+
module_class = eval(module_name)(**config)
return module_class
diff --git a/ppocr/modeling/necks/ct_fpn.py b/ppocr/modeling/necks/ct_fpn.py
new file mode 100644
index 0000000000000000000000000000000000000000..ee4d25e901b5b3093588571f0412a931eaf6f364
--- /dev/null
+++ b/ppocr/modeling/necks/ct_fpn.py
@@ -0,0 +1,185 @@
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+import os
+import sys
+
+import math
+from paddle.nn.initializer import TruncatedNormal, Constant, Normal
+ones_ = Constant(value=1.)
+zeros_ = Constant(value=0.)
+
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(__dir__)
+sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../../..')))
+
+
+class Conv_BN_ReLU(nn.Layer):
+ def __init__(self,
+ in_planes,
+ out_planes,
+ kernel_size=1,
+ stride=1,
+ padding=0):
+ super(Conv_BN_ReLU, self).__init__()
+ self.conv = nn.Conv2D(
+ in_planes,
+ out_planes,
+ kernel_size=kernel_size,
+ stride=stride,
+ padding=padding,
+ bias_attr=False)
+ self.bn = nn.BatchNorm2D(out_planes)
+ self.relu = nn.ReLU()
+
+ for m in self.sublayers():
+ if isinstance(m, nn.Conv2D):
+ n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
+ normal_ = Normal(mean=0.0, std=math.sqrt(2. / n))
+ normal_(m.weight)
+ elif isinstance(m, nn.BatchNorm2D):
+ zeros_(m.bias)
+ ones_(m.weight)
+
+ def forward(self, x):
+ return self.relu(self.bn(self.conv(x)))
+
+
+class FPEM(nn.Layer):
+ def __init__(self, in_channels, out_channels):
+ super(FPEM, self).__init__()
+ planes = out_channels
+ self.dwconv3_1 = nn.Conv2D(
+ planes,
+ planes,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ groups=planes,
+ bias_attr=False)
+ self.smooth_layer3_1 = Conv_BN_ReLU(planes, planes)
+
+ self.dwconv2_1 = nn.Conv2D(
+ planes,
+ planes,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ groups=planes,
+ bias_attr=False)
+ self.smooth_layer2_1 = Conv_BN_ReLU(planes, planes)
+
+ self.dwconv1_1 = nn.Conv2D(
+ planes,
+ planes,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ groups=planes,
+ bias_attr=False)
+ self.smooth_layer1_1 = Conv_BN_ReLU(planes, planes)
+
+ self.dwconv2_2 = nn.Conv2D(
+ planes,
+ planes,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ groups=planes,
+ bias_attr=False)
+ self.smooth_layer2_2 = Conv_BN_ReLU(planes, planes)
+
+ self.dwconv3_2 = nn.Conv2D(
+ planes,
+ planes,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ groups=planes,
+ bias_attr=False)
+ self.smooth_layer3_2 = Conv_BN_ReLU(planes, planes)
+
+ self.dwconv4_2 = nn.Conv2D(
+ planes,
+ planes,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ groups=planes,
+ bias_attr=False)
+ self.smooth_layer4_2 = Conv_BN_ReLU(planes, planes)
+
+ def _upsample_add(self, x, y):
+ return F.upsample(x, scale_factor=2, mode='bilinear') + y
+
+ def forward(self, f1, f2, f3, f4):
+ # up-down
+ f3 = self.smooth_layer3_1(self.dwconv3_1(self._upsample_add(f4, f3)))
+ f2 = self.smooth_layer2_1(self.dwconv2_1(self._upsample_add(f3, f2)))
+ f1 = self.smooth_layer1_1(self.dwconv1_1(self._upsample_add(f2, f1)))
+
+ # down-up
+ f2 = self.smooth_layer2_2(self.dwconv2_2(self._upsample_add(f2, f1)))
+ f3 = self.smooth_layer3_2(self.dwconv3_2(self._upsample_add(f3, f2)))
+ f4 = self.smooth_layer4_2(self.dwconv4_2(self._upsample_add(f4, f3)))
+
+ return f1, f2, f3, f4
+
+
+class CTFPN(nn.Layer):
+ def __init__(self, in_channels, out_channel=128):
+ super(CTFPN, self).__init__()
+ self.out_channels = out_channel * 4
+
+ self.reduce_layer1 = Conv_BN_ReLU(in_channels[0], 128)
+ self.reduce_layer2 = Conv_BN_ReLU(in_channels[1], 128)
+ self.reduce_layer3 = Conv_BN_ReLU(in_channels[2], 128)
+ self.reduce_layer4 = Conv_BN_ReLU(in_channels[3], 128)
+
+ self.fpem1 = FPEM(in_channels=(64, 128, 256, 512), out_channels=128)
+ self.fpem2 = FPEM(in_channels=(64, 128, 256, 512), out_channels=128)
+
+ def _upsample(self, x, scale=1):
+ return F.upsample(x, scale_factor=scale, mode='bilinear')
+
+ def forward(self, f):
+ # # reduce channel
+ f1 = self.reduce_layer1(f[0]) # N,64,160,160 --> N, 128, 160, 160
+ f2 = self.reduce_layer2(f[1]) # N, 128, 80, 80 --> N, 128, 80, 80
+ f3 = self.reduce_layer3(f[2]) # N, 256, 40, 40 --> N, 128, 40, 40
+ f4 = self.reduce_layer4(f[3]) # N, 512, 20, 20 --> N, 128, 20, 20
+
+ # FPEM
+ f1_1, f2_1, f3_1, f4_1 = self.fpem1(f1, f2, f3, f4)
+ f1_2, f2_2, f3_2, f4_2 = self.fpem2(f1_1, f2_1, f3_1, f4_1)
+
+ # FFM
+ f1 = f1_1 + f1_2
+ f2 = f2_1 + f2_2
+ f3 = f3_1 + f3_2
+ f4 = f4_1 + f4_2
+
+ f2 = self._upsample(f2, scale=2)
+ f3 = self._upsample(f3, scale=4)
+ f4 = self._upsample(f4, scale=8)
+ ff = paddle.concat((f1, f2, f3, f4), 1) # N,512, 160,160
+ return ff
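
A forward-shape check of the neck with ResNet18-style feature maps:

```python
import paddle
from ppocr.modeling.necks.ct_fpn import CTFPN

neck = CTFPN(in_channels=[64, 128, 256, 512])
feats = [
    paddle.randn([1, 64, 160, 160]),
    paddle.randn([1, 128, 80, 80]),
    paddle.randn([1, 256, 40, 40]),
    paddle.randn([1, 512, 20, 20]),
]
print(neck(feats).shape)  # [1, 512, 160, 160]
```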
diff --git a/ppocr/postprocess/__init__.py b/ppocr/postprocess/__init__.py
index 8f41a005f5b90e7edf11fad80b9b7eac89257160..35b7a6800da422264a796da14236ae8a484c30d9 100644
--- a/ppocr/postprocess/__init__.py
+++ b/ppocr/postprocess/__init__.py
@@ -35,6 +35,7 @@ from .vqa_token_ser_layoutlm_postprocess import VQASerTokenLayoutLMPostProcess,
from .vqa_token_re_layoutlm_postprocess import VQAReTokenLayoutLMPostProcess, DistillationRePostProcess
from .table_postprocess import TableMasterLabelDecode, TableLabelDecode
from .picodet_postprocess import PicoDetPostProcess
+from .ct_postprocess import CTPostProcess
def build_post_process(config, global_config=None):
@@ -48,7 +49,7 @@ def build_post_process(config, global_config=None):
'DistillationSARLabelDecode', 'ViTSTRLabelDecode', 'ABINetLabelDecode',
'TableMasterLabelDecode', 'SPINLabelDecode',
'DistillationSerPostProcess', 'DistillationRePostProcess',
- 'VLLabelDecode', 'PicoDetPostProcess'
+ 'VLLabelDecode', 'PicoDetPostProcess', 'CTPostProcess'
]
if config['name'] == 'PSEPostProcess':
diff --git a/ppocr/postprocess/ct_postprocess.py b/ppocr/postprocess/ct_postprocess.py
new file mode 100755
index 0000000000000000000000000000000000000000..3ab90be24d65888339698a5abe2ed692ceaab4c7
--- /dev/null
+++ b/ppocr/postprocess/ct_postprocess.py
@@ -0,0 +1,154 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This code is adapted from:
+https://github.com/shengtao96/CentripetalText/blob/main/test.py
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import os.path as osp
+import numpy as np
+import cv2
+import paddle
+import pyclipper
+
+
+class CTPostProcess(object):
+ """
+ The post process for Centripetal Text (CT).
+ """
+
+ def __init__(self, min_score=0.88, min_area=16, box_type='poly', **kwargs):
+ self.min_score = min_score
+ self.min_area = min_area
+ self.box_type = box_type
+
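+        # precompute a 300x300 coordinate grid: channel 0 holds the x index
+        # and channel 1 the y index of each pixel, reused across calls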
+ self.coord = np.zeros((2, 300, 300), dtype=np.int32)
+ for i in range(300):
+ for j in range(300):
+ self.coord[0, i, j] = j
+ self.coord[1, i, j] = i
+
+ def __call__(self, preds, batch):
+ outs = preds['maps']
+ out_scores = preds['score']
+
+ if isinstance(outs, paddle.Tensor):
+ outs = outs.numpy()
+ if isinstance(out_scores, paddle.Tensor):
+ out_scores = out_scores.numpy()
+
+ batch_size = outs.shape[0]
+ boxes_batch = []
+ for idx in range(batch_size):
+ bboxes = []
+ scores = []
+
+ img_shape = batch[idx]
+
+ org_img_size = img_shape[:3]
+ img_shape = img_shape[3:]
+ img_size = img_shape[:2]
+
+ out = np.expand_dims(outs[idx], axis=0)
+
+ score = np.expand_dims(out_scores[idx], axis=0)
+
+ kernel = out[:, 0, :, :] > 0.2
+ loc = out[:, 1:, :, :].astype("float32")
+
+ score = score[0].astype(np.float32)
+ kernel = kernel[0].astype(np.uint8)
+ loc = loc[0].astype(np.float32)
+
+ label_num, label_kernel = cv2.connectedComponents(
+ kernel, connectivity=4)
+
+ for i in range(1, label_num):
+ ind = (label_kernel == i)
+                if ind.sum() < 10:  # components with fewer than 10 pixels are treated as background
+ label_kernel[ind] = 0
+
+ label = np.zeros_like(label_kernel)
+ h, w = label_kernel.shape
+ pixels = self.coord[:, :h, :w].reshape(2, -1)
+ points = pixels.transpose([1, 0]).astype(np.float32)
+
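+            # shift every pixel by its predicted centripetal offset (the 10/4
+            # factor presumably matches the offset normalization used during
+            # training) and inherit the kernel label at the shifted location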
+ off_points = (points + 10. / 4. * loc[:, pixels[1], pixels[0]].T
+ ).astype(np.int32)
+ off_points[:, 0] = np.clip(off_points[:, 0], 0, label.shape[1] - 1)
+ off_points[:, 1] = np.clip(off_points[:, 1], 0, label.shape[0] - 1)
+
+ label[pixels[1], pixels[0]] = label_kernel[off_points[:, 1],
+ off_points[:, 0]]
+ label[label_kernel > 0] = label_kernel[label_kernel > 0]
+
+ score_pocket = [0.0]
+ for i in range(1, label_num):
+ ind = (label_kernel == i)
+ if ind.sum() == 0:
+ score_pocket.append(0.0)
+ continue
+ score_i = np.mean(score[ind])
+ score_pocket.append(score_i)
+
+ label_num = np.max(label) + 1
+ label = cv2.resize(
+ label, (img_size[1], img_size[0]),
+ interpolation=cv2.INTER_NEAREST)
+
+ scale = (float(org_img_size[1]) / float(img_size[1]),
+ float(org_img_size[0]) / float(img_size[0]))
+
+ for i in range(1, label_num):
+ ind = (label == i)
+ points = np.array(np.where(ind)).transpose((1, 0))
+
+ if points.shape[0] < self.min_area:
+ continue
+
+ score_i = score_pocket[i]
+ if score_i < self.min_score:
+ continue
+
+ if self.box_type == 'rect':
+ rect = cv2.minAreaRect(points[:, ::-1])
+ bbox = cv2.boxPoints(rect) * scale
+ z = bbox.mean(0)
+ bbox = z + (bbox - z) * 0.85
+ elif self.box_type == 'poly':
+ binary = np.zeros(label.shape, dtype='uint8')
+ binary[ind] = 1
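+                    # OpenCV 3.x returns (image, contours, hierarchy);
+                    # OpenCV 4.x returns (contours, hierarchy)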
+ try:
+ _, contours, _ = cv2.findContours(
+ binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+ except BaseException:
+ contours, _ = cv2.findContours(
+ binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+ bbox = contours[0] * scale
+
+ bbox = bbox.astype('int32')
+ bboxes.append(bbox.reshape(-1, 2))
+ scores.append(score_i)
+
+ boxes_batch.append({'points': bboxes})
+
+ return boxes_batch
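As a quick smoke test for this decoder, the following hedged sketch feeds it one synthetic text region; the batch-entry layout (original shape followed by resized shape) is an assumption inferred from the slicing in `__call__` above:

```python
import numpy as np
from ppocr.postprocess.ct_postprocess import CTPostProcess

h = w = 64
maps = np.zeros((1, 3, h, w), dtype='float32')
maps[0, 0, 20:40, 10:50] = 1.0  # kernel channel: one synthetic text region
score = np.ones((1, h, w), dtype='float32')  # perfect confidence everywhere

post = CTPostProcess(min_score=0.5, min_area=16, box_type='poly')
# assumed layout: (org_h, org_w, org_c, resized_h, resized_w, resized_c)
batch = [np.array([256, 256, 3, h, w, 3])]
out = post({'maps': maps, 'score': score}, batch)
print(len(out[0]['points']))  # 1 polygon, scaled back to the 256x256 image
```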
diff --git a/ppocr/postprocess/pg_postprocess.py b/ppocr/postprocess/pg_postprocess.py
index 0b1455181fddb0adb5347406bb2eb3093ee6fb30..058cf8b907de296094d3ed2fc7e6981939ced328 100644
--- a/ppocr/postprocess/pg_postprocess.py
+++ b/ppocr/postprocess/pg_postprocess.py
@@ -30,12 +30,18 @@ class PGPostProcess(object):
The post process for PGNet.
"""
- def __init__(self, character_dict_path, valid_set, score_thresh, mode,
+ def __init__(self,
+ character_dict_path,
+ valid_set,
+ score_thresh,
+ mode,
+ point_gather_mode=None,
**kwargs):
self.character_dict_path = character_dict_path
self.valid_set = valid_set
self.score_thresh = score_thresh
self.mode = mode
+ self.point_gather_mode = point_gather_mode
# the C++ la-nms is faster, but it only supports Python 3.5
self.is_python35 = False
@@ -43,8 +49,13 @@ class PGPostProcess(object):
self.is_python35 = True
def __call__(self, outs_dict, shape_list):
- post = PGNet_PostProcess(self.character_dict_path, self.valid_set,
- self.score_thresh, outs_dict, shape_list)
+ post = PGNet_PostProcess(
+ self.character_dict_path,
+ self.valid_set,
+ self.score_thresh,
+ outs_dict,
+ shape_list,
+ point_gather_mode=self.point_gather_mode)
if self.mode == 'fast':
data = post.pg_postprocess_fast()
else:
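With the registration above, the new knob can be exercised through `build_post_process`; a sketch, where the dict mirrors a PostProcess config section and the dictionary path and `valid_set` are placeholder values taken from existing PGNet configs:

```python
from ppocr.postprocess import build_post_process

post = build_post_process({
    'name': 'PGPostProcess',
    'character_dict_path': 'ppocr/utils/ic15_dict.txt',
    'valid_set': 'totaltext',
    'score_thresh': 0.5,
    'mode': 'fast',
    'point_gather_mode': 'align',  # new optional knob; omitting it keeps the old behavior
})
```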
diff --git a/ppocr/utils/e2e_metric/Deteval.py b/ppocr/utils/e2e_metric/Deteval.py
index 45567a7dd2d82b6c583abd4a4eabef52974be081..6ce56eda2aa9f38fdc712d49ae64945c558b418d 100755
--- a/ppocr/utils/e2e_metric/Deteval.py
+++ b/ppocr/utils/e2e_metric/Deteval.py
@@ -12,8 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import json
import numpy as np
import scipy.io as io
+import Polygon as plg
from ppocr.utils.e2e_metric.polygon_fast import iod, area_of_intersection, area
@@ -269,7 +271,124 @@ def get_socre_B(gt_dir, img_id, pred_dict):
return single_data
-def combine_results(all_data):
+def get_score_C(gt_label, text, pred_bboxes):
+ """
+ get score for CentripetalText (CT) prediction.
+ """
+
+ def gt_reading_mod(gt_label, text):
+ """This helper reads groundtruths from mat files"""
+ groundtruths = []
+ nbox = len(gt_label)
+ for i in range(nbox):
+ label = {"transcription": text[i][0], "points": gt_label[i].numpy()}
+ groundtruths.append(label)
+
+ return groundtruths
+
+ def get_union(pD, pG):
+ areaA = pD.area()
+ areaB = pG.area()
+ return areaA + areaB - get_intersection(pD, pG)
+
+ def get_intersection(pD, pG):
+ pInt = pD & pG
+ if len(pInt) == 0:
+ return 0
+ return pInt.area()
+
+ def detection_filtering(detections, groundtruths, threshold=0.5):
+ for gt in groundtruths:
+ point_num = gt['points'].shape[1] // 2
+ if gt['transcription'] == '###' and (point_num > 1):
+ gt_p = np.array(gt['points']).reshape(point_num,
+ 2).astype('int32')
+ gt_p = plg.Polygon(gt_p)
+
+ for det_id, detection in enumerate(detections):
+ det_y = detection[0::2]
+ det_x = detection[1::2]
+
+ det_p = np.concatenate((np.array(det_x), np.array(det_y)))
+ det_p = det_p.reshape(2, -1).transpose()
+ det_p = plg.Polygon(det_p)
+
+                    try:
+                        det_gt_iou = get_intersection(det_p,
+                                                      gt_p) / det_p.area()
+                    except Exception:
+                        print(det_x, det_y, gt_p)
+                        continue  # skip this detection if the polygon intersection fails
+ if det_gt_iou > threshold:
+ detections[det_id] = []
+
+ detections[:] = [item for item in detections if item != []]
+ return detections
+
+ def sigma_calculation(det_p, gt_p):
+ """
+ sigma = inter_area / gt_area
+ """
+ if gt_p.area() == 0.:
+ return 0
+ return get_intersection(det_p, gt_p) / gt_p.area()
+
+ def tau_calculation(det_p, gt_p):
+ """
+ tau = inter_area / det_area
+ """
+ if det_p.area() == 0.:
+ return 0
+ return get_intersection(det_p, gt_p) / det_p.area()
+
+ detections = []
+
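+    # each predicted box is an (N, 2) array of (x, y) points; reversing the
+    # columns and flattening yields the interleaved [y0, x0, y1, x1, ...]
+    # layout expected by the sigma/tau computations below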
+ for item in pred_bboxes:
+ detections.append(item[:, ::-1].reshape(-1))
+
+ groundtruths = gt_reading_mod(gt_label, text)
+
+    detections = detection_filtering(
+        detections, groundtruths)  # drop detections overlapping don't-care ('###') regions
+
+ for idx in range(len(groundtruths) - 1, -1, -1):
+        # NOTE: the source code uses 'orin' to indicate '###'; here we use
+        # 'anno', which may cause a slight drop in F-score (about 0.12)
+ if groundtruths[idx]['transcription'] == '###':
+ groundtruths.pop(idx)
+
+ local_sigma_table = np.zeros((len(groundtruths), len(detections)))
+ local_tau_table = np.zeros((len(groundtruths), len(detections)))
+
+ for gt_id, gt in enumerate(groundtruths):
+ if len(detections) > 0:
+ for det_id, detection in enumerate(detections):
+ point_num = gt['points'].shape[1] // 2
+
+ gt_p = np.array(gt['points']).reshape(point_num,
+ 2).astype('int32')
+ gt_p = plg.Polygon(gt_p)
+
+ det_y = detection[0::2]
+ det_x = detection[1::2]
+
+ det_p = np.concatenate((np.array(det_x), np.array(det_y)))
+
+ det_p = det_p.reshape(2, -1).transpose()
+ det_p = plg.Polygon(det_p)
+
+ local_sigma_table[gt_id, det_id] = sigma_calculation(det_p,
+ gt_p)
+ local_tau_table[gt_id, det_id] = tau_calculation(det_p, gt_p)
+
+ data = {}
+ data['sigma'] = local_sigma_table
+ data['global_tau'] = local_tau_table
+ data['global_pred_str'] = ''
+ data['global_gt_str'] = ''
+ return data
+
+
+def combine_results(all_data, rec_flag=True):
tr = 0.7
tp = 0.6
fsc_k = 0.8
@@ -278,6 +397,7 @@ def combine_results(all_data):
global_tau = []
global_pred_str = []
global_gt_str = []
+
for data in all_data:
global_sigma.append(data['sigma'])
global_tau.append(data['global_tau'])
@@ -294,7 +414,7 @@ def combine_results(all_data):
def one_to_one(local_sigma_table, local_tau_table,
local_accumulative_recall, local_accumulative_precision,
global_accumulative_recall, global_accumulative_precision,
- gt_flag, det_flag, idy):
+ gt_flag, det_flag, idy, rec_flag):
hit_str_num = 0
for gt_id in range(num_gt):
gt_matching_qualified_sigma_candidates = np.where(
@@ -328,14 +448,15 @@ def combine_results(all_data):
gt_flag[0, gt_id] = 1
matched_det_id = np.where(local_sigma_table[gt_id, :] > tr)
# recg start
- gt_str_cur = global_gt_str[idy][gt_id]
- pred_str_cur = global_pred_str[idy][matched_det_id[0].tolist()[
- 0]]
- if pred_str_cur == gt_str_cur:
- hit_str_num += 1
- else:
- if pred_str_cur.lower() == gt_str_cur.lower():
+ if rec_flag:
+ gt_str_cur = global_gt_str[idy][gt_id]
+ pred_str_cur = global_pred_str[idy][matched_det_id[0]
+ .tolist()[0]]
+ if pred_str_cur == gt_str_cur:
hit_str_num += 1
+ else:
+ if pred_str_cur.lower() == gt_str_cur.lower():
+ hit_str_num += 1
# recg end
det_flag[0, matched_det_id] = 1
return local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, gt_flag, det_flag, hit_str_num
@@ -343,7 +464,7 @@ def combine_results(all_data):
def one_to_many(local_sigma_table, local_tau_table,
local_accumulative_recall, local_accumulative_precision,
global_accumulative_recall, global_accumulative_precision,
- gt_flag, det_flag, idy):
+ gt_flag, det_flag, idy, rec_flag):
hit_str_num = 0
for gt_id in range(num_gt):
# skip the following if the groundtruth was matched
@@ -374,28 +495,30 @@ def combine_results(all_data):
gt_flag[0, gt_id] = 1
det_flag[0, qualified_tau_candidates] = 1
# recg start
- gt_str_cur = global_gt_str[idy][gt_id]
- pred_str_cur = global_pred_str[idy][
- qualified_tau_candidates[0].tolist()[0]]
- if pred_str_cur == gt_str_cur:
- hit_str_num += 1
- else:
- if pred_str_cur.lower() == gt_str_cur.lower():
+ if rec_flag:
+ gt_str_cur = global_gt_str[idy][gt_id]
+ pred_str_cur = global_pred_str[idy][
+ qualified_tau_candidates[0].tolist()[0]]
+ if pred_str_cur == gt_str_cur:
hit_str_num += 1
+ else:
+ if pred_str_cur.lower() == gt_str_cur.lower():
+ hit_str_num += 1
# recg end
elif (np.sum(local_sigma_table[gt_id, qualified_tau_candidates])
>= tr):
gt_flag[0, gt_id] = 1
det_flag[0, qualified_tau_candidates] = 1
# recg start
- gt_str_cur = global_gt_str[idy][gt_id]
- pred_str_cur = global_pred_str[idy][
- qualified_tau_candidates[0].tolist()[0]]
- if pred_str_cur == gt_str_cur:
- hit_str_num += 1
- else:
- if pred_str_cur.lower() == gt_str_cur.lower():
+ if rec_flag:
+ gt_str_cur = global_gt_str[idy][gt_id]
+ pred_str_cur = global_pred_str[idy][
+ qualified_tau_candidates[0].tolist()[0]]
+ if pred_str_cur == gt_str_cur:
hit_str_num += 1
+ else:
+ if pred_str_cur.lower() == gt_str_cur.lower():
+ hit_str_num += 1
# recg end
global_accumulative_recall = global_accumulative_recall + fsc_k
@@ -409,7 +532,7 @@ def combine_results(all_data):
def many_to_one(local_sigma_table, local_tau_table,
local_accumulative_recall, local_accumulative_precision,
global_accumulative_recall, global_accumulative_precision,
- gt_flag, det_flag, idy):
+ gt_flag, det_flag, idy, rec_flag):
hit_str_num = 0
for det_id in range(num_det):
# skip the following if the detection was matched
@@ -440,6 +563,30 @@ def combine_results(all_data):
gt_flag[0, qualified_sigma_candidates] = 1
det_flag[0, det_id] = 1
# recg start
+ if rec_flag:
+ pred_str_cur = global_pred_str[idy][det_id]
+ gt_len = len(qualified_sigma_candidates[0])
+ for idx in range(gt_len):
+ ele_gt_id = qualified_sigma_candidates[
+ 0].tolist()[idx]
+ if ele_gt_id not in global_gt_str[idy]:
+ continue
+ gt_str_cur = global_gt_str[idy][ele_gt_id]
+ if pred_str_cur == gt_str_cur:
+ hit_str_num += 1
+ break
+ else:
+                                if pred_str_cur.lower() == gt_str_cur.lower():
+ hit_str_num += 1
+ break
+ # recg end
+ elif (np.sum(local_tau_table[qualified_sigma_candidates,
+ det_id]) >= tp):
+ det_flag[0, det_id] = 1
+ gt_flag[0, qualified_sigma_candidates] = 1
+ # recg start
+ if rec_flag:
pred_str_cur = global_pred_str[idy][det_id]
gt_len = len(qualified_sigma_candidates[0])
for idx in range(gt_len):
@@ -454,27 +601,7 @@ def combine_results(all_data):
else:
if pred_str_cur.lower() == gt_str_cur.lower():
hit_str_num += 1
- break
- # recg end
- elif (np.sum(local_tau_table[qualified_sigma_candidates,
- det_id]) >= tp):
- det_flag[0, det_id] = 1
- gt_flag[0, qualified_sigma_candidates] = 1
- # recg start
- pred_str_cur = global_pred_str[idy][det_id]
- gt_len = len(qualified_sigma_candidates[0])
- for idx in range(gt_len):
- ele_gt_id = qualified_sigma_candidates[0].tolist()[idx]
- if ele_gt_id not in global_gt_str[idy]:
- continue
- gt_str_cur = global_gt_str[idy][ele_gt_id]
- if pred_str_cur == gt_str_cur:
- hit_str_num += 1
- break
- else:
- if pred_str_cur.lower() == gt_str_cur.lower():
- hit_str_num += 1
- break
+ break
# recg end
global_accumulative_recall = global_accumulative_recall + num_qualified_sigma_candidates * fsc_k
@@ -504,7 +631,7 @@ def combine_results(all_data):
gt_flag, det_flag, hit_str_num = one_to_one(local_sigma_table, local_tau_table,
local_accumulative_recall, local_accumulative_precision,
global_accumulative_recall, global_accumulative_precision,
- gt_flag, det_flag, idx)
+ gt_flag, det_flag, idx, rec_flag)
hit_str_count += hit_str_num
#######then check for one-to-many case##########
@@ -512,14 +639,14 @@ def combine_results(all_data):
gt_flag, det_flag, hit_str_num = one_to_many(local_sigma_table, local_tau_table,
local_accumulative_recall, local_accumulative_precision,
global_accumulative_recall, global_accumulative_precision,
- gt_flag, det_flag, idx)
+ gt_flag, det_flag, idx, rec_flag)
hit_str_count += hit_str_num
#######then check for many-to-one case##########
local_accumulative_recall, local_accumulative_precision, global_accumulative_recall, global_accumulative_precision, \
gt_flag, det_flag, hit_str_num = many_to_one(local_sigma_table, local_tau_table,
local_accumulative_recall, local_accumulative_precision,
global_accumulative_recall, global_accumulative_precision,
- gt_flag, det_flag, idx)
+ gt_flag, det_flag, idx, rec_flag)
hit_str_count += hit_str_num
try:
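For reference, a tiny worked example of the sigma/tau definitions used above, built with the same Polygon3 library this file imports (two 4x4 squares overlapping by half):

```python
import numpy as np
import Polygon as plg

gt_p = plg.Polygon(np.array([[0, 0], [4, 0], [4, 4], [0, 4]]))
det_p = plg.Polygon(np.array([[2, 0], [6, 0], [6, 4], [2, 4]]))

inter = (det_p & gt_p).area()   # 8.0
print(inter / gt_p.area())      # sigma = 0.5 (recall-like)
print(inter / det_p.area())     # tau   = 0.5 (precision-like)
```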
diff --git a/ppocr/utils/e2e_utils/extract_textpoint_fast.py b/ppocr/utils/e2e_utils/extract_textpoint_fast.py
index 787cd3017fafa6fc554bead0cc05b5bfe682df42..a85b8e78ead00e64630b57400b9e5141eb0181a8 100644
--- a/ppocr/utils/e2e_utils/extract_textpoint_fast.py
+++ b/ppocr/utils/e2e_utils/extract_textpoint_fast.py
@@ -88,8 +88,35 @@ def ctc_greedy_decoder(probs_seq, blank=95, keep_blank_in_idxs=True):
return dst_str, keep_idx_list
-def instance_ctc_greedy_decoder(gather_info, logits_map, pts_num=4):
+def instance_ctc_greedy_decoder(gather_info,
+ logits_map,
+ pts_num=4,
+ point_gather_mode=None):
_, _, C = logits_map.shape
+ if point_gather_mode == 'align':
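+        # 'align' mode densifies the gathered center-line points: between each
+        # pair of neighboring points, insert linearly interpolated points so
+        # consecutive points end up roughly one pixel apart before CTC decoding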
+ insert_num = 0
+ gather_info = np.array(gather_info)
+ length = len(gather_info) - 1
+        for index in range(length):
+            stride_y = np.abs(gather_info[index + insert_num][0] -
+                              gather_info[index + 1 + insert_num][0])
+            stride_x = np.abs(gather_info[index + insert_num][1] -
+                              gather_info[index + 1 + insert_num][1])
+            max_points = int(max(stride_x, stride_y))
+            if max_points < 1:
+                continue  # consecutive points coincide; nothing to interpolate
+            stride = (gather_info[index + insert_num] -
+                      gather_info[index + 1 + insert_num]) / max_points
+            insert_num_temp = max_points - 1
+
+            for i in range(insert_num_temp):
+                insert_value = gather_info[index + insert_num] - (i + 1) * stride
+                insert_index = index + i + 1 + insert_num
+                gather_info = np.insert(
+                    gather_info, insert_index, insert_value, axis=0)
+            insert_num += insert_num_temp
+ gather_info = gather_info.tolist()
ys, xs = zip(*gather_info)
logits_seq = logits_map[list(ys), list(xs)]
probs_seq = logits_seq
@@ -104,7 +131,8 @@ def instance_ctc_greedy_decoder(gather_info, logits_map, pts_num=4):
def ctc_decoder_for_image(gather_info_list,
logits_map,
Lexicon_Table,
- pts_num=6):
+ pts_num=6,
+ point_gather_mode=None):
"""
CTC decoder using multiple processes.
"""
@@ -114,7 +142,10 @@ def ctc_decoder_for_image(gather_info_list,
if len(gather_info) < pts_num:
continue
dst_str, xys_list = instance_ctc_greedy_decoder(
- gather_info, logits_map, pts_num=pts_num)
+ gather_info,
+ logits_map,
+ pts_num=pts_num,
+ point_gather_mode=point_gather_mode)
dst_str_readable = ''.join([Lexicon_Table[idx] for idx in dst_str])
if len(dst_str_readable) < 2:
continue
@@ -356,7 +387,8 @@ def generate_pivot_list_fast(p_score,
p_char_maps,
f_direction,
Lexicon_Table,
- score_thresh=0.5):
+ score_thresh=0.5,
+ point_gather_mode=None):
"""
return center point and end point of TCL instance; filter with the char maps;
"""
@@ -384,7 +416,10 @@ def generate_pivot_list_fast(p_score,
p_char_maps = p_char_maps.transpose([1, 2, 0])
decoded_str, keep_yxs_list = ctc_decoder_for_image(
- all_pos_yxs, logits_map=p_char_maps, Lexicon_Table=Lexicon_Table)
+ all_pos_yxs,
+ logits_map=p_char_maps,
+ Lexicon_Table=Lexicon_Table,
+ point_gather_mode=point_gather_mode)
return keep_yxs_list, decoded_str
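To make the 'align' densification concrete, here is a standalone NumPy sketch of the same interpolation on a toy (y, x) point list (illustrative only, independent of the decoder):

```python
import numpy as np

pts = np.array([[0, 0], [4, 0]])  # two center-line points, 4 px apart
out = [pts[0]]
for a, b in zip(pts[:-1], pts[1:]):
    n = int(max(abs(b[0] - a[0]), abs(b[1] - a[1])))
    for i in range(1, n):
        out.append(a + (b - a) * i / n)  # evenly spaced intermediate points
    out.append(b)
print(np.array(out))  # [[0,0],[1,0],[2,0],[3,0],[4,0]] (as floats)
```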
diff --git a/ppocr/utils/e2e_utils/pgnet_pp_utils.py b/ppocr/utils/e2e_utils/pgnet_pp_utils.py
index a15503c0a88f735cc5f5eef924b0d022e5684eed..06a766b0e714e2792c0b0d3069963de998eb9eb7 100644
--- a/ppocr/utils/e2e_utils/pgnet_pp_utils.py
+++ b/ppocr/utils/e2e_utils/pgnet_pp_utils.py
@@ -28,13 +28,19 @@ from extract_textpoint_fast import generate_pivot_list_fast, restore_poly
class PGNet_PostProcess(object):
    # two different post-processing modes: fast and slow
- def __init__(self, character_dict_path, valid_set, score_thresh, outs_dict,
- shape_list):
+ def __init__(self,
+ character_dict_path,
+ valid_set,
+ score_thresh,
+ outs_dict,
+ shape_list,
+ point_gather_mode=None):
self.Lexicon_Table = get_dict(character_dict_path)
self.valid_set = valid_set
self.score_thresh = score_thresh
self.outs_dict = outs_dict
self.shape_list = shape_list
+ self.point_gather_mode = point_gather_mode
def pg_postprocess_fast(self):
p_score = self.outs_dict['f_score']
@@ -58,7 +64,8 @@ class PGNet_PostProcess(object):
p_char,
p_direction,
self.Lexicon_Table,
- score_thresh=self.score_thresh)
+ score_thresh=self.score_thresh,
+ point_gather_mode=self.point_gather_mode)
poly_list, keep_str_list = restore_poly(instance_yxs_list, seq_strs,
p_border, ratio_w, ratio_h,
src_w, src_h, self.valid_set)
diff --git a/ppstructure/kie/README.md b/ppstructure/kie/README.md
index 562ebb9e25b09015150d3265a7b9a6c8c74e7aae..b3b4d47d86d0cf2871ff96951afa0007306a572b 100644
--- a/ppstructure/kie/README.md
+++ b/ppstructure/kie/README.md
@@ -172,16 +172,16 @@ If you want to use OCR engine to obtain end-to-end prediction results, you can u
# just predict using SER trained model
python3 tools/infer_kie_token_ser.py \
-c configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml \
- -o Architecture.Backbone.checkpoints=./pretrain_models/ser_vi_layoutxlm_xfund_pretrained/best_accuracy \
+ -o Architecture.Backbone.checkpoints=./pretrained_model/ser_vi_layoutxlm_xfund_pretrained/best_accuracy \
Global.infer_img=./ppstructure/docs/kie/input/zh_val_42.jpg
# predict using SER and RE trained model at the same time
python3 ./tools/infer_kie_token_ser_re.py \
-c configs/kie/vi_layoutxlm/re_vi_layoutxlm_xfund_zh.yml \
- -o Architecture.Backbone.checkpoints=./pretrain_models/re_vi_layoutxlm_xfund_pretrained/best_accuracy \
+ -o Architecture.Backbone.checkpoints=./pretrained_model/re_vi_layoutxlm_xfund_pretrained/best_accuracy \
Global.infer_img=./train_data/XFUND/zh_val/image/zh_val_42.jpg \
-c_ser configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml \
- -o_ser Architecture.Backbone.checkpoints=./pretrain_models/ser_vi_layoutxlm_xfund_pretrained/best_accuracy
+ -o_ser Architecture.Backbone.checkpoints=./pretrained_model/ser_vi_layoutxlm_xfund_pretrained/best_accuracy
```
The visual result images and the predicted text file will be saved in the `Global.save_res_path` directory.
@@ -193,18 +193,18 @@ If you want to load the text detection and recognition results collected before,
# just predict using SER trained model
python3 tools/infer_kie_token_ser.py \
-c configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml \
- -o Architecture.Backbone.checkpoints=./pretrain_models/ser_vi_layoutxlm_xfund_pretrained/best_accuracy \
+ -o Architecture.Backbone.checkpoints=./pretrained_model/ser_vi_layoutxlm_xfund_pretrained/best_accuracy \
Global.infer_img=./train_data/XFUND/zh_val/val.json \
Global.infer_mode=False
# predict using SER and RE trained model at the same time
python3 ./tools/infer_kie_token_ser_re.py \
-c configs/kie/vi_layoutxlm/re_vi_layoutxlm_xfund_zh.yml \
- -o Architecture.Backbone.checkpoints=./pretrain_models/re_vi_layoutxlm_xfund_pretrained/best_accuracy \
+ -o Architecture.Backbone.checkpoints=./pretrained_model/re_vi_layoutxlm_xfund_pretrained/best_accuracy \
Global.infer_img=./train_data/XFUND/zh_val/val.json \
Global.infer_mode=False \
-c_ser configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml \
- -o_ser Architecture.Backbone.checkpoints=./pretrain_models/ser_vi_layoutxlm_xfund_pretrained/best_accuracy
+ -o_ser Architecture.Backbone.checkpoints=./pretrained_model/ser_vi_layoutxlm_xfund_pretrained/best_accuracy
```
#### 4.2.3 Inference using PaddleInference
diff --git a/ppstructure/kie/README_ch.md b/ppstructure/kie/README_ch.md
index 56c99ab73abe2b33ccfa18d4181312cd5f4d3622..cc8c60009f4cb83d349c45573a9fa03832665374 100644
--- a/ppstructure/kie/README_ch.md
+++ b/ppstructure/kie/README_ch.md
@@ -156,16 +156,16 @@ wget https://paddleocr.bj.bcebos.com/ppstructure/models/vi_layoutxlm/re_vi_layou
# just predict using SER trained model
python3 tools/infer_kie_token_ser.py \
-c configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml \
- -o Architecture.Backbone.checkpoints=./pretrain_models/ser_vi_layoutxlm_xfund_pretrained/best_accuracy \
+ -o Architecture.Backbone.checkpoints=./pretrained_model/ser_vi_layoutxlm_xfund_pretrained/best_accuracy \
Global.infer_img=./ppstructure/docs/kie/input/zh_val_42.jpg
# predict using SER and RE trained model at the same time
python3 ./tools/infer_kie_token_ser_re.py \
-c configs/kie/vi_layoutxlm/re_vi_layoutxlm_xfund_zh.yml \
- -o Architecture.Backbone.checkpoints=./pretrain_models/re_vi_layoutxlm_xfund_pretrained/best_accuracy \
+ -o Architecture.Backbone.checkpoints=./pretrained_model/re_vi_layoutxlm_xfund_pretrained/best_accuracy \
Global.infer_img=./train_data/XFUND/zh_val/image/zh_val_42.jpg \
-c_ser configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml \
- -o_ser Architecture.Backbone.checkpoints=./pretrain_models/ser_vi_layoutxlm_xfund_pretrained/best_accuracy
+ -o_ser Architecture.Backbone.checkpoints=./pretrained_model/ser_vi_layoutxlm_xfund_pretrained/best_accuracy
```
The visual result images and the predicted text file will be saved in the `Global.save_res_path` directory.
@@ -177,18 +177,18 @@ python3 ./tools/infer_kie_token_ser_re.py \
# just predict using SER trained model
python3 tools/infer_kie_token_ser.py \
-c configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml \
- -o Architecture.Backbone.checkpoints=./pretrain_models/ser_vi_layoutxlm_xfund_pretrained/best_accuracy \
+ -o Architecture.Backbone.checkpoints=./pretrained_model/ser_vi_layoutxlm_xfund_pretrained/best_accuracy \
Global.infer_img=./train_data/XFUND/zh_val/val.json \
Global.infer_mode=False
# predict using SER and RE trained model at the same time
python3 ./tools/infer_kie_token_ser_re.py \
-c configs/kie/vi_layoutxlm/re_vi_layoutxlm_xfund_zh.yml \
- -o Architecture.Backbone.checkpoints=./pretrain_models/re_vi_layoutxlm_xfund_pretrained/best_accuracy \
+ -o Architecture.Backbone.checkpoints=./pretrained_model/re_vi_layoutxlm_xfund_pretrained/best_accuracy \
Global.infer_img=./train_data/XFUND/zh_val/val.json \
Global.infer_mode=False \
-c_ser configs/kie/vi_layoutxlm/ser_vi_layoutxlm_xfund_zh.yml \
- -o_ser Architecture.Backbone.checkpoints=./pretrain_models/ser_vi_layoutxlm_xfund_pretrained/best_accuracy
+ -o_ser Architecture.Backbone.checkpoints=./pretrained_model/ser_vi_layoutxlm_xfund_pretrained/best_accuracy
```
#### 4.2.3 Inference using PaddleInference
diff --git a/ppstructure/pdf2word/pdf2word.md b/ppstructure/pdf2word/README.md
similarity index 100%
rename from ppstructure/pdf2word/pdf2word.md
rename to ppstructure/pdf2word/README.md
diff --git a/ppstructure/table/predict_structure.py b/ppstructure/table/predict_structure.py
index 45cbba3e298004d3711b05e6fb7cffecae637601..0bf100852b9e9d501dfc858d8ce0787da42a61ed 100755
--- a/ppstructure/table/predict_structure.py
+++ b/ppstructure/table/predict_structure.py
@@ -68,6 +68,7 @@ def build_pre_process_list(args):
class TableStructurer(object):
def __init__(self, args):
+ self.use_onnx = args.use_onnx
pre_process_list = build_pre_process_list(args)
if args.table_algorithm not in ['TableMaster']:
postprocess_params = {
@@ -98,13 +99,17 @@ class TableStructurer(object):
return None, 0
img = np.expand_dims(img, axis=0)
img = img.copy()
-
- self.input_tensor.copy_from_cpu(img)
- self.predictor.run()
- outputs = []
- for output_tensor in self.output_tensors:
- output = output_tensor.copy_to_cpu()
- outputs.append(output)
+ if self.use_onnx:
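+            # ONNX path: the predictor here acts as an onnxruntime-style
+            # session, so inputs are passed as a name -> array feed dict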
+ input_dict = {}
+ input_dict[self.input_tensor.name] = img
+ outputs = self.predictor.run(self.output_tensors, input_dict)
+ else:
+ self.input_tensor.copy_from_cpu(img)
+ self.predictor.run()
+ outputs = []
+ for output_tensor in self.output_tensors:
+ output = output_tensor.copy_to_cpu()
+ outputs.append(output)
preds = {}
preds['structure_probs'] = outputs[1]
diff --git a/requirements.txt b/requirements.txt
index 2ccd486f34ed4cb01312cf3417404f724d762baf..43cd8c1b082768ebad44a5cf58fc31980ebfe891 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,9 +7,11 @@ tqdm
numpy
visualdl
rapidfuzz
+opencv-python
opencv-contrib-python
cython
lxml
premailer
openpyxl
attrdict
+Polygon3
diff --git a/test_tipc/benchmark_train.sh b/test_tipc/benchmark_train.sh
index 1dcb0129e767e6c35adfad36aa5dce2fbd84a2fd..25fda8f97f0bfdefbd6922b13a0ffef3f40c3de9 100644
--- a/test_tipc/benchmark_train.sh
+++ b/test_tipc/benchmark_train.sh
@@ -1,12 +1,6 @@
#!/bin/bash
source test_tipc/common_func.sh
-# set env
-python=python
-export str_tmp=$(echo `pip list|grep paddlepaddle-gpu|awk -F ' ' '{print $2}'`)
-export frame_version=${str_tmp%%.post*}
-export frame_commit=$(echo `${python} -c "import paddle;print(paddle.version.commit)"`)
-
# run benchmark sh
# Usage:
# bash run_benchmark_train.sh config.txt params
@@ -86,6 +80,13 @@ dataline=`cat $FILENAME`
IFS=$'\n'
lines=(${dataline})
model_name=$(func_parser_value "${lines[1]}")
+python_name=$(func_parser_value "${lines[2]}")
+
+# set env
+python=${python_name}
+export str_tmp=$(echo `pip list|grep paddlepaddle-gpu|awk -F ' ' '{print $2}'`)
+export frame_version=${str_tmp%%.post*}
+export frame_commit=$(echo `${python} -c "import paddle;print(paddle.version.commit)"`)
# get the line number of the train_benchmark_params section
line_num=`grep -n "train_benchmark_params" $FILENAME | cut -d ":" -f 1`
diff --git a/test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_infer_python.txt b/test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_infer_python.txt
index f3aa9d0f8218a24b11e3d0d079ae79a07d3e5874..4112e6498c6316e211ad69a69bdb531ec7a105b2 100644
--- a/test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_infer_python.txt
+++ b/test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_infer_python.txt
@@ -13,7 +13,7 @@ train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
null:null
##
trainer:norm_train
-norm_train:tools/train.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained Global.print_batch_step=1 Train.loader.shuffle=false
+norm_train:tools/train.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained Global.print_batch_step=2 Train.loader.shuffle=false
pact_train:null
fpgm_train:null
distill_train:null
diff --git a/test_tipc/configs/det_r18_ct/train_infer_python.txt b/test_tipc/configs/det_r18_ct/train_infer_python.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5933fdbeed762a73324fbfb5a4113a390926e7ea
--- /dev/null
+++ b/test_tipc/configs/det_r18_ct/train_infer_python.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:det_r18_ct
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:null
+Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=300
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./train_data/total_text/test/rgb/
+null:null
+##
+trainer:norm_train
+norm_train:tools/train.py -c configs/det/det_r18_vd_ct.yml -o Global.print_batch_step=1 Train.loader.shuffle=false
+quant_export:null
+fpgm_export:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:tools/eval.py -c configs/det/det_r18_vd_ct.yml -o
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.checkpoints:
+norm_export:tools/export_model.py -c configs/det/det_r18_vd_ct.yml -o
+quant_export:null
+fpgm_export:null
+distill_export:null
+export1:null
+export2:null
+##
+train_model:./inference/det_r18_vd_ct/best_accuracy
+infer_export:tools/export_model.py -c configs/det/det_r18_vd_ct.yml -o
+infer_quant:False
+inference:tools/infer/predict_det.py
+--use_gpu:True|False
+--enable_mkldnn:False
+--cpu_threads:6
+--rec_batch_num:1
+--use_tensorrt:False
+--precision:fp32
+--det_model_dir:
+--image_dir:./inference/ch_det_data_50/all-sum-510/
+--save_log_path:null
+--benchmark:True
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,640,640]}];[{float32,[3,960,960]}]
\ No newline at end of file
diff --git a/test_tipc/configs/en_table_structure/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt b/test_tipc/configs/en_table_structure/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ad002a334e3b351b0fa2aa641906f4aa753071c9
--- /dev/null
+++ b/test_tipc/configs/en_table_structure/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt
@@ -0,0 +1,20 @@
+===========================cpp_infer_params===========================
+model_name:en_table_structure
+use_opencv:True
+infer_model:./inference/en_ppocr_mobile_v2.0_table_structure_infer/
+infer_quant:False
+inference:./deploy/cpp_infer/build/ppocr --rec_img_h=32 --det_model_dir=./inference/en_ppocr_mobile_v2.0_table_det_infer --rec_model_dir=./inference/en_ppocr_mobile_v2.0_table_rec_infer --rec_char_dict_path=./ppocr/utils/dict/table_dict.txt --table_char_dict_path=./ppocr/utils/dict/table_structure_dict.txt --limit_side_len=736 --limit_type=min --output=./output/table --merge_no_span_structure=False --type=structure --table=True
+--use_gpu:True|False
+--enable_mkldnn:False
+--cpu_threads:6
+--rec_batch_num:6
+--use_tensorrt:False
+--precision:fp32
+--table_model_dir:
+--image_dir:./ppstructure/docs/table/table.jpg
+null:null
+--benchmark:True
+--det:True
+--rec:True
+--cls:False
+--use_angle_cls:False
\ No newline at end of file
diff --git a/test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml b/test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d2be152f0bae7d87129904d87c56c6d777a1f338
--- /dev/null
+++ b/test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml
@@ -0,0 +1,122 @@
+Global:
+ use_gpu: True
+ epoch_num: &epoch_num 200
+ log_smooth_window: 10
+ print_batch_step: 10
+ save_model_dir: ./output/ser_layoutxlm_xfund_zh
+ save_epoch_step: 2000
+  # evaluation is run every 187 iterations after the 0th iteration
+ eval_batch_step: [ 0, 187 ]
+ cal_metric_during_train: False
+ save_inference_dir:
+ use_visualdl: False
+ seed: 2022
+ infer_img: ppstructure/docs/kie/input/zh_val_42.jpg
+ save_res_path: ./output/ser_layoutxlm_xfund_zh/res
+
+Architecture:
+ model_type: kie
+ algorithm: &algorithm "LayoutXLM"
+ Transform:
+ Backbone:
+ name: LayoutXLMForSer
+ pretrained: True
+ checkpoints:
+ num_classes: &num_classes 7
+
+Loss:
+ name: VQASerTokenLayoutLMLoss
+ num_classes: *num_classes
+ key: "backbone_out"
+
+Optimizer:
+ name: AdamW
+ beta1: 0.9
+ beta2: 0.999
+ lr:
+ name: Linear
+ learning_rate: 0.00005
+ epochs: *epoch_num
+ warmup_epoch: 2
+ regularizer:
+ name: L2
+ factor: 0.00000
+
+PostProcess:
+ name: VQASerTokenLayoutLMPostProcess
+ class_path: &class_path train_data/XFUND/class_list_xfun.txt
+
+Metric:
+ name: VQASerTokenMetric
+ main_indicator: hmean
+
+Train:
+ dataset:
+ name: SimpleDataSet
+ data_dir: train_data/XFUND/zh_train/image
+ label_file_list:
+ - train_data/XFUND/zh_train/train.json
+ ratio_list: [ 1.0 ]
+ transforms:
+ - DecodeImage: # load image
+ img_mode: RGB
+ channel_first: False
+ - VQATokenLabelEncode: # Class handling label
+ contains_re: False
+ algorithm: *algorithm
+ class_path: *class_path
+ - VQATokenPad:
+ max_seq_len: &max_seq_len 512
+ return_attention_mask: True
+ - VQASerTokenChunk:
+ max_seq_len: *max_seq_len
+ - Resize:
+ size: [224,224]
+ - NormalizeImage:
+ scale: 1
+ mean: [ 123.675, 116.28, 103.53 ]
+ std: [ 58.395, 57.12, 57.375 ]
+ order: 'hwc'
+ - ToCHWImage:
+ - KeepKeys:
+ keep_keys: [ 'input_ids', 'bbox', 'attention_mask', 'token_type_ids', 'image', 'labels'] # dataloader will return list in this order
+ loader:
+ shuffle: True
+ drop_last: False
+ batch_size_per_card: 8
+ num_workers: 4
+
+Eval:
+ dataset:
+ name: SimpleDataSet
+ data_dir: train_data/XFUND/zh_val/image
+ label_file_list:
+ - train_data/XFUND/zh_val/val.json
+ transforms:
+ - DecodeImage: # load image
+ img_mode: RGB
+ channel_first: False
+ - VQATokenLabelEncode: # Class handling label
+ contains_re: False
+ algorithm: *algorithm
+ class_path: *class_path
+ - VQATokenPad:
+ max_seq_len: *max_seq_len
+ return_attention_mask: True
+ - VQASerTokenChunk:
+ max_seq_len: *max_seq_len
+ - Resize:
+ size: [224,224]
+ - NormalizeImage:
+ scale: 1
+ mean: [ 123.675, 116.28, 103.53 ]
+ std: [ 58.395, 57.12, 57.375 ]
+ order: 'hwc'
+ - ToCHWImage:
+ - KeepKeys:
+ keep_keys: [ 'input_ids', 'bbox', 'attention_mask', 'token_type_ids', 'image', 'labels'] # dataloader will return list in this order
+ loader:
+ shuffle: False
+ drop_last: False
+ batch_size_per_card: 8
+ num_workers: 4
diff --git a/test_tipc/configs/layoutxlm_ser/train_infer_python.txt b/test_tipc/configs/layoutxlm_ser/train_infer_python.txt
index 549a31e69e367237ec0396778162a5f91c8b7412..d07daa9a1429ec5cd1955ec64ded122a9d1a723d 100644
--- a/test_tipc/configs/layoutxlm_ser/train_infer_python.txt
+++ b/test_tipc/configs/layoutxlm_ser/train_infer_python.txt
@@ -13,7 +13,7 @@ train_infer_img_dir:ppstructure/docs/kie/input/zh_val_42.jpg
null:null
##
trainer:norm_train
-norm_train:tools/train.py -c configs/kie/layoutlm_series/ser_layoutlm_xfund_zh.yml -o Global.print_batch_step=1 Global.eval_batch_step=[1000,1000] Train.loader.shuffle=false
+norm_train:tools/train.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o Global.print_batch_step=1 Global.eval_batch_step=[1000,1000] Train.loader.shuffle=false
pact_train:null
fpgm_train:null
distill_train:null
@@ -27,7 +27,7 @@ null:null
===========================infer_params===========================
Global.save_inference_dir:./output/
Architecture.Backbone.checkpoints:
-norm_export:tools/export_model.py -c configs/kie/layoutlm_series/ser_layoutlm_xfund_zh.yml -o
+norm_export:tools/export_model.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o
quant_export:
fpgm_export:
distill_export:null
diff --git a/test_tipc/configs/layoutxlm_ser/train_pact_infer_python.txt b/test_tipc/configs/layoutxlm_ser/train_pact_infer_python.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fbf2a880269fba4596908def0980cb778a9281e3
--- /dev/null
+++ b/test_tipc/configs/layoutxlm_ser/train_pact_infer_python.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:layoutxlm_ser_PACT
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:fp32
+Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=17
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=4|whole_train_whole_infer=8
+Architecture.Backbone.checkpoints:pretrain_models/ser_LayoutXLM_xfun_zh
+train_model_name:latest
+train_infer_img_dir:ppstructure/docs/kie/input/zh_val_42.jpg
+null:null
+##
+trainer:pact_train
+norm_train:null
+pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:null
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Architecture.Backbone.checkpoints:
+norm_export:null
+quant_export:deploy/slim/quantization/export_model.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o
+fpgm_export: null
+distill_export:null
+export1:null
+export2:null
+##
+infer_model:null
+infer_export:null
+infer_quant:False
+inference:ppstructure/kie/predict_kie_token_ser.py --kie_algorithm=LayoutXLM --ser_dict_path=train_data/XFUND/class_list_xfun.txt --output=output
+--use_gpu:True|False
+--enable_mkldnn:False
+--cpu_threads:6
+--rec_batch_num:1
+--use_tensorrt:False
+--precision:fp32
+--ser_model_dir:
+--image_dir:./ppstructure/docs/kie/input/zh_val_42.jpg
+null:null
+--benchmark:False
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,224,224]}]
diff --git a/test_tipc/configs/layoutxlm_ser/train_ptq_infer_python.txt b/test_tipc/configs/layoutxlm_ser/train_ptq_infer_python.txt
new file mode 100644
index 0000000000000000000000000000000000000000..47e1e7026bd6bb113b05d70c2bfc7f90879bd485
--- /dev/null
+++ b/test_tipc/configs/layoutxlm_ser/train_ptq_infer_python.txt
@@ -0,0 +1,21 @@
+===========================train_params===========================
+model_name:layoutxlm_ser_KL
+python:python3.7
+Global.pretrained_model:
+Global.save_inference_dir:null
+infer_model:./inference/ser_LayoutXLM_xfun_zh_infer/
+infer_export:deploy/slim/quantization/quant_kl.py -c test_tipc/configs/layoutxlm_ser/ser_layoutxlm_xfund_zh.yml -o Train.loader.batch_size_per_card=1 Eval.loader.batch_size_per_card=1
+infer_quant:True
+inference:ppstructure/kie/predict_kie_token_ser.py --kie_algorithm=LayoutXLM --ser_dict_path=./train_data/XFUND/class_list_xfun.txt
+--use_gpu:True|False
+--enable_mkldnn:False
+--cpu_threads:6
+--rec_batch_num:1
+--use_tensorrt:False
+--precision:int8
+--ser_model_dir:
+--image_dir:./ppstructure/docs/kie/input/zh_val_42.jpg
+null:null
+--benchmark:False
+null:null
+null:null
diff --git a/test_tipc/configs/slanet/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt b/test_tipc/configs/slanet/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1b4226706b067f65361fd3e79bcbc52e1cf70ad0
--- /dev/null
+++ b/test_tipc/configs/slanet/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt
@@ -0,0 +1,20 @@
+===========================cpp_infer_params===========================
+model_name:slanet
+use_opencv:True
+infer_model:./inference/ch_ppstructure_mobile_v2.0_SLANet_infer/
+infer_quant:False
+inference:./deploy/cpp_infer/build/ppocr --det_model_dir=./inference/ch_PP-OCRv3_det_infer --rec_model_dir=./inference/ch_PP-OCRv3_rec_infer --output=./output/table --type=structure --table=True --rec_char_dict_path=./ppocr/utils/ppocr_keys_v1.txt --table_char_dict_path=./ppocr/utils/dict/table_structure_dict_ch.txt
+--use_gpu:True|False
+--enable_mkldnn:False
+--cpu_threads:6
+--rec_batch_num:6
+--use_tensorrt:False
+--precision:fp32
+--table_model_dir:
+--image_dir:./ppstructure/docs/table/table.jpg
+null:null
+--benchmark:True
+--det:True
+--rec:True
+--cls:False
+--use_angle_cls:False
\ No newline at end of file
diff --git a/test_tipc/configs/table_master/train_infer_python.txt b/test_tipc/configs/table_master/train_infer_python.txt
index 56b8e636026939ae8cd700308690010e1300d8f6..c3a871731a36fb5434db111cfd68b6eab7ba3f99 100644
--- a/test_tipc/configs/table_master/train_infer_python.txt
+++ b/test_tipc/configs/table_master/train_infer_python.txt
@@ -37,8 +37,8 @@ export2:null
infer_model:null
infer_export:null
infer_quant:False
-inference:ppstructure/table/predict_structure.py --table_char_dict_path=./ppocr/utils/dict/table_master_structure_dict.txt --image_dir=./ppstructure/docs/table/table.jpg --output ./output/table --table_algorithm=TableMaster --table_max_len=480
---use_gpu:True|False
+inference:ppstructure/table/predict_structure.py --table_char_dict_path=./ppocr/utils/dict/table_master_structure_dict.txt --output ./output/table --table_algorithm=TableMaster --table_max_len=480
+--use_gpu:True
--enable_mkldnn:False
--cpu_threads:6
--rec_batch_num:1
diff --git a/test_tipc/docs/jeston_test_train_inference_python.md b/test_tipc/docs/jeston_test_train_inference_python.md
index b25175ed0071dd3728ae22c7588ca20535af0505..22fc21c1cb615fa3e9cb0eb12441db80968a23ed 100644
--- a/test_tipc/docs/jeston_test_train_inference_python.md
+++ b/test_tipc/docs/jeston_test_train_inference_python.md
@@ -24,12 +24,7 @@ The main program for Jetson basic training and inference tests is `test_inference_inference.
```
- Install autolog (a tool for standardized log output)
```
- git clone https://github.com/LDOUBLEV/AutoLog
- cd AutoLog
- pip install -r requirements.txt
- python setup.py bdist_wheel
- pip install ./dist/auto_log-1.0.0-py3-none-any.whl
- cd ../
+ pip install https://paddleocr.bj.bcebos.com/libs/auto_log-1.2.0-py3-none-any.whl
```
- Install PaddleSlim (optional)
```
diff --git a/test_tipc/docs/mac_test_train_inference_python.md b/test_tipc/docs/mac_test_train_inference_python.md
index c37291a8fc9b239564adce8f556565f51f2a9475..759ea516430183a1b949ed5b69e24cceac8b6125 100644
--- a/test_tipc/docs/mac_test_train_inference_python.md
+++ b/test_tipc/docs/mac_test_train_inference_python.md
@@ -1,6 +1,6 @@
# Basic Training and Inference Function Tests on Mac
-The main program for the Mac basic training and inference tests is `test_train_inference_python.sh`, which can test Python-based model training on CPU, including pruning, quantization and distillation training, as well as evaluation and CPU inference.
+The main program for the Mac basic training and inference tests is `test_train_inference_python.sh`, which can test Python-based model training on CPU, including pruning, PACT online quantization and distillation training, as well as evaluation and CPU inference.
Note: testing on Mac works much like testing on Linux, except that the tests requiring a GPU can be skipped.
@@ -10,7 +10,7 @@ The main program for the Mac basic training and inference tests is `test_train_inference_python.
| Algorithm | Model | Single machine single card (CPU) | Single machine multi-card | Multi-machine multi-card | Model compression (CPU) |
| :---- | :---- | :---- | :---- | :---- | :---- |
-| DB | ch_ppocr_mobile_v2.0_det| normal training | - | - | normal training: FPGM pruning, PACT quantization; offline quantization (no training required) |
+| DB | ch_ppocr_mobile_v2.0_det| normal training | - | - | normal training: FPGM pruning, PACT quantization |
- Inference: depending on whether quantization was used during training, the produced models fall into `normal models` and `quantized models`; the inference functions supported by each class are summarized below.
@@ -26,19 +26,14 @@ The main program for the Mac basic training and inference tests is `test_train_inference_python.
Macs have no GPU, so environment preparation only requires a Python environment; refer to the documents below for installing PaddlePaddle and other dependencies.
### 2.1 Install dependencies
-- Install PaddlePaddle >= 2.0
+- Install PaddlePaddle >= 2.3
- Install PaddleOCR dependencies
```
pip install -r ../requirements.txt
```
- Install autolog (a tool for standardized log output)
```
- git clone https://github.com/LDOUBLEV/AutoLog
- cd AutoLog
- pip install -r requirements.txt
- python setup.py bdist_wheel
- pip install ./dist/auto_log-1.0.0-py3-none-any.whl
- cd ../
+ pip install https://paddleocr.bj.bcebos.com/libs/auto_log-1.2.0-py3-none-any.whl
```
- Install PaddleSlim (optional)
```
@@ -49,53 +44,46 @@ Mac端无GPU,环境准备只需要Python环境即可,安装PaddlePaddle等
### 2.2 Function tests
-First run `prepare.sh` to prepare the data and models, then run `test_train_inference_python.sh` for testing; log files in the `python_infer_*.log` format are finally generated under the ```test_tipc/output``` directory.
+First run `prepare.sh` to prepare the data and models, then run `test_train_inference_python.sh` for testing; log files in the `model_name/lite_train_lite_infer/*.log` format are finally generated under the ```test_tipc/output``` directory.
-`test_train_inference_python.sh` provides 5 run modes; each mode runs on different data and is used to test speed and accuracy:
+`test_train_inference_python.sh` provides the 4 run modes of the basic chain; each mode runs on different data and is used to test speed and accuracy:
- Mode 1: lite_train_lite_infer, train on a small dataset to quickly verify that the training-to-inference pipeline runs, without checking accuracy or speed;
```shell
# Unlike the Linux tests, the Mac tests use the dedicated config file mac_ppocr_det_mobile_params.txt,
# which by default removes the GPU- and mkldnn-related test chains
-bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'lite_train_lite_infer'
-bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'lite_train_lite_infer'
+bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'lite_train_lite_infer'
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'lite_train_lite_infer'
```
- Mode 2: lite_train_whole_infer, train on a small dataset and run inference on a moderate amount of data, to verify that the trained model can run inference and that the inference speed is reasonable;
```shell
-bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'lite_train_whole_infer'
-bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'lite_train_whole_infer'
+bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'lite_train_whole_infer'
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'lite_train_whole_infer'
```
- Mode 3: whole_infer, no training, inference on the full dataset; walks through open-source model evaluation and dynamic-to-static export, and checks the inference model's latency and accuracy;
```shell
-bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'whole_infer'
+bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'whole_infer'
# Usage 1:
-bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'whole_infer'
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'whole_infer'
# Usage 2: run inference on a specified GPU card; the third argument is the GPU card id
-bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'whole_infer' '1'
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'whole_infer' '1'
```
- Mode 4: whole_train_whole_infer, CE: train on the full dataset and run inference on the full dataset, verifying training accuracy, inference accuracy and inference speed (running this mode on Mac is not recommended);
```shell
-bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'whole_train_whole_infer'
-bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'whole_train_whole_infer'
+bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'whole_train_whole_infer'
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'whole_train_whole_infer'
```
-- Mode 5: klquant_whole_infer, tests offline quantization;
-```shell
-bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_mac_cpu.txt 'klquant_whole_infer'
-bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_mac_cpu.txt 'klquant_whole_infer'
-```
-
After running the commands, logs are saved automatically under the `test_tipc/output` folder. For example, the `lite_train_lite_infer` mode runs the training + inference chain, so the `test_tipc/output` folder contains the following files:
```
-test_tipc/output/
+test_tipc/output/model_name/lite_train_lite_infer/
|- results_python.log # status log of the executed commands
|- norm_train_gpus_-1_autocast_null/ # training logs and saved models for normal training on CPU
-|- pact_train_gpus_-1_autocast_null/ # training logs and saved models for quantization training on CPU
......
-|- python_infer_cpu_usemkldnn_False_threads_1_batchsize_1.log # inference log on CPU with MKL-DNN disabled, 1 thread, batch_size=1
+|- python_infer_cpu_usemkldnn_False_threads_1_precision_fp32_batchsize_1.log # fp32 inference log on CPU with MKL-DNN disabled, 1 thread, batch_size=1
......
```
diff --git a/test_tipc/docs/test_inference_cpp.md b/test_tipc/docs/test_inference_cpp.md
index e662f4bacc0b69bd605a79dac0e36c99daac87d5..5d8aeda6c401b48892de1006c2a024447823defa 100644
--- a/test_tipc/docs/test_inference_cpp.md
+++ b/test_tipc/docs/test_inference_cpp.md
@@ -17,15 +17,15 @@ The main program for C++ inference tests is `test_inference_cpp.sh`, which can test
For environment setup, refer to the [document](./install.md) to configure the TIPC runtime environment.
### 2.1 Function tests
-First run `prepare.sh` to prepare the data and models, then run `test_inference_cpp.sh` for testing; log files with the `cpp_infer_*.log` suffix are finally generated under the ```test_tipc/output``` directory.
+First run `prepare.sh` to prepare the data and models, then run `test_inference_cpp.sh` for testing; log files with the `cpp_infer_*.log` suffix are finally generated under the ```test_tipc/output/{model_name}/cpp_infer``` directory.
```shell
-bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt "cpp_infer"
+bash test_tipc/prepare.sh ./test_tipc/configs/ch_PP-OCRv2_rec/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt "cpp_infer"
# Usage 1:
-bash test_tipc/test_inference_cpp.sh test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt
+bash test_tipc/test_inference_cpp.sh test_tipc/configs/ch_PP-OCRv2_rec/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt
# Usage 2: run inference on a specified GPU card; the third argument is the GPU card id
-bash test_tipc/test_inference_cpp.sh test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt '1'
+bash test_tipc/test_inference_cpp.sh test_tipc/configs/ch_PP-OCRv2_rec/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt '1'
```
After running the inference commands, logs are saved automatically under the `test_tipc/output` folder, including the following files:
@@ -33,23 +33,21 @@ bash test_tipc/test_inference_cpp.sh test_tipc/configs/ch_ppocr_mobile_v2.0_det/
```shell
test_tipc/output/
|- results_cpp.log # status log of the executed commands
-|- cpp_infer_cpu_usemkldnn_False_threads_1_precision_fp32_batchsize_1.log # inference log on CPU with MKL-DNN disabled, 1 thread, batch_size=1
-|- cpp_infer_cpu_usemkldnn_False_threads_6_precision_fp32_batchsize_1.log # inference log on CPU with MKL-DNN disabled, 6 threads, batch_size=1
-|- cpp_infer_gpu_usetrt_False_precision_fp32_batchsize_1.log # fp32 inference log on GPU without TensorRT, batch_size=1
-|- cpp_infer_gpu_usetrt_True_precision_fp16_batchsize_1.log # fp16 inference log on GPU with TensorRT, batch_size=1
+|- cpp_infer_cpu_usemkldnn_False_threads_6_precision_fp32_batchsize_6.log # inference log on CPU with MKL-DNN disabled, 6 threads, batch_size=6
+|- cpp_infer_gpu_usetrt_False_precision_fp32_batchsize_6.log # fp32 inference log on GPU without TensorRT, batch_size=6
......
```
results_cpp.log records the run status of every command; on success the log contains:
```
-Run successfully with command - ./deploy/cpp_infer/build/ppocr det --use_gpu=False --enable_mkldnn=False --cpu_threads=6 --det_model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ --rec_batch_num=1 --image_dir=./inference/ch_det_data_50/all-sum-510/ --benchmar k=True > ./test_tipc/output/cpp_infer_cpu_usemkldnn_False_threads_6_precision_fp32_batchsize_1.log 2>&1 !
-Run successfully with command - ./deploy/cpp_infer/build/ppocr det --use_gpu=True --use_tensorrt=False --precision=fp32 --det_model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ --rec_batch_num=1 --image_dir=./inference/ch_det_data_50/all-sum-510/ --benchmark =True > ./test_tipc/output/cpp_infer_gpu_usetrt_False_precision_fp32_batchsize_1.log 2>&1 !
+Run successfully with command - ch_PP-OCRv2_rec - ./deploy/cpp_infer/build/ppocr --rec_char_dict_path=./ppocr/utils/ppocr_keys_v1.txt --rec_img_h=32 --use_gpu=True --use_tensorrt=False --precision=fp32 --rec_model_dir=./inference/ch_PP-OCRv2_rec_infer/ --rec_batch_num=6 --image_dir=./inference/rec_inference/ --benchmark=True --det=False --rec=True --cls=False --use_angle_cls=False > ./test_tipc/output/ch_PP-OCRv2_rec/cpp_infer/cpp_infer_gpu_usetrt_False_precision_fp32_batchsize_6.log 2>&1 !
+Run successfully with command - ch_PP-OCRv2_rec - ./deploy/cpp_infer/build/ppocr --rec_char_dict_path=./ppocr/utils/ppocr_keys_v1.txt --rec_img_h=32 --use_gpu=False --enable_mkldnn=False --cpu_threads=6 --rec_model_dir=./inference/ch_PP-OCRv2_rec_infer/ --rec_batch_num=6 --image_dir=./inference/rec_inference/ --benchmark=True --det=False --rec=True --cls=False --use_angle_cls=False > ./test_tipc/output/ch_PP-OCRv2_rec/cpp_infer/cpp_infer_cpu_usemkldnn_False_threads_6_precision_fp32_batchsize_6.log 2>&1 !
......
```
On failure the log contains:
```
-Run failed with command - ./deploy/cpp_infer/build/ppocr det --use_gpu=True --use_tensorrt=True --precision=fp32 --det_model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ --rec_batch_num=1 --image_dir=./inference/ch_det_data_50/all-sum-510/ --benchmark=True > ./test_tipc/output/cpp_infer_gpu_usetrt_True_precision_fp32_batchsize_1.log 2>&1 !
-Run failed with command - ./deploy/cpp_infer/build/ppocr det --use_gpu=True --use_tensorrt=True --precision=fp16 --det_model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ --rec_batch_num=1 --image_dir=./inference/ch_det_data_50/all-sum-510/ --benchmark=True > ./test_tipc/output/cpp_infer_gpu_usetrt_True_precision_fp16_batchsize_1.log 2>&1 !
+Run failed with command - ch_PP-OCRv2_rec - ./deploy/cpp_infer/build/ppocr --rec_char_dict_path=./ppocr/utils/ppocr_keys_v1.txt --rec_img_h=32 --use_gpu=True --use_tensorrt=False --precision=fp32 --rec_model_dir=./inference/ch_PP-OCRv2_rec_infer/ --rec_batch_num=6 --image_dir=./inference/rec_inference/ --benchmark=True --det=False --rec=True --cls=False --use_angle_cls=False > ./test_tipc/output/ch_PP-OCRv2_rec/cpp_infer/cpp_infer_gpu_usetrt_False_precision_fp32_batchsize_6.log 2>&1 !
+Run failed with command - ch_PP-OCRv2_rec - ./deploy/cpp_infer/build/ppocr --rec_char_dict_path=./ppocr/utils/ppocr_keys_v1.txt --rec_img_h=32 --use_gpu=False --enable_mkldnn=False --cpu_threads=6 --rec_model_dir=./inference/ch_PP-OCRv2_rec_infer/ --rec_batch_num=6 --image_dir=./inference/rec_inference/ --benchmark=True --det=False --rec=True --cls=False --use_angle_cls=False > ./test_tipc/output/ch_PP-OCRv2_rec/cpp_infer/cpp_infer_cpu_usemkldnn_False_threads_6_precision_fp32_batchsize_6.log 2>&1 !
......
```
The contents of results_cpp.log make it easy to determine which command failed.
diff --git a/test_tipc/docs/test_paddle2onnx.md b/test_tipc/docs/test_paddle2onnx.md
index df2734771e9252a40811c42ead03abbff1b7a1a3..299621d01122995434646351edfd524a0aa3206a 100644
--- a/test_tipc/docs/test_paddle2onnx.md
+++ b/test_tipc/docs/test_paddle2onnx.md
@@ -15,29 +15,30 @@ The main program for Paddle2ONNX inference tests is `test_paddle2onnx.sh`, which
## 2. Test workflow
### 2.1 Function tests
-First run `prepare.sh` to prepare the data and models, then run `test_paddle2onnx.sh` for testing; log files with the `paddle2onnx_infer_*.log` suffix are finally generated under the ```test_tipc/output``` directory.
+First run `prepare.sh` to prepare the data and models, then run `test_paddle2onnx.sh` for testing; log files with the `paddle2onnx_infer_*.log` suffix are finally generated under the ```test_tipc/output/{model_name}/paddle2onnx``` directory.
```shell
-bash test_tipc/prepare.sh ./test_tipc/configs/ppocr_det_mobile/model_linux_gpu_normal_normal_paddle2onnx_python_linux_cpu.txt "paddle2onnx_infer"
+bash test_tipc/prepare.sh ./test_tipc/configs/ch_PP-OCRv2_det/model_linux_gpu_normal_normal_paddle2onnx_python_linux_cpu.txt "paddle2onnx_infer"
# Usage:
-bash test_tipc/test_paddle2onnx.sh ./test_tipc/configs/ppocr_det_mobile/model_linux_gpu_normal_normal_paddle2onnx_python_linux_cpu.txt
+bash test_tipc/test_paddle2onnx.sh ./test_tipc/configs/ch_PP-OCRv2_det/model_linux_gpu_normal_normal_paddle2onnx_python_linux_cpu.txt
```
#### Results
-The run status of each test is printed in `test_tipc/output/results_paddle2onnx.log`:
+The run status of each test is printed in `test_tipc/output/{model_name}/paddle2onnx/results_paddle2onnx.log`:
On success it prints:
```
-Run successfully with command - paddle2onnx --model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ --model_filename=inference.pdmodel --params_filename=inference.pdiparams --save_file=./inference/det_mobile_onnx/model.onnx --opset_version=10 --enable_onnx_checker=True!
-Run successfully with command - python test_tipc/onnx_inference/predict_det.py --use_gpu=False --image_dir=./inference/ch_det_data_50/all-sum-510/ --det_model_dir=./inference/det_mobile_onnx/model.onnx 2>&1 !
+Run successfully with command - ch_PP-OCRv2_det - paddle2onnx --model_dir=./inference/ch_PP-OCRv2_det_infer/ --model_filename=inference.pdmodel --params_filename=inference.pdiparams --save_file=./inference/det_v2_onnx/model.onnx --opset_version=10 --enable_onnx_checker=True!
+Run successfully with command - ch_PP-OCRv2_det - python3.7 tools/infer/predict_det.py --use_gpu=True --image_dir=./inference/ch_det_data_50/all-sum-510/ --det_model_dir=./inference/det_v2_onnx/model.onnx --use_onnx=True > ./test_tipc/output/ch_PP-OCRv2_det/paddle2onnx/paddle2onnx_infer_gpu.log 2>&1 !
+Run successfully with command - ch_PP-OCRv2_det - python3.7 tools/infer/predict_det.py --use_gpu=False --image_dir=./inference/ch_det_data_50/all-sum-510/ --det_model_dir=./inference/det_v2_onnx/model.onnx --use_onnx=True > ./test_tipc/output/ch_PP-OCRv2_det/paddle2onnx/paddle2onnx_infer_cpu.log 2>&1 !
```
On failure, the log contains:
```
-Run failed with command - paddle2onnx --model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ --model_filename=inference.pdmodel --params_filename=inference.pdiparams --save_file=./inference/det_mobile_onnx/model.onnx --opset_version=10 --enable_onnx_checker=True!
+Run failed with command - ch_PP-OCRv2_det - paddle2onnx --model_dir=./inference/ch_PP-OCRv2_det_infer/ --model_filename=inference.pdmodel --params_filename=inference.pdiparams --save_file=./inference/det_v2_onnx/model.onnx --opset_version=10 --enable_onnx_checker=True!
...
```
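Beyond the status log, the exported model itself can be sanity-checked. A minimal sketch, assuming the `onnx` package installed by `prepare.sh` and the export path from the example above:

```shell
# structurally validate the exported ONNX model before using it for inference
python3.7 -c "import onnx; onnx.checker.check_model(onnx.load('./inference/det_v2_onnx/model.onnx'))"
```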
diff --git a/test_tipc/docs/test_ptq_inference_python.md b/test_tipc/docs/test_ptq_inference_python.md
new file mode 100644
index 0000000000000000000000000000000000000000..7887c0b5c93decac61f56d8c8b92018f40c78b32
--- /dev/null
+++ b/test_tipc/docs/test_ptq_inference_python.md
@@ -0,0 +1,51 @@
+# Linux GPU/CPU KL offline quantization training and inference test
+
+The main program for the Linux GPU/CPU KL offline quantization training and inference test is `test_ptq_inference_python.sh`, which covers basic Python-based model training, evaluation, and inference.
+
+## 1. Summary of test conclusions
+- Training:
+
+| Algorithm | Model | Single machine, single GPU |
+| :----: | :----: | :----: |
+| | model_name | KL offline quantization training |
+
+- Inference:
+
+| Algorithm | Model | device_CPU | device_GPU | batchsize |
+| :----: | :----: | :----: | :----: | :----: |
+| | model_name | supported | supported | 1 |
+
+## 2. Test procedure
+
+### 2.1 Prepare the data and models
+
+First run `prepare.sh` to prepare the data and models, then run `test_ptq_inference_python.sh` to test; log files with the `python_infer_*.log` suffix are generated under the `test_tipc/output/{model_name}/whole_infer` directory.
+
+```shell
+bash test_tipc/prepare.sh ./test_tipc/configs/ch_PP-OCRv2_det/train_ptq_infer_python.txt "whole_infer"
+
+# Usage:
+bash test_tipc/test_ptq_inference_python.sh ./test_tipc/configs/ch_PP-OCRv2_det/train_ptq_infer_python.txt "whole_infer"
+```
+
+#### Results
+
+The status of each test is printed to `test_tipc/output/{model_name}/whole_infer/results_python.log`:
+On success, the log contains:
+
+```
+Run successfully with command - ch_PP-OCRv2_det_KL - python3.7 deploy/slim/quantization/quant_kl.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o Global.pretrained_model=./inference/ch_PP-OCRv2_det_infer/ Global.save_inference_dir=./inference/ch_PP-OCRv2_det_infer/_klquant > ./test_tipc/output/ch_PP-OCRv2_det_KL/whole_infer/whole_infer_export_0.log 2>&1 !
+Run successfully with command - ch_PP-OCRv2_det_KL - python3.7 tools/infer/predict_det.py --use_gpu=False --enable_mkldnn=False --cpu_threads=6 --det_model_dir=./inference/ch_PP-OCRv2_det_infer/_klquant --rec_batch_num=1 --image_dir=./inference/ch_det_data_50/all-sum-510/ --precision=int8 > ./test_tipc/output/ch_PP-OCRv2_det_KL/whole_infer/python_infer_cpu_usemkldnn_False_threads_6_precision_int8_batchsize_1.log 2>&1 !
+Run successfully with command - ch_PP-OCRv2_det_KL - python3.7 tools/infer/predict_det.py --use_gpu=True --use_tensorrt=False --precision=int8 --det_model_dir=./inference/ch_PP-OCRv2_det_infer/_klquant --rec_batch_num=1 --image_dir=./inference/ch_det_data_50/all-sum-510/ > ./test_tipc/output/ch_PP-OCRv2_det_KL/whole_infer/python_infer_gpu_usetrt_False_precision_int8_batchsize_1.log 2>&1 !
+```
+
+On failure, the log contains:
+
+```
+Run failed with command - ch_PP-OCRv2_det_KL - python3.7 deploy/slim/quantization/quant_kl.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o Global.pretrained_model=./inference/ch_PP-OCRv2_det_infer/ Global.save_inference_dir=./inference/ch_PP-OCRv2_det_infer/_klquant > ./test_tipc/output/ch_PP-OCRv2_det_KL/whole_infer/whole_infer_export_0.log 2>&1 !
+...
+```
+
+## 3. Further reading
+
+This document covers functional testing only; for a more detailed quantization tutorial, see [Quantization](../../deploy/slim/quantization/README.md)
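As a quick sanity check before the int8 inference runs, one can verify that the KL quantization step actually exported an inference model; a sketch, with the path taken from `Global.save_inference_dir` in the example above:

```shell
# the quantized model directory should contain inference.pdmodel / inference.pdiparams
ls ./inference/ch_PP-OCRv2_det_infer/_klquant
```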
diff --git a/test_tipc/docs/test_serving.md b/test_tipc/docs/test_serving.md
index 71f01c0d5ff47004d70baa17b404c10714a6fb64..ef38888784b600233fe85afe3c1064caf12173d4 100644
--- a/test_tipc/docs/test_serving.md
+++ b/test_tipc/docs/test_serving.md
@@ -18,71 +18,44 @@ PaddleServing预测功能测试的主程序为`test_serving_infer_python.sh`和`
### 2.1 Functional testing
**python serving**
-First run `prepare.sh` to prepare the data and models, then run `test_serving_infer_python.sh` to test; log files with the `serving_infer_python*.log` suffix are generated under the `test_tipc/output` directory.
+First run `prepare.sh` to prepare the data and models, then run `test_serving_infer_python.sh` to test; log files with the `python_*.log` suffix are generated under the `test_tipc/output/{model_name}/serving_infer/python` directory.
```shell
-bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_serving_python_linux_gpu_cpu.txt "serving_infer"
+bash test_tipc/prepare.sh ./test_tipc/configs/ch_PP-OCRv2/model_linux_gpu_normal_normal_serving_python_linux_gpu_cpu.txt "serving_infer"
# Usage:
-bash test_tipc/test_serving_infer_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_serving_python_linux_gpu_cpu.txt "serving_infer"
+bash test_tipc/test_serving_infer_python.sh ./test_tipc/configs/ch_PP-OCRv2/model_linux_gpu_normal_normal_serving_python_linux_gpu_cpu.txt "serving_infer"
```
**cpp serving**
-First run `prepare.sh` to prepare the data and models, then run `test_serving_infer_cpp.sh` to test; log files with the `serving_infer_cpp*.log` suffix are generated under the `test_tipc/output` directory.
+First run `prepare.sh` to prepare the data and models, then run `test_serving_infer_cpp.sh` to test; log files with the `cpp_*.log` suffix are generated under the `test_tipc/output/{model_name}/serving_infer/cpp` directory.
```shell
-bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0/model_linux_gpu_normal_normal_serving_python_linux_gpu_cpu.txt "serving_infer"
+bash test_tipc/prepare.sh ./test_tipc/configs/ch_PP-OCRv2/model_linux_gpu_normal_normal_serving_cpp_linux_gpu_cpu.txt "serving_infer"
# Usage:
-bash test_tipc/test_serving_infer_cpp.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0/model_linux_gpu_normal_normal_serving_cpp_linux_gpu_cpu.txt "serving_infer"
+bash test_tipc/test_serving_infer_cpp.sh ./test_tipc/configs/ch_PP-OCRv2/model_linux_gpu_normal_normal_serving_cpp_linux_gpu_cpu.txt "serving_infer"
```
#### Results
-The status of each test is printed to `test_tipc/output/results_serving.log`:
+The status of each test is printed to `test_tipc/output/{model_name}/serving_infer/python(cpp)/results_python(cpp)_serving.log`:
On success, the log contains:
```
-Run successfully with command - python3.7 pipeline_http_client.py --image_dir=../../doc/imgs > ../../tests/output/server_infer_cpu_usemkldnn_True_threads_1_batchsize_1.log 2>&1 !
-Run successfully with command - xxxxx
+Run successfully with command - ch_PP-OCRv2_rec - nohup python3.7 web_service_rec.py --config=config.yml --opt op.rec.concurrency="1" op.det.local_service_conf.devices= op.det.local_service_conf.use_mkldnn=False op.det.local_service_conf.thread_num=6 op.rec.local_service_conf.model_config=ppocr_rec_v2_serving > ./test_tipc/output/ch_PP-OCRv2_rec/serving_infer/python/python_server_cpu_usemkldnn_False_threads_6.log 2>&1 &!
+Run successfully with command - ch_PP-OCRv2_rec - python3.7 pipeline_http_client.py --det=False --image_dir=../../inference/rec_inference > ./test_tipc/output/ch_PP-OCRv2_rec/serving_infer/python/python_client_cpu_pipeline_http_usemkldnn_False_threads_6_batchsize_1.log 2>&1 !
...
```
On failure, the log contains:
```
-Run failed with command - python3.7 pipeline_http_client.py --image_dir=../../doc/imgs > ../../tests/output/server_infer_cpu_usemkldnn_True_threads_1_batchsize_1.log 2>&1 !
-Run failed with command - python3.7 pipeline_http_client.py --image_dir=../../doc/imgs > ../../tests/output/server_infer_cpu_usemkldnn_True_threads_6_batchsize_1.log 2>&1 !
-Run failed with command - xxxxx
+Run failed with command - ch_PP-OCRv2_rec - nohup python3.7 web_service_rec.py --config=config.yml --opt op.rec.concurrency="1" op.det.local_service_conf.devices= op.det.local_service_conf.use_mkldnn=False op.det.local_service_conf.thread_num=6 op.rec.local_service_conf.model_config=ppocr_rec_v2_serving > ./test_tipc/output/ch_PP-OCRv2_rec/serving_infer/python/python_server_cpu_usemkldnn_False_threads_6.log 2>&1 &!
+Run failed with command - ch_PP-OCRv2_rec - python3.7 pipeline_http_client.py --det=False --image_dir=../../inference/rec_inference > ./test_tipc/output/ch_PP-OCRv2_rec/serving_infer/python/python_client_cpu_pipeline_http_usemkldnn_False_threads_6_batchsize_1.log 2>&1 !
...
```
-Detailed prediction results are stored under the test_tipc/output/ folder; for example, `server_infer_gpu_usetrt_True_precision_fp16_batchsize_1.log` returns the detection box coordinates:
-
-```
-{'err_no': 0, 'err_msg': '', 'key': ['dt_boxes'], 'value': ['[[[ 78. 642.]\n [409. 640.]\n [409. 657.]\n
-[ 78. 659.]]\n\n [[ 75. 614.]\n [211. 614.]\n [211. 635.]\n [ 75. 635.]]\n\n
-[[103. 554.]\n [135. 554.]\n [135. 575.]\n [103. 575.]]\n\n [[ 75. 531.]\n
-[347. 531.]\n [347. 549.]\n [ 75. 549.] ]\n\n [[ 76. 503.]\n [309. 498.]\n
-[309. 521.]\n [ 76. 526.]]\n\n [[163. 462.]\n [317. 462.]\n [317. 493.]\n
-[163. 493.]]\n\n [[324. 431.]\n [414. 431.]\n [414. 452.]\n [324. 452.]]\n\n
-[[ 76. 412.]\n [208. 408.]\n [209. 424.]\n [ 76. 428.]]\n\n [[307. 409.]\n
-[428. 409.]\n [428. 426.]\n [307 . 426.]]\n\n [[ 74. 385.]\n [217. 382.]\n
-[217. 400.]\n [ 74. 403.]]\n\n [[308. 381.]\n [427. 380.]\n [427. 400.]\n
-[308. 401.]]\n\n [[ 74. 363.]\n [195. 362.]\n [195. 378.]\n [ 74. 379.]]\n\n
-[[303. 359.]\n [423. 357.]\n [423. 375.]\n [303. 377.]]\n\n [[ 70. 336.]\n
-[239. 334.]\n [239. 354.]\ n [ 70. 356.]]\n\n [[ 70. 312.]\n [204. 310.]\n
-[204. 327.]\n [ 70. 330.]]\n\n [[303. 308.]\n [419. 306.]\n [419. 326.]\n
-[303. 328.]]\n\n [[113. 2 72.]\n [246. 270.]\n [247. 299.]\n [113. 301.]]\n\n
- [[361. 269.]\n [384. 269.]\n [384. 296.]\n [361. 296.]]\n\n [[ 70. 250.]\n
- [243. 246.]\n [243. 265.]\n [ 70. 269.]]\n\n [[ 65. 221.]\n [187. 220.]\n
-[187. 240.]\n [ 65. 241.]]\n\n [[337. 216.]\n [382. 216.]\n [382. 240.]\n
-[337. 240.]]\n\n [ [ 65. 196.]\n [247. 193.]\n [247. 213.]\n [ 65. 216.]]\n\n
-[[296. 197.]\n [423. 191.]\n [424. 209.]\n [296. 215.]]\n\n [[ 65. 167.]\n [244. 167.]\n
-[244. 186.]\n [ 65. 186.]]\n\n [[ 67. 139.]\n [290. 139.]\n [290. 159.]\n [ 67. 159.]]\n\n
-[[ 68. 113.]\n [410. 113.]\n [410. 128.]\n [ 68. 129.] ]\n\n [[277. 87.]\n [416. 87.]\n
-[416. 108.]\n [277. 108.]]\n\n [[ 79. 28.]\n [132. 28.]\n [132. 62.]\n [ 79. 62.]]\n\n
-[[163. 17.]\n [410. 14.]\n [410. 50.]\n [163. 53.]]]']}
-```
+Detailed prediction results are stored under the test_tipc/output/{model_name}/serving_infer/python(cpp)/ folder
## 3. Further reading
diff --git a/test_tipc/docs/test_train_inference_python.md b/test_tipc/docs/test_train_inference_python.md
index 99de9400797493f429f8176a9b6b374a76df4872..d1dbd8ee47a4dc7fb4c0bb3d26a920aab1c7ff72 100644
--- a/test_tipc/docs/test_train_inference_python.md
+++ b/test_tipc/docs/test_train_inference_python.md
@@ -1,6 +1,6 @@
# Basic training and inference functional test on Linux
-The main program for the basic Linux training and inference test is `test_train_inference_python.sh`, which covers basic Python-based model training, evaluation, and inference, including pruning, quantization, and distillation.
+The main program for the basic Linux training and inference test is `test_train_inference_python.sh`, which covers basic Python-based model training, evaluation, and inference, including PACT online quantization.
- For the Mac version of this test, see [this doc](./mac_test_train_inference_python.md)
- For the Windows version of this test, see [this doc](./win_test_train_inference_python.md)
@@ -11,13 +11,14 @@ Linux端基础训练预测功能测试的主程序为`test_train_inference_pytho
| Algorithm | Model | Single machine, single GPU | Single machine, multi-GPU | Multi-machine, multi-GPU | Model compression (single machine, multi-GPU) |
| :---- | :---- | :---- | :---- | :---- | :---- |
-| DB | ch_ppocr_mobile_v2.0_det| normal training / mixed precision | normal training / mixed precision | normal training / mixed precision | normal training: FPGM pruning, PACT quantization; offline quantization (no training required) |
-| DB | ch_ppocr_server_v2.0_det| normal training / mixed precision | normal training / mixed precision | normal training / mixed precision | normal training: FPGM pruning, PACT quantization; offline quantization (no training required) |
-| CRNN | ch_ppocr_mobile_v2.0_rec| normal training / mixed precision | normal training / mixed precision | normal training / mixed precision | normal training: PACT quantization; offline quantization (no training required) |
-| CRNN | ch_ppocr_server_v2.0_rec| normal training / mixed precision | normal training / mixed precision | normal training / mixed precision | normal training: PACT quantization; offline quantization (no training required) |
-|PP-OCR| ch_ppocr_mobile_v2.0| normal training / mixed precision | normal training / mixed precision | normal training / mixed precision | - |
-|PP-OCR| ch_ppocr_server_v2.0| normal training / mixed precision | normal training / mixed precision | normal training / mixed precision | - |
+| DB | ch_ppocr_mobile_v2_0_det| normal training / mixed precision | normal training / mixed precision | normal training / mixed precision | normal training: FPGM pruning, PACT quantization |
+| DB | ch_ppocr_server_v2_0_det| normal training / mixed precision | normal training / mixed precision | normal training / mixed precision | normal training: FPGM pruning, PACT quantization |
+| CRNN | ch_ppocr_mobile_v2_0_rec| normal training / mixed precision | normal training / mixed precision | normal training / mixed precision | normal training: PACT quantization |
+| CRNN | ch_ppocr_server_v2_0_rec| normal training / mixed precision | normal training / mixed precision | normal training / mixed precision | normal training: PACT quantization |
+|PP-OCR| ch_ppocr_mobile_v2_0| normal training / mixed precision | normal training / mixed precision | normal training / mixed precision | - |
+|PP-OCR| ch_ppocr_server_v2_0| normal training / mixed precision | normal training / mixed precision | normal training / mixed precision | - |
|PP-OCRv2| ch_PP-OCRv2 | normal training / mixed precision | normal training / mixed precision | normal training / mixed precision | - |
+|PP-OCRv3| ch_PP-OCRv3 | normal training / mixed precision | normal training / mixed precision | normal training / mixed precision | - |
- Inference: depending on whether quantization is used during training, the trained models fall into `normal models` and `quantized models`; the inference features for the two types are summarized below.
@@ -35,19 +36,14 @@ Linux端基础训练预测功能测试的主程序为`test_train_inference_pytho
For environment setup, configure the TIPC runtime environment as described in the [documentation](./install.md).
### 2.1 Install dependencies
-- Install PaddlePaddle >= 2.0
+- Install PaddlePaddle >= 2.3
- Install PaddleOCR dependencies
```
pip3 install -r ../requirements.txt
```
- Install autolog (standardized logging tool)
```
- git clone https://github.com/LDOUBLEV/AutoLog
- cd AutoLog
- pip3 install -r requirements.txt
- python3 setup.py bdist_wheel
- pip3 install ./dist/auto_log-1.0.0-py3-none-any.whl
- cd ../
+ pip3 install https://paddleocr.bj.bcebos.com/libs/auto_log-1.2.0-py3-none-any.whl
```
- Install PaddleSlim (optional)
```
@@ -57,60 +53,57 @@ Linux端基础训练预测功能测试的主程序为`test_train_inference_pytho
### 2.2 Functional testing
-First run `prepare.sh` to prepare the data and models, then run `test_train_inference_python.sh` to test; log files in the `python_infer_*.log` format are generated under the `test_tipc/output` directory.
+#### 2.2.1 Basic training and inference pipeline
+First run `prepare.sh` to prepare the data and models, then run `test_train_inference_python.sh` to test; log files in the `model_name/lite_train_lite_infer/*.log` format are generated under the `test_tipc/output` directory.
-`test_train_inference_python.sh` supports 5 run modes; each mode uses different data, testing speed and accuracy respectively:
+`test_train_inference_python.sh` supports 4 run modes for the basic pipeline; each mode uses different data, testing speed and accuracy respectively:
- Mode 1: lite_train_lite_infer — train on a small dataset to quickly verify that the training-to-inference pipeline runs end to end; accuracy and speed are not checked;
```shell
-bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'lite_train_lite_infer'
-bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'lite_train_lite_infer'
+bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_infer_python.txt 'lite_train_lite_infer'
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_infer_python.txt 'lite_train_lite_infer'
```
- Mode 2: lite_train_whole_infer — train on a small dataset and run inference on a moderate amount of data, to verify that the trained model can run inference and that its speed is reasonable;
```shell
-bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'lite_train_whole_infer'
-bash test_tipc/test_train_inference_python.sh ../test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'lite_train_whole_infer'
+bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_infer_python.txt 'lite_train_whole_infer'
+bash test_tipc/test_train_inference_python.sh ../test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_infer_python.txt 'lite_train_whole_infer'
```
- Mode 3: whole_infer — no training; run inference on the full dataset to exercise open-source model evaluation and dynamic-to-static export, and check the inference model's prediction time and accuracy;
```shell
-bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'whole_infer'
+bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_infer_python.txt 'whole_infer'
# Usage 1:
-bash test_tipc/test_train_inference_python.sh ../test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'whole_infer'
+bash test_tipc/test_train_inference_python.sh ../test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_infer_python.txt 'whole_infer'
# Usage 2: run inference on a specific GPU; the third argument is the GPU id
-bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'whole_infer' '1'
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_infer_python.txt 'whole_infer' '1'
```
- Mode 4: whole_train_whole_infer, CE — train on the full dataset and run inference on the full dataset, verifying training accuracy, inference accuracy, and inference speed;
```shell
-bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'whole_train_whole_infer'
-bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'whole_train_whole_infer'
+bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_infer_python.txt 'whole_train_whole_infer'
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_infer_python.txt 'whole_train_whole_infer'
```
-- Mode 5: klquant_whole_infer — test offline quantization;
-```shell
-bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt 'klquant_whole_infer'
-bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt 'klquant_whole_infer'
-```
-
After running the corresponding command, run logs are saved automatically under the `test_tipc/output` folder. In 'lite_train_lite_infer' mode, for example, the training + inference pipeline is run, so the `test_tipc/output` folder contains the following files:
```
-test_tipc/output/
+test_tipc/output/model_name/lite_train_lite_infer/
|- results_python.log # log of each command's run status
-|- norm_train_gpus_0_autocast_null/ # training logs and saved models from normal training on GPU 0
-|- pact_train_gpus_0_autocast_null/ # training logs and saved models from quantization-aware training on GPU 0
+|- norm_train_gpus_0_autocast_null/ # training logs and saved models from normal single-machine, single-GPU training on GPU 0
+|- norm_train_gpus_0,1_autocast_null/ # training logs and saved models from normal single-machine, multi-GPU training on GPUs 0 and 1
......
-|- python_infer_cpu_usemkldnn_True_threads_1_batchsize_1.log # CPU prediction log with MKL-DNN enabled, 1 thread, batch_size=1
-|- python_infer_gpu_usetrt_True_precision_fp16_batchsize_1.log # GPU prediction log with TensorRT enabled, fp16, batch_size=1
+|- python_infer_cpu_usemkldnn_False_threads_6_precision_fp32_batchsize_1.log # CPU prediction log with MKL-DNN disabled, 6 threads, fp32, batch_size=1
+|- python_infer_gpu_usetrt_False_precision_fp32_batchsize_1.log # GPU prediction log with TensorRT disabled, fp32, batch_size=1
......
```
`results_python.log` records the run status of each command; on success it contains:
```
-Run successfully with command - python3.7 tools/train.py -c tests/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained Global.use_gpu=True Global.save_model_dir=./tests/output/norm_train_gpus_0_autocast_null Global.epoch_num=1 Train.loader.batch_size_per_card=2 !
-Run successfully with command - python3.7 tools/export_model.py -c tests/configs/det_mv3_db.yml -o Global.pretrained_model=./tests/output/norm_train_gpus_0_autocast_null/latest Global.save_inference_dir=./tests/output/norm_train_gpus_0_autocast_null!
+Run successfully with command - ch_ppocr_mobile_v2_0_det - python3.7 tools/train.py -c configs/det/ch_ppocr_v2_0/ch_det_mv3_db_v2_0.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained Global.use_gpu=True Global.save_model_dir=./test_tipc/output/ch_ppocr_mobile_v2_0_det/lite_train_lite_infer/norm_train_gpus_0_autocast_null Global.epoch_num=100 Train.loader.batch_size_per_card=2 !
+Run successfully with command - ch_ppocr_mobile_v2_0_det - python3.7 tools/export_model.py -c configs/det/ch_ppocr_v2_0/ch_det_mv3_db_v2_0.yml -o Global.checkpoints=./test_tipc/output/ch_ppocr_mobile_v2_0_det/lite_train_lite_infer/norm_train_gpus_0_autocast_null/latest Global.save_inference_dir=./test_tipc/output/ch_ppocr_mobile_v2_0_det/lite_train_lite_infer/norm_train_gpus_0_autocast_null > ./test_tipc/output/ch_ppocr_mobile_v2_0_det/lite_train_lite_infer/norm_train_gpus_0_autocast_null_nodes_1_export.log 2>&1 !
+Run successfully with command - ch_ppocr_mobile_v2_0_det - python3.7 tools/infer/predict_det.py --use_gpu=True --use_tensorrt=False --precision=fp32 --det_model_dir=./test_tipc/output/ch_ppocr_mobile_v2_0_det/lite_train_lite_infer/norm_train_gpus_0_autocast_null --rec_batch_num=1 --image_dir=./train_data/icdar2015/text_localization/ch4_test_images/ --benchmark=True > ./test_tipc/output/ch_ppocr_mobile_v2_0_det/lite_train_lite_infer/python_infer_gpu_usetrt_False_precision_fp32_batchsize_1.log 2>&1 !
+Run successfully with command - ch_ppocr_mobile_v2_0_det - python3.7 tools/infer/predict_det.py --use_gpu=False --enable_mkldnn=False --cpu_threads=6 --det_model_dir=./test_tipc/output/ch_ppocr_mobile_v2_0_det/lite_train_lite_infer/norm_train_gpus_0_autocast_null --rec_batch_num=1 --image_dir=./train_data/icdar2015/text_localization/ch4_test_images/ --benchmark=True --precision=fp32 > ./test_tipc/output/ch_ppocr_mobile_v2_0_det/lite_train_lite_infer/python_infer_cpu_usemkldnn_False_threads_6_precision_fp32_batchsize_1.log 2>&1 !
......
```
On failure, it contains:
@@ -121,6 +114,22 @@ Run failed with command - python3.7 tools/export_model.py -c tests/configs/det_m
```
The contents of `results_python.log` make it easy to determine which command failed.
+#### 2.2.2 PACT online quantization pipeline
+In addition, `test_train_inference_python.sh` includes a PACT online quantization mode; the commands are as follows.
+ch_PP-OCRv2_det is used as an example; to test another model, simply switch to its config.
+
+```shell
+bash test_tipc/prepare.sh ./test_tipc/configs/ch_PP-OCRv2_det/train_pact_infer_python.txt 'lite_train_lite_infer'
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_PP-OCRv2_det/train_pact_infer_python.txt 'lite_train_lite_infer'
+```
+#### 2.2.3 Mixed precision training pipeline
+`test_train_inference_python.sh` also includes a mixed precision training mode; the commands are as follows.
+ch_PP-OCRv2_det is used as an example; to test another model, simply switch to its config.
+
+```shell
+bash test_tipc/prepare.sh ./test_tipc/configs/ch_PP-OCRv2_det/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt 'lite_train_lite_infer'
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_PP-OCRv2_det/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt 'lite_train_lite_infer'
+```
### 2.3 Accuracy testing
diff --git a/test_tipc/docs/win_test_train_inference_python.md b/test_tipc/docs/win_test_train_inference_python.md
index 6e3ce93bb3123133075b9d65c64850a87de5f828..d631c38873867ef1fa6e9a03582df26b59e309a5 100644
--- a/test_tipc/docs/win_test_train_inference_python.md
+++ b/test_tipc/docs/win_test_train_inference_python.md
@@ -8,7 +8,7 @@ Windows端基础训练预测功能测试的主程序为`test_train_inference_pyt
| Algorithm | Model | Single machine, single GPU | Single machine, multi-GPU | Multi-machine, multi-GPU | Model compression (single machine, multi-GPU) |
| :---- | :---- | :---- | :---- | :---- | :---- |
-| DB | ch_ppocr_mobile_v2.0_det| normal training / mixed precision | - | - | normal training: FPGM pruning, PACT quantization; offline quantization (no training required) |
+| DB | ch_ppocr_mobile_v2_0_det| normal training / mixed precision | - | - | normal training: FPGM pruning, PACT quantization |
- Inference: depending on whether quantization is used during training, the trained models fall into `normal models` and `quantized models`; the inference features for the two types are summarized below:
@@ -29,19 +29,14 @@ Windows端基础训练预测功能测试的主程序为`test_train_inference_pyt
### 2.1 Install dependencies
-- Install PaddlePaddle >= 2.0
+- Install PaddlePaddle >= 2.3
- Install PaddleOCR dependencies
```
pip install -r ../requirements.txt
```
- Install autolog (standardized logging tool)
```
- git clone https://github.com/LDOUBLEV/AutoLog
- cd AutoLog
- pip install -r requirements.txt
- python setup.py bdist_wheel
- pip install ./dist/auto_log-1.0.0-py3-none-any.whl
- cd ../
+ pip install https://paddleocr.bj.bcebos.com/libs/auto_log-1.2.0-py3-none-any.whl
```
- Install PaddleSlim (optional)
```
@@ -51,54 +46,46 @@ Windows端基础训练预测功能测试的主程序为`test_train_inference_pyt
### 2.2 Functional testing
-First run `prepare.sh` to prepare the data and models, then run `test_train_inference_python.sh` to test; log files in the `python_infer_*.log` format are generated under the `test_tipc/output` directory.
+First run `prepare.sh` to prepare the data and models, then run `test_train_inference_python.sh` to test; log files in the `model_name/lite_train_lite_infer/*.log` format are generated under the `test_tipc/output` directory.
-`test_train_inference_python.sh` supports 5 run modes; each mode uses different data, testing speed and accuracy respectively:
+`test_train_inference_python.sh` supports 4 run modes for the basic pipeline; each mode uses different data, testing speed and accuracy respectively:
- Mode 1: lite_train_lite_infer — train on a small dataset to quickly verify that the training-to-inference pipeline runs end to end; accuracy and speed are not checked;
```shell
-bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'lite_train_lite_infer'
-bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'lite_train_lite_infer'
+bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'lite_train_lite_infer'
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'lite_train_lite_infer'
```
- Mode 2: lite_train_whole_infer — train on a small dataset and run inference on a moderate amount of data, to verify that the trained model can run inference and that its speed is reasonable;
```shell
-bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'lite_train_whole_infer'
-bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'lite_train_whole_infer'
+bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'lite_train_whole_infer'
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'lite_train_whole_infer'
```
- Mode 3: whole_infer — no training; run inference on the full dataset to exercise open-source model evaluation and dynamic-to-static export, and check the inference model's prediction time and accuracy;
```shell
-bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'whole_infer'
+bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'whole_infer'
# Usage 1:
-bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'whole_infer'
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'whole_infer'
# Usage 2: run inference on a specific GPU; the third argument is the GPU id
-bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'whole_infer' '1'
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'whole_infer' '1'
```
- Mode 4: whole_train_whole_infer, CE — train on the full dataset and run inference on the full dataset, verifying training accuracy, inference accuracy, and inference speed;
```shell
-bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'whole_train_whole_infer'
-bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'whole_train_whole_infer'
+bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'whole_train_whole_infer'
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2_0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'whole_train_whole_infer'
```
-- Mode 5: klquant_whole_infer — test offline quantization;
-```shell
-bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_windows_gpu_cpu.txt 'klquant_whole_infer'
-bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_windows_gpu_cpu.txt 'klquant_whole_infer'
-```
-
-
After running the corresponding command, run logs are saved automatically under the `test_tipc/output` folder. In 'lite_train_lite_infer' mode, for example, the training + inference pipeline is run, so the `test_tipc/output` folder contains the following files:
```
-test_tipc/output/
+test_tipc/output/model_name/lite_train_lite_infer/
|- results_python.log # log of each command's run status
|- norm_train_gpus_0_autocast_null/ # training logs and saved models from normal training on GPU 0
-|- pact_train_gpus_0_autocast_null/ # training logs and saved models from quantization-aware training on GPU 0
......
-|- python_infer_cpu_usemkldnn_True_threads_1_batchsize_1.log # CPU prediction log with MKL-DNN enabled, 1 thread, batch_size=1
-|- python_infer_gpu_usetrt_True_precision_fp16_batchsize_1.log # GPU prediction log with TensorRT enabled, fp16, batch_size=1
+|- python_infer_cpu_usemkldnn_False_threads_6_precision_fp32_batchsize_1.log # CPU prediction log with MKL-DNN disabled, 6 threads, fp32, batch_size=1
+|- python_infer_gpu_usetrt_False_precision_fp32_batchsize_1.log # GPU prediction log with TensorRT disabled, fp32, batch_size=1
......
```
diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh
index 25bfcce2a33ec0c7df1a7c898977f532c92513eb..a4ba31928bba4a00a560461392f7011244af5e0c 100644
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -21,7 +21,10 @@ model_name=$(func_parser_value "${lines[1]}")
trainer_list=$(func_parser_value "${lines[14]}")
if [ ${MODE} = "benchmark_train" ];then
- pip install -r requirements.txt
+ python_name_list=$(func_parser_value "${lines[2]}")
+ array=(${python_name_list})
+ python_name=${array[0]}
+ ${python_name} -m pip install -r requirements.txt
if [[ ${model_name} =~ "ch_ppocr_mobile_v2_0_det" || ${model_name} =~ "det_mv3_db_v2_0" ]];then
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/pretrained/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate
rm -rf ./train_data/icdar2015
@@ -29,6 +32,13 @@ if [ ${MODE} = "benchmark_train" ];then
cd ./train_data/ && tar xf icdar2015_benckmark.tar
ln -s ./icdar2015_benckmark ./icdar2015
cd ../
+ if [[ ${model_name} =~ "ch_ppocr_mobile_v2_0_det" ]];then
+            # expand train_icdar2015_label.txt 2 times for the benchmark run
+ cd ./train_data/icdar2015/text_localization
+ for i in `seq 2`;do cp train_icdar2015_label.txt dup$i.txt;done
+ cat dup* > train_icdar2015_label.txt && rm -rf dup*
+ cd ../../../
+ fi
fi
if [[ ${model_name} =~ "ch_ppocr_server_v2_0_det" || ${model_name} =~ "ch_PP-OCRv3_det" ]];then
rm -rf ./train_data/icdar2015
@@ -97,6 +107,15 @@ if [ ${MODE} = "benchmark_train" ];then
ln -s ./pubtabnet_benckmark ./pubtabnet
cd ../
fi
+ if [[ ${model_name} == "slanet" ]];then
+ wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_train.tar --no-check-certificate
+ cd ./pretrain_models/ && tar xf en_ppstructure_mobile_v2.0_SLANet_train.tar && cd ../
+ rm -rf ./train_data/pubtabnet
+ wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/pubtabnet_benckmark.tar --no-check-certificate
+ cd ./train_data/ && tar xf pubtabnet_benckmark.tar
+ ln -s ./pubtabnet_benckmark ./pubtabnet
+ cd ../
+ fi
if [[ ${model_name} == "det_r50_dcn_fce_ctw_v2_0" ]]; then
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/contribution/det_r50_dcn_fce_ctw_v2.0_train.tar --no-check-certificate
cd ./pretrain_models/ && tar xf det_r50_dcn_fce_ctw_v2.0_train.tar && cd ../
@@ -107,7 +126,8 @@ if [ ${MODE} = "benchmark_train" ];then
cd ../
fi
if [ ${model_name} == "layoutxlm_ser" ] || [ ${model_name} == "vi_layoutxlm_ser" ]; then
- pip install -r ppstructure/kie/requirements.txt
+ ${python_name} -m pip install -r ppstructure/kie/requirements.txt
+ ${python_name} -m pip install opencv-python -U
wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate
cd ./train_data/ && tar xf XFUND.tar
# expand gt.txt 10 times
@@ -121,6 +141,11 @@ if [ ${MODE} = "benchmark_train" ];then
fi
if [ ${MODE} = "lite_train_lite_infer" ];then
+ python_name_list=$(func_parser_value "${lines[2]}")
+ array=(${python_name_list})
+ python_name=${array[0]}
+ ${python_name} -m pip install -r requirements.txt
+ ${python_name} -m pip install https://paddleocr.bj.bcebos.com/libs/auto_log-1.2.0-py3-none-any.whl
# pretrain lite train data
wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar --no-check-certificate
@@ -211,6 +236,10 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
if [ ${model_name} == "ch_ppocr_mobile_v2_0_rec_FPGM" ]; then
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar --no-check-certificate
cd ./pretrain_models/ && tar xf ch_ppocr_mobile_v2.0_rec_train.tar && cd ../
+ ${python_name} -m pip install paddleslim
+ fi
+ if [ ${model_name} == "ch_ppocr_mobile_v2_0_det_FPGM" ]; then
+ ${python_name} -m pip install paddleslim
fi
if [ ${model_name} == "det_mv3_east_v2_0" ]; then
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_east_v2.0_train.tar --no-check-certificate
@@ -228,11 +257,27 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/rec_r32_gaspin_bilstm_att_train.tar --no-check-certificate
cd ./pretrain_models/ && tar xf rec_r32_gaspin_bilstm_att_train.tar && cd ../
fi
- if [ ${model_name} == "layoutxlm_ser" ] || [ ${model_name} == "vi_layoutxlm_ser" ]; then
- pip install -r ppstructure/kie/requirements.txt
+ if [ ${model_name} == "layoutxlm_ser" ]; then
+ ${python_name} -m pip install -r ppstructure/kie/requirements.txt
+ ${python_name} -m pip install opencv-python -U
wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate
cd ./train_data/ && tar xf XFUND.tar
cd ../
+
+ wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/pplayout/ser_LayoutXLM_xfun_zh.tar --no-check-certificate
+ cd ./pretrain_models/ && tar xf ser_LayoutXLM_xfun_zh.tar && cd ../
+ fi
+ if [ ${model_name} == "vi_layoutxlm_ser" ]; then
+ ${python_name} -m pip install -r ppstructure/kie/requirements.txt
+ ${python_name} -m pip install opencv-python -U
+ wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate
+ cd ./train_data/ && tar xf XFUND.tar
+ cd ../
+ fi
+ if [ ${model_name} == "det_r18_ct" ]; then
+ wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/pretrained/ResNet18_vd_pretrained.pdparams --no-check-certificate
+ wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/ct_tipc/total_text_lite2.tar --no-check-certificate
+ cd ./train_data && tar xf total_text_lite2.tar && ln -s total_text_lite2 total_text && cd ../
fi
elif [ ${MODE} = "whole_train_whole_infer" ];then
@@ -302,9 +347,18 @@ elif [ ${MODE} = "lite_train_whole_infer" ];then
cd ./inference/ && tar xf en_ppocr_mobile_v2.0_table_det_infer.tar && tar xf en_ppocr_mobile_v2.0_table_rec_infer.tar && cd ../
fi
elif [ ${MODE} = "whole_infer" ];then
+ python_name_list=$(func_parser_value "${lines[2]}")
+ array=(${python_name_list})
+ python_name=${array[0]}
+ ${python_name} -m pip install paddleslim
+ ${python_name} -m pip install -r requirements.txt
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar --no-check-certificate
wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/rec_inference.tar --no-check-certificate
cd ./inference && tar xf rec_inference.tar && tar xf ch_det_data_50.tar && cd ../
+ wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate
+ cd ./train_data/ && tar xf XFUND.tar && cd ../
+ head -n 2 train_data/XFUND/zh_val/val.json > train_data/XFUND/zh_val/val_lite.json
+ mv train_data/XFUND/zh_val/val_lite.json train_data/XFUND/zh_val/val.json
if [ ${model_name} = "ch_ppocr_mobile_v2_0_det" ]; then
eval_model_name="ch_ppocr_mobile_v2.0_det_train"
rm -rf ./train_data/icdar2015
@@ -470,6 +524,12 @@ elif [ ${MODE} = "whole_infer" ];then
wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar --no-check-certificate
cd ./inference/ && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_det_infer.tar && tar xf en_ppocr_mobile_v2.0_table_rec_infer.tar && cd ../
fi
+ if [[ ${model_name} =~ "layoutxlm_ser" ]]; then
+ ${python_name} -m pip install -r ppstructure/kie/requirements.txt
+ ${python_name} -m pip install opencv-python -U
+ wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/pplayout/ser_LayoutXLM_xfun_zh_infer.tar --no-check-certificate
+        cd ./inference/ && tar xf ser_LayoutXLM_xfun_zh_infer.tar && cd ../
+ fi
fi
if [[ ${model_name} =~ "KL" ]]; then
@@ -522,6 +582,12 @@ if [[ ${model_name} =~ "KL" ]]; then
cd ./inference/ && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_det_infer.tar && tar xf en_ppocr_mobile_v2.0_table_rec_infer.tar && cd ../
cd ./train_data/ && tar xf pubtabnet.tar && cd ../
fi
+ if [[ ${model_name} =~ "layoutxlm_ser_KL" ]]; then
+ wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/ppstructure/dataset/XFUND.tar --no-check-certificate
+ cd ./train_data/ && tar xf XFUND.tar && cd ../
+ wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/pplayout/ser_LayoutXLM_xfun_zh_infer.tar --no-check-certificate
+        cd ./inference/ && tar xf ser_LayoutXLM_xfun_zh_infer.tar && cd ../
+ fi
fi
if [ ${MODE} = "cpp_infer" ];then
@@ -626,6 +692,12 @@ if [ ${MODE} = "cpp_infer" ];then
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar --no-check-certificate
cd ./inference && tar xf ch_PP-OCRv3_det_infer.tar && tar xf ch_PP-OCRv3_rec_infer.tar && tar xf ch_det_data_50.tar && cd ../
fi
+ elif [ ${model_name} = "en_table_structure_KL" ];then
+ wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar --no-check-certificate
+ wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar --no-check-certificate
+ wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar --no-check-certificate
+ cd ./inference/ && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_det_infer.tar && tar xf en_ppocr_mobile_v2.0_table_rec_infer.tar && cd ../
+ fi
fi
if [ ${MODE} = "serving_infer" ];then
@@ -637,6 +709,7 @@ if [ ${MODE} = "serving_infer" ];then
${python_name} -m pip install paddle-serving-server-gpu
${python_name} -m pip install paddle_serving_client
${python_name} -m pip install paddle-serving-app
+ ${python_name} -m pip install https://paddleocr.bj.bcebos.com/libs/auto_log-1.2.0-py3-none-any.whl
# wget model
if [ ${model_name} == "ch_ppocr_mobile_v2_0_det_KL" ] || [ ${model_name} == "ch_ppocr_mobile_v2.0_rec_KL" ] ; then
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/tipc_fake_model/ch_ppocr_mobile_v2.0_det_klquant_infer.tar --no-check-certificate
@@ -688,8 +761,7 @@ fi
if [ ${MODE} = "paddle2onnx_infer" ];then
# prepare serving env
python_name=$(func_parser_value "${lines[2]}")
- ${python_name} -m pip install paddle2onnx
- ${python_name} -m pip install onnxruntime
+ ${python_name} -m pip install paddle2onnx onnxruntime onnx
# wget model
if [[ ${model_name} =~ "ch_ppocr_mobile_v2_0" ]]; then
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar --no-check-certificate
diff --git a/test_tipc/test_paddle2onnx.sh b/test_tipc/test_paddle2onnx.sh
index bace6b2d4684e0ad40ffbd76b37a78ddf1e70722..04bfb590f7c6e64cf136d3feef8594994cb86877 100644
--- a/test_tipc/test_paddle2onnx.sh
+++ b/test_tipc/test_paddle2onnx.sh
@@ -63,7 +63,7 @@ function func_paddle2onnx(){
set_opset_version=$(func_set_params "${opset_version_key}" "${opset_version_value}")
set_enable_onnx_checker=$(func_set_params "${enable_onnx_checker_key}" "${enable_onnx_checker_value}")
trans_det_log="${LOG_PATH}/trans_model_det.log"
- trans_model_cmd="${padlle2onnx_cmd} ${set_dirname} ${set_model_filename} ${set_params_filename} ${set_save_model} ${set_opset_version} ${set_enable_onnx_checker} > ${trans_det_log} 2>&1 "
+ trans_model_cmd="${padlle2onnx_cmd} ${set_dirname} ${set_model_filename} ${set_params_filename} ${set_save_model} ${set_opset_version} ${set_enable_onnx_checker} --enable_dev_version=False > ${trans_det_log} 2>&1 "
eval $trans_model_cmd
last_status=${PIPESTATUS[0]}
status_check $last_status "${trans_model_cmd}" "${status_log}" "${model_name}" "${trans_det_log}"
@@ -75,7 +75,7 @@ function func_paddle2onnx(){
set_opset_version=$(func_set_params "${opset_version_key}" "${opset_version_value}")
set_enable_onnx_checker=$(func_set_params "${enable_onnx_checker_key}" "${enable_onnx_checker_value}")
trans_rec_log="${LOG_PATH}/trans_model_rec.log"
- trans_model_cmd="${padlle2onnx_cmd} ${set_dirname} ${set_model_filename} ${set_params_filename} ${set_save_model} ${set_opset_version} ${set_enable_onnx_checker} > ${trans_rec_log} 2>&1 "
+ trans_model_cmd="${padlle2onnx_cmd} ${set_dirname} ${set_model_filename} ${set_params_filename} ${set_save_model} ${set_opset_version} ${set_enable_onnx_checker} --enable_dev_version=False > ${trans_rec_log} 2>&1 "
eval $trans_model_cmd
last_status=${PIPESTATUS[0]}
status_check $last_status "${trans_model_cmd}" "${status_log}" "${model_name}" "${trans_rec_log}"
@@ -88,7 +88,7 @@ function func_paddle2onnx(){
set_opset_version=$(func_set_params "${opset_version_key}" "${opset_version_value}")
set_enable_onnx_checker=$(func_set_params "${enable_onnx_checker_key}" "${enable_onnx_checker_value}")
trans_det_log="${LOG_PATH}/trans_model_det.log"
- trans_model_cmd="${padlle2onnx_cmd} ${set_dirname} ${set_model_filename} ${set_params_filename} ${set_save_model} ${set_opset_version} ${set_enable_onnx_checker} > ${trans_det_log} 2>&1 "
+ trans_model_cmd="${padlle2onnx_cmd} ${set_dirname} ${set_model_filename} ${set_params_filename} ${set_save_model} ${set_opset_version} ${set_enable_onnx_checker} --enable_dev_version=False > ${trans_det_log} 2>&1 "
eval $trans_model_cmd
last_status=${PIPESTATUS[0]}
status_check $last_status "${trans_model_cmd}" "${status_log}" "${model_name}" "${trans_det_log}"
@@ -101,7 +101,7 @@ function func_paddle2onnx(){
set_opset_version=$(func_set_params "${opset_version_key}" "${opset_version_value}")
set_enable_onnx_checker=$(func_set_params "${enable_onnx_checker_key}" "${enable_onnx_checker_value}")
trans_rec_log="${LOG_PATH}/trans_model_rec.log"
- trans_model_cmd="${padlle2onnx_cmd} ${set_dirname} ${set_model_filename} ${set_params_filename} ${set_save_model} ${set_opset_version} ${set_enable_onnx_checker} > ${trans_rec_log} 2>&1 "
+ trans_model_cmd="${padlle2onnx_cmd} ${set_dirname} ${set_model_filename} ${set_params_filename} ${set_save_model} ${set_opset_version} ${set_enable_onnx_checker} --enable_dev_version=False > ${trans_rec_log} 2>&1 "
eval $trans_model_cmd
last_status=${PIPESTATUS[0]}
status_check $last_status "${trans_model_cmd}" "${status_log}" "${model_name}" "${trans_rec_log}"
diff --git a/test_tipc/test_train_inference_python_npu.sh b/test_tipc/test_train_inference_python_npu.sh
new file mode 100644
index 0000000000000000000000000000000000000000..bab70fc78ee902515c0fccb57d9215d86f2a6589
--- /dev/null
+++ b/test_tipc/test_train_inference_python_npu.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+source test_tipc/common_func.sh
+
+function readlinkf() {
+ perl -MCwd -e 'print Cwd::abs_path shift' "$1";
+}
+
+function func_parser_config() {
+ strs=$1
+ IFS=" "
+ array=(${strs})
+ tmp=${array[2]}
+ echo ${tmp}
+}
+
+BASEDIR=$(dirname "$0")
+REPO_ROOT_PATH=$(readlinkf ${BASEDIR}/../)
+
+FILENAME=$1
+
+# disable mkldnn on non x86_64 env
+arch=$(uname -i)
+if [ $arch != 'x86_64' ]; then
+ sed -i 's/--enable_mkldnn:True|False/--enable_mkldnn:False/g' $FILENAME
+ sed -i 's/--enable_mkldnn:True/--enable_mkldnn:False/g' $FILENAME
+fi
+
+# change gpu to npu in tipc txt configs
+sed -i 's/use_gpu/use_npu/g' $FILENAME
+# disable benchmark, as AutoLog requires the nvidia-smi command
+sed -i 's/--benchmark:True/--benchmark:False/g' $FILENAME
+dataline=`cat $FILENAME`
+
+# parser params
+IFS=$'\n'
+lines=(${dataline})
+
+# replace training config file
+grep -n 'tools/.*yml' $FILENAME | cut -d ":" -f 1 \
+| while read line_num ; do
+ train_cmd=$(func_parser_value "${lines[line_num-1]}")
+ trainer_config=$(func_parser_config ${train_cmd})
+ sed -i 's/use_gpu/use_npu/g' "$REPO_ROOT_PATH/$trainer_config"
+done
+
+# change gpu to npu in execution script
+sed -i 's/\"gpu\"/\"npu\"/g' test_tipc/test_train_inference_python.sh
+
+# pass parameters to test_train_inference_python.sh
+cmd='bash test_tipc/test_train_inference_python.sh ${FILENAME} $2'
+echo -e "\033[1;32m Started to run command: ${cmd}! \033[0m"
+eval $cmd
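A usage sketch for this wrapper (the config path is illustrative; any TIPC `train_infer_python.txt` works): it rewrites the given config for NPU in place, then delegates to the standard pipeline. The XPU twin below is invoked the same way.

```shell
bash test_tipc/test_train_inference_python_npu.sh ./test_tipc/configs/ch_PP-OCRv2_det/train_infer_python.txt 'lite_train_lite_infer'
```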
diff --git a/test_tipc/test_train_inference_python_xpu.sh b/test_tipc/test_train_inference_python_xpu.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7c6dc1e52a67caf9c858b2f8b6561b3919134b0b
--- /dev/null
+++ b/test_tipc/test_train_inference_python_xpu.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+source test_tipc/common_func.sh
+
+function readlinkf() {
+ perl -MCwd -e 'print Cwd::abs_path shift' "$1";
+}
+
+function func_parser_config() {
+ strs=$1
+ IFS=" "
+ array=(${strs})
+ tmp=${array[2]}
+ echo ${tmp}
+}
+
+BASEDIR=$(dirname "$0")
+REPO_ROOT_PATH=$(readlinkf ${BASEDIR}/../)
+
+FILENAME=$1
+
+# disable mkldnn on non x86_64 env
+arch=$(uname -i)
+if [ $arch != 'x86_64' ]; then
+ sed -i 's/--enable_mkldnn:True|False/--enable_mkldnn:False/g' $FILENAME
+ sed -i 's/--enable_mkldnn:True/--enable_mkldnn:False/g' $FILENAME
+fi
+
+# change gpu to xpu in tipc txt configs
+sed -i 's/use_gpu/use_xpu/g' $FILENAME
+# disable benchmark, as AutoLog requires the nvidia-smi command
+sed -i 's/--benchmark:True/--benchmark:False/g' $FILENAME
+dataline=`cat $FILENAME`
+
+# parser params
+IFS=$'\n'
+lines=(${dataline})
+
+# replace training config file
+grep -n 'tools/.*yml' $FILENAME | cut -d ":" -f 1 \
+| while read line_num ; do
+ train_cmd=$(func_parser_value "${lines[line_num-1]}")
+ trainer_config=$(func_parser_config ${train_cmd})
+ sed -i 's/use_gpu/use_xpu/g' "$REPO_ROOT_PATH/$trainer_config"
+done
+
+# change gpu to xpu in execution script
+sed -i 's/\"gpu\"/\"xpu\"/g' test_tipc/test_train_inference_python.sh
+
+# pass parameters to test_train_inference_python.sh
+cmd='bash test_tipc/test_train_inference_python.sh ${FILENAME} $2'
+echo -e "\033[1;32m Started to run command: ${cmd}! \033[0m"
+eval $cmd
diff --git a/tools/infer/predict_det.py b/tools/infer/predict_det.py
index 9f5c480d3c55367a02eacb48bed6ae3d38282f05..00fa2e9b7fafd949c59a0eebd43f2f88ae717320 100755
--- a/tools/infer/predict_det.py
+++ b/tools/infer/predict_det.py
@@ -127,6 +127,9 @@ class TextDetector(object):
postprocess_params["beta"] = args.beta
postprocess_params["fourier_degree"] = args.fourier_degree
postprocess_params["box_type"] = args.det_fce_box_type
+ elif self.det_algorithm == "CT":
+ pre_process_list[0] = {'ScaleAlignedShort': {'short_size': 640}}
+ postprocess_params['name'] = 'CTPostProcess'
else:
logger.info("unknown det_algorithm:{}".format(self.det_algorithm))
sys.exit(0)
@@ -253,6 +256,9 @@ class TextDetector(object):
elif self.det_algorithm == 'FCE':
for i, output in enumerate(outputs):
preds['level_{}'.format(i)] = output
+ elif self.det_algorithm == "CT":
+ preds['maps'] = outputs[0]
+ preds['score'] = outputs[1]
else:
raise NotImplementedError
@@ -260,7 +266,7 @@ class TextDetector(object):
post_result = self.postprocess_op(preds, shape_list)
dt_boxes = post_result[0]['points']
if (self.det_algorithm == "SAST" and self.det_sast_polygon) or (
- self.det_algorithm in ["PSE", "FCE"] and
+ self.det_algorithm in ["PSE", "FCE", "CT"] and
self.postprocess_op.box_type == 'poly'):
dt_boxes = self.filter_tag_det_res_only_clip(dt_boxes, ori_im.shape)
else:
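The new CT branch can be exercised from the command line; a minimal sketch, where the model directory is hypothetical and should point at an exported CT (CentripetalText) inference model:

```shell
python3.7 tools/infer/predict_det.py --det_algorithm="CT" \
    --det_model_dir=./inference/det_r18_ct_infer/ \
    --image_dir=./doc/imgs_en/
```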
diff --git a/tools/infer/utility.py b/tools/infer/utility.py
index 9baf66d7f469a3bf6c9a140e034aee3a635a5c8e..07b2172cd3c6a624d4b1026163dcb811edebde02 100644
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -23,6 +23,7 @@ from PIL import Image, ImageDraw, ImageFont
import math
from paddle import inference
import time
+import random
from ppocr.utils.logging import get_logger
@@ -35,6 +36,7 @@ def init_args():
# params for prediction engine
parser.add_argument("--use_gpu", type=str2bool, default=True)
parser.add_argument("--use_xpu", type=str2bool, default=False)
+ parser.add_argument("--use_npu", type=str2bool, default=False)
parser.add_argument("--ir_optim", type=str2bool, default=True)
parser.add_argument("--use_tensorrt", type=str2bool, default=False)
parser.add_argument("--min_subgraph_size", type=int, default=15)
@@ -226,24 +228,25 @@ def create_predictor(args, mode, logger):
use_calib_mode=False)
# collect shape
- if args.shape_info_filename is not None:
- if not os.path.exists(args.shape_info_filename):
- config.collect_shape_range_info(
- args.shape_info_filename)
+                if args.shape_info_filename is not None:
+                    # prefix the shape file with the predictor mode (det/rec/cls) so runs do not clobber each other
+                    trt_shape_f = f"{os.path.dirname(args.shape_info_filename)}/{mode}_{os.path.basename(args.shape_info_filename)}"
+                    if not os.path.exists(trt_shape_f):
+                        config.collect_shape_range_info(trt_shape_f)
logger.info(
- f"collect dynamic shape info into : {args.shape_info_filename}"
+ f"collect dynamic shape info into : {trt_shape_f}"
)
else:
logger.info(
- f"dynamic shape info file( {args.shape_info_filename} ) already exists, not need to generate again."
+                    f"dynamic shape info file( {trt_shape_f} ) already exists, no need to generate it again."
)
- config.enable_tuned_tensorrt_dynamic_shape(
- args.shape_info_filename, True)
+ config.enable_tuned_tensorrt_dynamic_shape(trt_shape_f, True)
else:
logger.info(
                    f"when using tensorrt, dynamic shape is a suggested option, you can use '--shape_info_filename=shape.txt' for offline dynamic shape tuning"
)
+ elif args.use_npu:
+ config.enable_npu()
elif args.use_xpu:
config.enable_xpu(10 * 1024 * 1024)
else:
@@ -397,56 +400,81 @@ def draw_ocr(image,
def draw_ocr_box_txt(image,
boxes,
- txts,
+ txts=None,
scores=None,
drop_score=0.5,
- font_path="./doc/simfang.ttf"):
+ font_path="./doc/fonts/simfang.ttf"):
h, w = image.height, image.width
img_left = image.copy()
- img_right = Image.new('RGB', (w, h), (255, 255, 255))
-
- import random
-
+ img_right = np.ones((h, w, 3), dtype=np.uint8) * 255
random.seed(0)
+
draw_left = ImageDraw.Draw(img_left)
- draw_right = ImageDraw.Draw(img_right)
+ if txts is None or len(txts) != len(boxes):
+ txts = [None] * len(boxes)
for idx, (box, txt) in enumerate(zip(boxes, txts)):
if scores is not None and scores[idx] < drop_score:
continue
color = (random.randint(0, 255), random.randint(0, 255),
random.randint(0, 255))
draw_left.polygon(box, fill=color)
- draw_right.polygon(
- [
- box[0][0], box[0][1], box[1][0], box[1][1], box[2][0],
- box[2][1], box[3][0], box[3][1]
- ],
- outline=color)
- box_height = math.sqrt((box[0][0] - box[3][0])**2 + (box[0][1] - box[3][
- 1])**2)
- box_width = math.sqrt((box[0][0] - box[1][0])**2 + (box[0][1] - box[1][
- 1])**2)
- if box_height > 2 * box_width:
- font_size = max(int(box_width * 0.9), 10)
- font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
- cur_y = box[0][1]
- for c in txt:
- char_size = font.getsize(c)
- draw_right.text(
- (box[0][0] + 3, cur_y), c, fill=(0, 0, 0), font=font)
- cur_y += char_size[1]
- else:
- font_size = max(int(box_height * 0.8), 10)
- font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
- draw_right.text(
- [box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font)
+ img_right_text = draw_box_txt_fine((w, h), box, txt, font_path)
+ pts = np.array(box, np.int32).reshape((-1, 1, 2))
+ cv2.polylines(img_right_text, [pts], True, color, 1)
+ img_right = cv2.bitwise_and(img_right, img_right_text)
img_left = Image.blend(image, img_left, 0.5)
img_show = Image.new('RGB', (w * 2, h), (255, 255, 255))
img_show.paste(img_left, (0, 0, w, h))
- img_show.paste(img_right, (w, 0, w * 2, h))
+ img_show.paste(Image.fromarray(img_right), (w, 0, w * 2, h))
return np.array(img_show)
+def draw_box_txt_fine(img_size, box, txt, font_path="./doc/fonts/simfang.ttf"):
+ box_height = int(
+ math.sqrt((box[0][0] - box[3][0])**2 + (box[0][1] - box[3][1])**2))
+ box_width = int(
+ math.sqrt((box[0][0] - box[1][0])**2 + (box[0][1] - box[1][1])**2))
+
+ if box_height > 2 * box_width and box_height > 30:
+ img_text = Image.new('RGB', (box_height, box_width), (255, 255, 255))
+ draw_text = ImageDraw.Draw(img_text)
+ if txt:
+ font = create_font(txt, (box_height, box_width), font_path)
+ draw_text.text([0, 0], txt, fill=(0, 0, 0), font=font)
+ img_text = img_text.transpose(Image.ROTATE_270)
+ else:
+ img_text = Image.new('RGB', (box_width, box_height), (255, 255, 255))
+ draw_text = ImageDraw.Draw(img_text)
+ if txt:
+ font = create_font(txt, (box_width, box_height), font_path)
+ draw_text.text([0, 0], txt, fill=(0, 0, 0), font=font)
+
+ pts1 = np.float32(
+ [[0, 0], [box_width, 0], [box_width, box_height], [0, box_height]])
+ pts2 = np.array(box, dtype=np.float32)
+ M = cv2.getPerspectiveTransform(pts1, pts2)
+
+ img_text = np.array(img_text, dtype=np.uint8)
+ img_right_text = cv2.warpPerspective(
+ img_text,
+ M,
+ img_size,
+ flags=cv2.INTER_NEAREST,
+ borderMode=cv2.BORDER_CONSTANT,
+ borderValue=(255, 255, 255))
+ return img_right_text
+
+
+def create_font(txt, sz, font_path="./doc/fonts/simfang.ttf"):
+ font_size = int(sz[1] * 0.99)
+ font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
+ length = font.getsize(txt)[0]
+ if length > sz[0]:
+ font_size = int(font_size * sz[0] / length)
+ font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
+ return font
+
+
def str_count(s):
"""
Count the number of Chinese characters,
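With the change above, TensorRT dynamic-shape files are written per predictor mode, so a detector run like the following sketch produces `./det_shape.txt` instead of sharing one file across det/rec/cls (model and image paths are illustrative):

```shell
# the first run collects shape ranges; later runs reuse the tuned file
python3.7 tools/infer/predict_det.py --use_gpu=True --use_tensorrt=True \
    --shape_info_filename=./shape.txt \
    --det_model_dir=./inference/ch_PP-OCRv2_det_infer/ \
    --image_dir=./doc/imgs/
```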
diff --git a/tools/infer_e2e.py b/tools/infer_e2e.py
index d3e6b28fca0a3ff32ea940747712d6c71aa290fd..37fdcbaadc2984c9cf4fb105b7122db31b99be30 100755
--- a/tools/infer_e2e.py
+++ b/tools/infer_e2e.py
@@ -37,6 +37,46 @@ from ppocr.postprocess import build_post_process
from ppocr.utils.save_load import load_model
from ppocr.utils.utility import get_image_file_list
import tools.program as program
+from PIL import Image, ImageDraw, ImageFont
+import math
+
+
+def draw_e2e_res_for_chinese(image,
+ boxes,
+ txts,
+ config,
+ img_name,
+ font_path="./doc/simfang.ttf"):
+ h, w = image.height, image.width
+ img_left = image.copy()
+ img_right = Image.new('RGB', (w, h), (255, 255, 255))
+
+ import random
+
+ random.seed(0)
+ draw_left = ImageDraw.Draw(img_left)
+ draw_right = ImageDraw.Draw(img_right)
+ for idx, (box, txt) in enumerate(zip(boxes, txts)):
+ box = np.array(box)
+ box = [tuple(x) for x in box]
+ color = (random.randint(0, 255), random.randint(0, 255),
+ random.randint(0, 255))
+ draw_left.polygon(box, fill=color)
+ draw_right.polygon(box, outline=color)
+ font = ImageFont.truetype(font_path, 15, encoding="utf-8")
+ draw_right.text([box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font)
+ img_left = Image.blend(image, img_left, 0.5)
+ img_show = Image.new('RGB', (w * 2, h), (255, 255, 255))
+ img_show.paste(img_left, (0, 0, w, h))
+ img_show.paste(img_right, (w, 0, w * 2, h))
+
+ save_e2e_path = os.path.dirname(config['Global'][
+ 'save_res_path']) + "/e2e_results/"
+ if not os.path.exists(save_e2e_path):
+ os.makedirs(save_e2e_path)
+ save_path = os.path.join(save_e2e_path, os.path.basename(img_name))
+ cv2.imwrite(save_path, np.array(img_show)[:, :, ::-1])
+ logger.info("The e2e Image saved in {}".format(save_path))
def draw_e2e_res(dt_boxes, strs, config, img, img_name):
@@ -113,7 +153,19 @@ def main():
otstr = file + "\t" + json.dumps(dt_boxes_json) + "\n"
fout.write(otstr.encode())
src_img = cv2.imread(file)
- draw_e2e_res(points, strs, config, src_img, file)
+        if global_config.get('infer_visual_type', 'EN') == 'EN':
+            draw_e2e_res(points, strs, config, src_img, file)
+        elif global_config.get('infer_visual_type') == 'CN':
+ src_img = Image.fromarray(
+ cv2.cvtColor(src_img, cv2.COLOR_BGR2RGB))
+ draw_e2e_res_for_chinese(
+ src_img,
+ points,
+ strs,
+ config,
+ file,
+ font_path="./doc/fonts/simfang.ttf")
+
logger.info("success!")
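A sketch of driving the new visualization switch (config and image follow the repository's PGNet e2e examples; a trained model still has to be supplied via `Global.pretrained_model`):

```shell
python3 tools/infer_e2e.py -c configs/e2e/e2e_r50_vd_pg.yml \
    -o Global.infer_img="./doc/imgs_en/img623.jpg" Global.infer_visual_type="CN"
```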
diff --git a/tools/program.py b/tools/program.py
index 16d3d4035af933cda01b422ea56e9e2895ec2b88..9117d51b95b343c46982f212d4e5faa069b7b44a 100755
--- a/tools/program.py
+++ b/tools/program.py
@@ -114,7 +114,7 @@ def merge_config(config, opts):
return config
-def check_device(use_gpu, use_xpu=False):
+def check_device(use_gpu, use_xpu=False, use_npu=False):
"""
Log error and exit when set use_gpu=true in paddlepaddle
cpu version.
@@ -134,24 +134,8 @@ def check_device(use_gpu, use_xpu=False):
if use_xpu and not paddle.device.is_compiled_with_xpu():
print(err.format("use_xpu", "xpu", "xpu", "use_xpu"))
sys.exit(1)
- except Exception as e:
- pass
-
-
-def check_xpu(use_xpu):
- """
- Log error and exit when set use_xpu=true in paddlepaddle
- cpu/gpu version.
- """
- err = "Config use_xpu cannot be set as true while you are " \
- "using paddlepaddle cpu/gpu version ! \nPlease try: \n" \
- "\t1. Install paddlepaddle-xpu to run model on XPU \n" \
- "\t2. Set use_xpu as false in config file to run " \
- "model on CPU/GPU"
-
- try:
- if use_xpu and not paddle.is_compiled_with_xpu():
- print(err)
+ if use_npu and not paddle.device.is_compiled_with_npu():
+ print(err.format("use_npu", "npu", "npu", "use_npu"))
sys.exit(1)
except Exception as e:
pass
@@ -279,7 +263,9 @@ def train(config,
model_average = True
# use amp
if scaler:
- with paddle.amp.auto_cast(level=amp_level, custom_black_list=amp_custom_black_list):
+ with paddle.amp.auto_cast(
+ level=amp_level,
+ custom_black_list=amp_custom_black_list):
if model_type == 'table' or extra_input:
preds = model(images, data=batch[1:])
elif model_type in ["kie"]:
@@ -479,7 +465,7 @@ def eval(model,
extra_input=False,
scaler=None,
amp_level='O2',
- amp_custom_black_list = []):
+ amp_custom_black_list=[]):
model.eval()
with paddle.no_grad():
total_frame = 0.0
@@ -500,7 +486,9 @@ def eval(model,
# use amp
if scaler:
- with paddle.amp.auto_cast(level=amp_level, custom_black_list=amp_custom_black_list):
+ with paddle.amp.auto_cast(
+ level=amp_level,
+ custom_black_list=amp_custom_black_list):
if model_type == 'table' or extra_input:
preds = model(images, data=batch[1:])
elif model_type in ["kie"]:
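
A minimal sketch of the auto_cast pattern shared by train() and eval(); the toy layer and black-list entry are placeholders. Ops listed in custom_black_list are kept in FP32 inside the autocast region:

import paddle

model = paddle.nn.Linear(4, 2)
x = paddle.rand([8, 4])

with paddle.amp.auto_cast(level='O1', custom_black_list=['matmul_v2']):
    preds = model(x)  # matmul_v2 stays FP32; other ops may run in FP16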
@@ -627,14 +615,9 @@ def preprocess(is_train=False):
logger = get_logger(log_file=log_file)
# check if set use_gpu=True in paddlepaddle cpu version
- use_gpu = config['Global']['use_gpu']
+ use_gpu = config['Global'].get('use_gpu', False)
use_xpu = config['Global'].get('use_xpu', False)
-
- # check if set use_xpu=True in paddlepaddle cpu/gpu version
- use_xpu = False
- if 'use_xpu' in config['Global']:
- use_xpu = config['Global']['use_xpu']
- check_xpu(use_xpu)
+ use_npu = config['Global'].get('use_npu', False)
alg = config['Architecture']['algorithm']
assert alg in [
@@ -642,15 +625,17 @@ def preprocess(is_train=False):
'CLS', 'PGNet', 'Distillation', 'NRTR', 'TableAttn', 'SAR', 'PSE',
'SEED', 'SDMGR', 'LayoutXLM', 'LayoutLM', 'LayoutLMv2', 'PREN', 'FCE',
'SVTR', 'ViTSTR', 'ABINet', 'DB++', 'TableMaster', 'SPIN', 'VisionLAN',
- 'Gestalt', 'SLANet', 'RobustScanner'
+ 'Gestalt', 'SLANet', 'RobustScanner', 'CT'
]
if use_xpu:
device = 'xpu:{0}'.format(os.getenv('FLAGS_selected_xpus', 0))
+ elif use_npu:
+ device = 'npu:{0}'.format(os.getenv('FLAGS_selected_npus', 0))
else:
device = 'gpu:{}'.format(dist.ParallelEnv()
.dev_id) if use_gpu else 'cpu'
- check_device(use_gpu, use_xpu)
+ check_device(use_gpu, use_xpu, use_npu)
device = paddle.set_device(device)
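
For illustration, a hypothetical Global section and the resulting device string under the new cascade; the flag values mirror the code above and are not from any shipped config:

import os

# hypothetical Global section; at most one flag should normally be true
global_cfg = {'use_gpu': False, 'use_xpu': False, 'use_npu': True}

use_npu = global_cfg.get('use_npu', False)
if use_npu:
    # FLAGS_selected_npus picks the card id, defaulting to 0
    device = 'npu:{0}'.format(os.getenv('FLAGS_selected_npus', 0))
print(device)  # -> 'npu:0' unless the env var overrides the card id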
diff --git a/tools/train.py b/tools/train.py
index d0f200189e34265b3c080ac9e25eb80d29c705b7..970a52624af7b2831d88956f857cd4271086bcca 100755
--- a/tools/train.py
+++ b/tools/train.py
@@ -119,6 +119,7 @@ def main(config, device, logger, vdl_writer):
config['Loss']['ignore_index'] = char_num - 1
model = build_model(config['Architecture'])
+
use_sync_bn = config["Global"].get("use_sync_bn", False)
if use_sync_bn:
model = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(model)
@@ -138,7 +139,7 @@ def main(config, device, logger, vdl_writer):
# build metric
eval_class = build_metric(config['Metric'])
-
+
logger.info('train dataloader has {} iters'.format(len(train_dataloader)))
if valid_dataloader is not None:
logger.info('valid dataloader has {} iters'.format(
@@ -146,7 +147,7 @@ def main(config, device, logger, vdl_writer):
use_amp = config["Global"].get("use_amp", False)
amp_level = config["Global"].get("amp_level", 'O2')
- amp_custom_black_list = config['Global'].get('amp_custom_black_list',[])
+ amp_custom_black_list = config['Global'].get('amp_custom_black_list', [])
if use_amp:
AMP_RELATED_FLAGS_SETTING = {
'FLAGS_cudnn_batchnorm_spatial_persistent': 1,
@@ -161,20 +162,24 @@ def main(config, device, logger, vdl_writer):
use_dynamic_loss_scaling=use_dynamic_loss_scaling)
if amp_level == "O2":
model, optimizer = paddle.amp.decorate(
- models=model, optimizers=optimizer, level=amp_level, master_weight=True)
+ models=model,
+ optimizers=optimizer,
+ level=amp_level,
+ master_weight=True)
else:
scaler = None
# load pretrain model
pre_best_model_dict = load_model(config, model, optimizer,
config['Architecture']["model_type"])
-
+
if config['Global']['distributed']:
model = paddle.DataParallel(model)
# start train
program.train(config, train_dataloader, valid_dataloader, device, model,
loss_class, optimizer, lr_scheduler, post_process_class,
- eval_class, pre_best_model_dict, logger, vdl_writer, scaler,amp_level, amp_custom_black_list)
+ eval_class, pre_best_model_dict, logger, vdl_writer, scaler,
+ amp_level, amp_custom_black_list)
def test_reader(config, device, logger):
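
A minimal sketch of the O2 decoration step reformatted above, with a toy model and SGD optimizer standing in for the real ones; master_weight=True keeps FP32 master copies of the parameters:

import paddle

model = paddle.nn.Linear(4, 2)
optimizer = paddle.optimizer.SGD(learning_rate=0.01,
                                 parameters=model.parameters())
scaler = paddle.amp.GradScaler(init_loss_scaling=32768,
                               use_dynamic_loss_scaling=True)
model, optimizer = paddle.amp.decorate(
    models=model, optimizers=optimizer, level='O2', master_weight=True)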
diff --git a/train.sh b/train.sh
index 4225470cb9f545b874e5f806af22405895e8f6c7..6fa04ea3febe8982016a35d83f119c0a483e3bb8 100644
--- a/train.sh
+++ b/train.sh
@@ -1,2 +1,2 @@
# recommended paddle.__version__ == 2.0.0
-python3 -m paddle.distributed.launch --log_dir=./debug/ --gpus '0,1,2,3,4,5,6,7' tools/train.py -c configs/rec/rec_mv3_none_bilstm_ctc.yml
+python3 -m paddle.distributed.launch --log_dir=./debug/ --gpus '0,1,2,3,4,5,6,7' tools/train.py -c configs/rec/rec_mv3_none_bilstm_ctc.yml
\ No newline at end of file
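
For context, each worker spawned by paddle.distributed.launch sees its own device id, which is what the device-selection code reads; a minimal probe (assuming it runs under the launcher) might look like:

import paddle.distributed as dist

# under paddle.distributed.launch, dev_id differs per spawned process
print(dist.ParallelEnv().dev_id)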