diff --git a/configs/cls/ch_PP-OCRv3/ch_PP-OCRv3_rotnet.yml b/configs/cls/ch_PP-OCRv3/ch_PP-OCRv3_rotnet.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1ffeba07995860f964e22b8b9d2538320d80f651
--- /dev/null
+++ b/configs/cls/ch_PP-OCRv3/ch_PP-OCRv3_rotnet.yml
@@ -0,0 +1,99 @@
+Global:
+ debug: false
+ use_gpu: true
+ epoch_num: 100
+ log_smooth_window: 20
+ print_batch_step: 10
+ save_model_dir: ./output/rec_ppocr_v3_rotnet
+ save_epoch_step: 3
+ eval_batch_step: [0, 2000]
+ cal_metric_during_train: true
+ pretrained_model: null
+ checkpoints: null
+ save_inference_dir: null
+ use_visualdl: false
+ infer_img: doc/imgs_words/ch/word_1.jpg
+ character_dict_path: ppocr/utils/ppocr_keys_v1.txt
+ max_text_length: 25
+ infer_mode: false
+ use_space_char: true
+ save_res_path: ./output/rec/predicts_chinese_lite_v2.0.txt
+Optimizer:
+ name: Adam
+ beta1: 0.9
+ beta2: 0.999
+ lr:
+ name: Cosine
+ learning_rate: 0.001
+ regularizer:
+ name: L2
+ factor: 1.0e-05
+Architecture:
+ model_type: cls
+ algorithm: CLS
+ Transform: null
+ Backbone:
+ name: MobileNetV1Enhance
+ scale: 0.5
+ last_conv_stride: [1, 2]
+ last_pool_type: avg
+ Neck:
+ Head:
+ name: ClsHead
+ class_dim: 4
+
+Loss:
+ name: ClsLoss
+ main_indicator: acc
+
+PostProcess:
+ name: ClsPostProcess
+
+Metric:
+ name: ClsMetric
+ main_indicator: acc
+
+Train:
+ dataset:
+ name: SimpleDataSet
+ data_dir: ./train_data
+ label_file_list:
+ - ./train_data/train_list.txt
+ transforms:
+ - DecodeImage:
+ img_mode: BGR
+ channel_first: false
+ - RecAug:
+ use_tia: False
+ - RandAugment:
+ - SSLRotateResize:
+ image_shape: [3, 48, 320]
+ - KeepKeys:
+ keep_keys: ["image", "label"]
+ loader:
+ collate_fn: "SSLRotateCollate"
+ shuffle: true
+ batch_size_per_card: 32
+ drop_last: true
+ num_workers: 8
+Eval:
+ dataset:
+ name: SimpleDataSet
+ data_dir: ./train_data
+ label_file_list:
+ - ./train_data/val_list.txt
+ transforms:
+ - DecodeImage:
+ img_mode: BGR
+ channel_first: false
+ - SSLRotateResize:
+ image_shape: [3, 48, 320]
+ - KeepKeys:
+ keep_keys: ["image", "label"]
+ loader:
+ collate_fn: "SSLRotateCollate"
+ shuffle: false
+ drop_last: false
+ batch_size_per_card: 64
+ num_workers: 8
+profiler_options: null
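The config above trains a 4-way rotation classifier as a self-supervised pretext task: SSLRotateResize produces four rotated views labeled 0-3 (hence `class_dim: 4` on ClsHead), and SSLRotateCollate folds the views into the batch. A back-of-envelope sketch of the effective batch under the loader settings above:

```python
# Every sample contributes 4 rotated views, so with batch_size_per_card: 32
# the classifier sees 32 * 4 = 128 images of shape (3, 48, 320) per step.
images_per_step = 32 * 4   # 128 images, each (3, 48, 320)
labels_per_step = 32 * 4   # 128 labels, values in {0, 1, 2, 3}
```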
diff --git a/configs/rec/rec_svtrnet.yml b/configs/rec/rec_svtrnet.yml
new file mode 100644
index 0000000000000000000000000000000000000000..233d5e276577cad0144456ef7df1e20de99891f9
--- /dev/null
+++ b/configs/rec/rec_svtrnet.yml
@@ -0,0 +1,117 @@
+Global:
+ use_gpu: True
+ epoch_num: 20
+ log_smooth_window: 20
+ print_batch_step: 10
+ save_model_dir: ./output/rec/svtr/
+ save_epoch_step: 1
+ # evaluation is run every 2000 iterations after the 0th iteration
+ eval_batch_step: [0, 2000]
+ cal_metric_during_train: True
+ pretrained_model:
+ checkpoints:
+ save_inference_dir:
+ use_visualdl: False
+ infer_img: doc/imgs_words_en/word_10.png
+ # for data or label process
+ character_dict_path:
+ character_type: en
+ max_text_length: 25
+ infer_mode: False
+ use_space_char: False
+ save_res_path: ./output/rec/predicts_svtr_tiny.txt
+
+
+Optimizer:
+ name: AdamW
+ beta1: 0.9
+ beta2: 0.99
+ epsilon: 0.00000008
+ weight_decay: 0.05
+ no_weight_decay_name: norm pos_embed
+ one_dim_param_no_weight_decay: true
+ lr:
+ name: Cosine
+ learning_rate: 0.0005
+ warmup_epoch: 2
+
+Architecture:
+ model_type: rec
+ algorithm: SVTR
+ Transform:
+ name: STN_ON
+ tps_inputsize: [32, 64]
+ tps_outputsize: [32, 100]
+ num_control_points: 20
+ tps_margins: [0.05,0.05]
+ stn_activation: none
+ Backbone:
+ name: SVTRNet
+ img_size: [32, 100]
+ out_char_num: 25
+ out_channels: 192
+ patch_merging: 'Conv'
+ embed_dim: [64, 128, 256]
+ depth: [3, 6, 3]
+ num_heads: [2, 4, 8]
+ mixer: ['Local','Local','Local','Local','Local','Local','Global','Global','Global','Global','Global','Global']
+ local_mixer: [[7, 11], [7, 11], [7, 11]]
+ last_stage: True
+ prenorm: false
+ Neck:
+ name: SequenceEncoder
+ encoder_type: reshape
+ Head:
+ name: CTCHead
+
+Loss:
+ name: CTCLoss
+
+PostProcess:
+ name: CTCLabelDecode
+
+Metric:
+ name: RecMetric
+ main_indicator: acc
+
+Train:
+ dataset:
+ name: LMDBDataSet
+ data_dir: ./train_data/data_lmdb_release/training/
+ transforms:
+ - DecodeImage: # load image
+ img_mode: BGR
+ channel_first: False
+ - CTCLabelEncode: # Class handling label
+ - RecResizeImg:
+ character_dict_path:
+ image_shape: [3, 64, 256]
+ padding: False
+ - KeepKeys:
+ keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+ loader:
+ shuffle: True
+ batch_size_per_card: 512
+ drop_last: True
+ num_workers: 4
+
+Eval:
+ dataset:
+ name: LMDBDataSet
+ data_dir: ./train_data/data_lmdb_release/validation/
+ transforms:
+ - DecodeImage: # load image
+ img_mode: BGR
+ channel_first: False
+ - CTCLabelEncode: # Class handling label
+ - RecResizeImg:
+ character_dict_path:
+ image_shape: [3, 64, 256]
+ padding: False
+ - KeepKeys:
+ keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+ loader:
+ shuffle: False
+ drop_last: False
+ batch_size_per_card: 256
+ num_workers: 2
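In SVTRNet, `mixer` supplies one entry per transformer block while `local_mixer`, `embed_dim` and `num_heads` supply one entry per stage, so the lists above must stay consistent with `depth` ([3, 6, 3] gives 12 blocks: 6 'Local' + 6 'Global'). A small sketch that checks those invariants when editing the YAML (the checker itself is hypothetical, not repo code):

```python
import yaml

with open("configs/rec/rec_svtrnet.yml") as f:
    backbone = yaml.safe_load(f)["Architecture"]["Backbone"]

depth = backbone["depth"]                          # [3, 6, 3] -> 12 blocks
assert len(backbone["mixer"]) == sum(depth)        # one mixer entry per block
assert len(backbone["local_mixer"]) == len(depth)  # one local window per stage
assert len(backbone["embed_dim"]) == len(backbone["num_heads"]) == len(depth)
```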
diff --git a/deploy/README.md b/deploy/README.md
index 033662a7535e10c2c468436d4f01f06d84996fb7..69e2438996a1329e801f842ef78d2d6e115c5831 100644
--- a/deploy/README.md
+++ b/deploy/README.md
@@ -22,9 +22,11 @@ PP-OCR supports multiple deployment schemes. Click the link to get the specific
- [Python Inference](../doc/doc_en/inference_ppocr_en.md)
- [C++ Inference](./cpp_infer/readme.md)
-- [Serving](./pdserving/README.md)
-- [Paddle-Lite](./lite/readme.md)
+- [Serving (Python/C++)](./pdserving/README.md)
+- [Paddle-Lite (ARM CPU/OpenCL ARM GPU/Metal ARM GPU)](./lite/readme.md)
- [Paddle.js](./paddlejs/README.md)
+- [Jetson Inference]()
+- [XPU Inference]()
- [Paddle2ONNX](./paddle2onnx/readme.md)
If you need deployment tutorials for academic algorithm models other than PP-OCR, please go directly to the main page of the corresponding algorithm, [entrance](../doc/doc_en/algorithm_overview_en.md).
\ No newline at end of file
diff --git a/deploy/README_ch.md b/deploy/README_ch.md
index 96b49ddd9b94bff877ae1ac1d0e6f6e90612ec85..63ae59537316480a302dca7c3714db3c1003553e 100644
--- a/deploy/README_ch.md
+++ b/deploy/README_ch.md
@@ -22,9 +22,11 @@ PP-OCR models support deployment in multiple scenarios. Click the links to get the specific
- [Python Inference](../doc/doc_ch/inference_ppocr.md)
- [C++ Inference](./cpp_infer/readme_ch.md)
-- [Serving Deployment](./pdserving/README_CN.md)
-- [Paddle-Lite On-device Deployment](./lite/readme_ch.md)
-- [Paddle.js Serving Deployment](./paddlejs/README_ch.md)
+- [Serving Deployment (Python/C++)](./pdserving/README_CN.md)
+- [Paddle-Lite On-device Deployment (ARM CPU/OpenCL ARM GPU/Metal ARM GPU)](./lite/readme_ch.md)
+- [Paddle.js Deployment](./paddlejs/README_ch.md)
+- [Jetson Inference]()
+- [XPU Inference]()
- [Paddle2ONNX Inference](./paddle2onnx/readme_ch.md)
For deployment of academic algorithm models other than PP-OCR, please go directly to the main page of the corresponding algorithm, [entrance](../doc/doc_ch/algorithm_overview.md).
\ No newline at end of file
diff --git a/deploy/slim/quantization/export_model.py b/deploy/slim/quantization/export_model.py
index 822fd5da4c30a934d0e590ab1067f9f9188213c2..90f79dab34a5f20d4556ae4b10ad1d4e1f8b7f0d 100755
--- a/deploy/slim/quantization/export_model.py
+++ b/deploy/slim/quantization/export_model.py
@@ -35,17 +35,7 @@ from ppocr.metrics import build_metric
import tools.program as program
from paddleslim.dygraph.quant import QAT
from ppocr.data import build_dataloader
-
-
-def export_single_model(quanter, model, infer_shape, save_path, logger):
- quanter.save_quantized_model(
- model,
- save_path,
- input_spec=[
- paddle.static.InputSpec(
- shape=[None] + infer_shape, dtype='float32')
- ])
- logger.info('inference QAT model is saved to {}'.format(save_path))
+from tools.export_model import export_single_model
def main():
@@ -84,17 +74,54 @@ def main():
config['Global'])
# build model
- # for rec algorithm
if hasattr(post_process_class, 'character'):
char_num = len(getattr(post_process_class, 'character'))
if config['Architecture']["algorithm"] in ["Distillation",
]: # distillation model
for key in config['Architecture']["Models"]:
- config['Architecture']["Models"][key]["Head"][
- 'out_channels'] = char_num
+ if config['Architecture']['Models'][key]['Head'][
+ 'name'] == 'MultiHead': # for multi head
+ if config['PostProcess'][
+ 'name'] == 'DistillationSARLabelDecode':
+ char_num = char_num - 2
+ # update SARLoss params
+ assert list(config['Loss']['loss_config_list'][-1].keys())[
+ 0] == 'DistillationSARLoss'
+ config['Loss']['loss_config_list'][-1][
+ 'DistillationSARLoss']['ignore_index'] = char_num + 1
+ out_channels_list = {}
+ out_channels_list['CTCLabelDecode'] = char_num
+ out_channels_list['SARLabelDecode'] = char_num + 2
+ config['Architecture']['Models'][key]['Head'][
+ 'out_channels_list'] = out_channels_list
+ else:
+ config['Architecture']["Models"][key]["Head"][
+ 'out_channels'] = char_num
+ elif config['Architecture']['Head'][
+ 'name'] == 'MultiHead': # for multi head
+ if config['PostProcess']['name'] == 'SARLabelDecode':
+ char_num = char_num - 2
+ # update SARLoss params
+ assert list(config['Loss']['loss_config_list'][1].keys())[
+ 0] == 'SARLoss'
+ if config['Loss']['loss_config_list'][1]['SARLoss'] is None:
+ config['Loss']['loss_config_list'][1]['SARLoss'] = {
+ 'ignore_index': char_num + 1
+ }
+ else:
+ config['Loss']['loss_config_list'][1]['SARLoss'][
+ 'ignore_index'] = char_num + 1
+ out_channels_list = {}
+ out_channels_list['CTCLabelDecode'] = char_num
+ out_channels_list['SARLabelDecode'] = char_num + 2
+ config['Architecture']['Head'][
+ 'out_channels_list'] = out_channels_list
else: # base rec model
config['Architecture']["Head"]['out_channels'] = char_num
+ if config['PostProcess']['name'] == 'SARLabelDecode': # for SAR model
+ config['Loss']['ignore_index'] = char_num - 1
+
model = build_model(config['Architecture'])
# get QAT model
@@ -120,21 +147,20 @@ def main():
for k, v in metric.items():
logger.info('{}:{}'.format(k, v))
- infer_shape = [3, 32, 100] if model_type == "rec" else [3, 640, 640]
-
save_path = config["Global"]["save_inference_dir"]
arch_config = config["Architecture"]
+
if arch_config["algorithm"] in ["Distillation", ]: # distillation model
+ archs = list(arch_config["Models"].values())
for idx, name in enumerate(model.model_name_list):
model.model_list[idx].eval()
sub_model_save_path = os.path.join(save_path, name, "inference")
- export_single_model(quanter, model.model_list[idx], infer_shape,
- sub_model_save_path, logger)
+ export_single_model(model.model_list[idx], archs[idx],
+ sub_model_save_path, logger, quanter)
else:
save_path = os.path.join(save_path, "inference")
- model.eval()
- export_single_model(quanter, model, infer_shape, save_path, logger)
+ export_single_model(model, arch_config, save_path, logger, quanter)
if __name__ == "__main__":
diff --git a/deploy/slim/quantization/quant.py b/deploy/slim/quantization/quant.py
index 1dffaab0eef35ec41c27c9c6e00f25dda048d490..f7acb185add5d40b749e7442111891869dfaeb22 100755
--- a/deploy/slim/quantization/quant.py
+++ b/deploy/slim/quantization/quant.py
@@ -112,10 +112,48 @@ def main(config, device, logger, vdl_writer):
if config['Architecture']["algorithm"] in ["Distillation",
]: # distillation model
for key in config['Architecture']["Models"]:
- config['Architecture']["Models"][key]["Head"][
- 'out_channels'] = char_num
+ if config['Architecture']['Models'][key]['Head'][
+ 'name'] == 'MultiHead': # for multi head
+ if config['PostProcess'][
+ 'name'] == 'DistillationSARLabelDecode':
+ char_num = char_num - 2
+ # update SARLoss params
+ assert list(config['Loss']['loss_config_list'][-1].keys())[
+ 0] == 'DistillationSARLoss'
+ config['Loss']['loss_config_list'][-1][
+ 'DistillationSARLoss']['ignore_index'] = char_num + 1
+ out_channels_list = {}
+ out_channels_list['CTCLabelDecode'] = char_num
+ out_channels_list['SARLabelDecode'] = char_num + 2
+ config['Architecture']['Models'][key]['Head'][
+ 'out_channels_list'] = out_channels_list
+ else:
+ config['Architecture']["Models"][key]["Head"][
+ 'out_channels'] = char_num
+ elif config['Architecture']['Head'][
+ 'name'] == 'MultiHead': # for multi head
+ if config['PostProcess']['name'] == 'SARLabelDecode':
+ char_num = char_num - 2
+ # update SARLoss params
+ assert list(config['Loss']['loss_config_list'][1].keys())[
+ 0] == 'SARLoss'
+ if config['Loss']['loss_config_list'][1]['SARLoss'] is None:
+ config['Loss']['loss_config_list'][1]['SARLoss'] = {
+ 'ignore_index': char_num + 1
+ }
+ else:
+ config['Loss']['loss_config_list'][1]['SARLoss'][
+ 'ignore_index'] = char_num + 1
+ out_channels_list = {}
+ out_channels_list['CTCLabelDecode'] = char_num
+ out_channels_list['SARLabelDecode'] = char_num + 2
+ config['Architecture']['Head'][
+ 'out_channels_list'] = out_channels_list
else: # base rec model
config['Architecture']["Head"]['out_channels'] = char_num
+
+ if config['PostProcess']['name'] == 'SARLabelDecode': # for SAR model
+ config['Loss']['ignore_index'] = char_num - 1
model = build_model(config['Architecture'])
pre_best_model_dict = dict()
@@ -137,7 +175,7 @@ def main(config, device, logger, vdl_writer):
config['Optimizer'],
epochs=config['Global']['epoch_num'],
step_each_epoch=len(train_dataloader),
- parameters=model.parameters())
+ model=model)
# resume PACT training process
if config["Global"]["checkpoints"] is not None:
diff --git a/doc/doc_ch/algorithm_det_db.md b/doc/doc_ch/algorithm_det_db.md
index 7f94ceaee06ac41a42c785f26bffa30005a98355..90837c2ac1ebbc04ee47cbb74ed6466352710e88 100644
--- a/doc/doc_ch/algorithm_det_db.md
+++ b/doc/doc_ch/algorithm_det_db.md
@@ -25,8 +25,8 @@
|Model|Backbone|Configuration|Precision|Recall|Hmean|Download|
| --- | --- | --- | --- | --- | --- | --- |
-|DB|ResNet50_vd|configs/det/det_r50_vd_db.yml|86.41%|78.72%|82.38%|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)|
-|DB|MobileNetV3|configs/det/det_mv3_db.yml|77.29%|73.08%|75.12%|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)|
+|DB|ResNet50_vd|[configs/det/det_r50_vd_db.yml](../../configs/det/det_r50_vd_db.yml)|86.41%|78.72%|82.38%|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)|
+|DB|MobileNetV3|[configs/det/det_mv3_db.yml](../../configs/det/det_mv3_db.yml)|77.29%|73.08%|75.12%|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)|
diff --git a/doc/doc_ch/dataset/layout_datasets.md b/doc/doc_ch/dataset/layout_datasets.md
index 45ac3a1127014eed420bb816f9ee07963efff533..e7055b4e607aae358a9ec1e93f3640b2b68ea4a1 100644
--- a/doc/doc_ch/dataset/layout_datasets.md
+++ b/doc/doc_ch/dataset/layout_datasets.md
@@ -27,7 +27,7 @@
#### 2. CDLA Dataset
- **Data source**: https://github.com/buptlihang/CDLA
-- **Dataset description**: The training set of the publaynet dataset contains 5000 images and the validation set contains 1000 images, covering 10 categories: `Text, Title, Figure, Figure caption, Table, Table caption, Header, Footer, Reference, Equation`. Some images and their annotated boxes are visualized below.
+- **Dataset description**: The training set of the CDLA dataset contains 5000 images and the validation set contains 1000 images, covering 10 categories: `Text, Title, Figure, Figure caption, Table, Table caption, Header, Footer, Reference, Equation`. Some images and their annotated boxes are visualized below.
diff --git a/doc/doc_ch/ppocr_introduction.md b/doc/doc_ch/ppocr_introduction.md
index d9b5a4e0231dcec271c12942dfdb108854b530ae..2e25ebc9501d2e916b86867bf265490aa0971be0 100644
--- a/doc/doc_ch/ppocr_introduction.md
+++ b/doc/doc_ch/ppocr_introduction.md
@@ -17,6 +17,8 @@
PP-OCR is a practical ultra-lightweight OCR system developed by PaddleOCR. On top of the implemented [cutting-edge algorithms](algorithm.md), it applies **model slimming** and **deep optimization** to balance accuracy and speed, so that it can meet industrial deployment needs as far as possible.
+#### PP-OCR
+
PP-OCR is a two-stage OCR system: [DB](algorithm_det_db.md) is used for text detection and [CRNN](algorithm_rec_crnn.md) for text recognition, with a [text direction classifier](angle_class.md) inserted between the detection and recognition modules to handle text in different orientations.
The PP-OCR system pipeline is as follows:
@@ -28,9 +30,13 @@ The PP-OCR system pipeline is as follows:
The PP-OCR system is continuously being iterated and optimized; two versions, PP-OCR and PP-OCRv2, have been released so far:
-[1] PP-OCR adopts 19 effective strategies across 8 aspects (backbone selection and adjustment, prediction head design, data augmentation, learning-rate scheduling, regularization parameter selection, pre-trained model usage, and automatic model pruning and quantization) to tune and slim the model of each module (as shown in the green boxes), finally reaching an overall size of 3.5M for the ultra-lightweight Chinese-English OCR and 2.8M for the English-digit OCR. For more details, please refer to the PP-OCR technical report https://arxiv.org/abs/2009.09941
+PP-OCR adopts 19 effective strategies across 8 aspects (backbone selection and adjustment, prediction head design, data augmentation, learning-rate scheduling, regularization parameter selection, pre-trained model usage, and automatic model pruning and quantization) to tune and slim the model of each module (as shown in the green boxes), finally reaching an overall size of 3.5M for the ultra-lightweight Chinese-English OCR and 2.8M for the English-digit OCR. For more details, please refer to the PP-OCR technical report https://arxiv.org/abs/2009.09941
+
+#### PP-OCRv2
+
+Building on PP-OCR, PP-OCRv2 makes further key optimizations in 5 aspects: the detection model adopts the CML collaborative mutual-learning knowledge distillation strategy and the CopyPaste data augmentation strategy; the recognition model adopts the LCNet lightweight backbone, the improved UDML knowledge distillation strategy, and the [Enhanced CTC loss](./doc/doc_ch/enhanced_ctc_loss.md) loss-function improvement (as shown in the red boxes above), achieving further clear gains in inference speed and prediction accuracy. For more details, please refer to the PP-OCRv2 [technical report](https://arxiv.org/abs/2109.03144).
-[2] Building on PP-OCR, PP-OCRv2 makes further key optimizations in 5 aspects: the detection model adopts the CML collaborative mutual-learning knowledge distillation strategy and the CopyPaste data augmentation strategy; the recognition model adopts the LCNet lightweight backbone, the improved UDML knowledge distillation strategy, and the [Enhanced CTC loss](./doc/doc_ch/enhanced_ctc_loss.md) loss-function improvement (as shown in the red boxes above), achieving further clear gains in inference speed and prediction accuracy. For more details, please refer to the PP-OCRv2 [technical report](https://arxiv.org/abs/2109.03144).
+#### PP-OCRv3
diff --git a/doc/doc_en/algorithm_det_db_en.md b/doc/doc_en/algorithm_det_db_en.md
index b387a8ec217b351164d7cac878539bab19157a6e..f5f333a039acded88f0f28d302821c5eb10d7402 100644
--- a/doc/doc_en/algorithm_det_db_en.md
+++ b/doc/doc_en/algorithm_det_db_en.md
@@ -25,8 +25,8 @@ On the ICDAR2015 dataset, the text detection result is as follows:
|Model|Backbone|Configuration|Precision|Recall|Hmean|Download|
| --- | --- | --- | --- | --- | --- | --- |
-|DB|ResNet50_vd|configs/det/det_r50_vd_db.yml|86.41%|78.72%|82.38%|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)|
-|DB|MobileNetV3|configs/det/det_mv3_db.yml|77.29%|73.08%|75.12%|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)|
+|DB|ResNet50_vd|[configs/det/det_r50_vd_db.yml](../../configs/det/det_r50_vd_db.yml)|86.41%|78.72%|82.38%|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)|
+|DB|MobileNetV3|[configs/det/det_mv3_db.yml](../../configs/det/det_mv3_db.yml)|77.29%|73.08%|75.12%|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)|
diff --git a/doc/joinus.PNG b/doc/joinus.PNG
index 133e42b8de6c4ec2c0ae4f85aab0d4a7fb425526..6eacac65d268a17dd717fff1790d5c0c84acc5ea 100644
Binary files a/doc/joinus.PNG and b/doc/joinus.PNG differ
diff --git a/ppocr/data/__init__.py b/ppocr/data/__init__.py
index 60ab7bd0b4ceab846982c8744d5b277ee17185df..78c3279656e184a3a34bff3847d3936b5e8977b6 100644
--- a/ppocr/data/__init__.py
+++ b/ppocr/data/__init__.py
@@ -72,6 +72,7 @@ def build_dataloader(config, mode, device, logger, seed=None):
use_shared_memory = loader_config['use_shared_memory']
else:
use_shared_memory = True
+
if mode == "Train":
# Distribute data to multiple cards
batch_sampler = DistributedBatchSampler(
diff --git a/ppocr/data/collate_fn.py b/ppocr/data/collate_fn.py
index 89c6b4fd5ae151e1d703ea5c59abf0177dfc3a8b..0da6060f042a0e60cdf211d8bc13aede32d5930a 100644
--- a/ppocr/data/collate_fn.py
+++ b/ppocr/data/collate_fn.py
@@ -56,3 +56,17 @@ class ListCollator(object):
for idx in to_tensor_idxs:
data_dict[idx] = paddle.to_tensor(data_dict[idx])
return list(data_dict.values())
+
+
+class SSLRotateCollate(object):
+ """
+ bach: [
+ [(4*3xH*W), (4,)]
+ [(4*3xH*W), (4,)]
+ ...
+ ]
+ """
+
+ def __call__(self, batch):
+ output = [np.concatenate(d, axis=0) for d in zip(*batch)]
+ return output
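A minimal sketch of the new collator on dummy rotation-SSL samples (shapes follow the rotnet config; each sample is already a stack of 4 views plus 4 labels):

```python
import numpy as np
from ppocr.data.collate_fn import SSLRotateCollate

batch = [(np.zeros((4, 3, 48, 320), dtype=np.float32), np.arange(4))
         for _ in range(32)]
images, labels = SSLRotateCollate()(batch)
# Views are folded into the batch dimension by the axis-0 concatenation:
print(images.shape, labels.shape)  # (128, 3, 48, 320) (128,)
```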
diff --git a/ppocr/data/imaug/__init__.py b/ppocr/data/imaug/__init__.py
index c24886aa89dbd0957a313dded862de5893fb6817..20aaf48e119d68e6c37ce9246a87701fb149d5e7 100644
--- a/ppocr/data/imaug/__init__.py
+++ b/ppocr/data/imaug/__init__.py
@@ -23,7 +23,8 @@ from .random_crop_data import EastRandomCropData, RandomCropImgMask
from .make_pse_gt import MakePseGt
from .rec_img_aug import RecAug, RecConAug, RecResizeImg, ClsResizeImg, \
- SRNRecResizeImg, NRTRRecResizeImg, SARRecResizeImg, PRENResizeImg
+ SRNRecResizeImg, NRTRRecResizeImg, SARRecResizeImg, PRENResizeImg, SVTRRecResizeImg
+from .ssl_img_aug import SSLRotateResize
from .randaugment import RandAugment
from .copy_paste import CopyPaste
from .ColorJitter import ColorJitter
diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py
index 86366d7a4f2e908426e67015b4338e44350da492..c9bc2e7722e8027ce870e4969bfcdab720495c28 100644
--- a/ppocr/data/imaug/label_ops.py
+++ b/ppocr/data/imaug/label_ops.py
@@ -113,14 +113,14 @@ class BaseRecLabelEncode(object):
dict_character = list(self.character_str)
self.lower = True
else:
- self.character_str = ""
+ self.character_str = []
with open(character_dict_path, "rb") as fin:
lines = fin.readlines()
for line in lines:
line = line.decode('utf-8').strip("\n").strip("\r\n")
- self.character_str += line
+ self.character_str.append(line)
if use_space_char:
- self.character_str += " "
+ self.character_str.append(" ")
dict_character = list(self.character_str)
dict_character = self.add_special_char(dict_character)
self.dict = {}
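Building `character_str` as a list keeps each dictionary line as one token when `dict_character = list(self.character_str)` runs; with a string, a multi-character line would be split into single characters. A two-line sketch of the difference (dummy dictionary):

```python
lines = ["ab", "c"]  # dictionary file where one line holds a multi-char token

print(list("".join(lines)))  # ['a', 'b', 'c'] -- string form splits the token
print(list(lines))           # ['ab', 'c']     -- list form keeps it intact
```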
diff --git a/ppocr/data/imaug/rec_img_aug.py b/ppocr/data/imaug/rec_img_aug.py
index 960a11be16a9090d80b5c5a27069246d1bcaa3e7..2f70b51a3b88422274353046209c6d0d4dc79489 100644
--- a/ppocr/data/imaug/rec_img_aug.py
+++ b/ppocr/data/imaug/rec_img_aug.py
@@ -16,6 +16,7 @@ import math
import cv2
import numpy as np
import random
+import copy
from PIL import Image
from .text_image_aug import tia_perspective, tia_stretch, tia_distort
@@ -81,7 +82,7 @@ class ClsResizeImg(object):
def __call__(self, data):
img = data['image']
- norm_img = resize_norm_img(img, self.image_shape)
+ norm_img, _ = resize_norm_img(img, self.image_shape)
data['image'] = norm_img
return data
@@ -206,6 +207,25 @@ class PRENResizeImg(object):
return data
+class SVTRRecResizeImg(object):
+ def __init__(self,
+ image_shape,
+ infer_mode=False,
+ character_dict_path='./ppocr/utils/ppocr_keys_v1.txt',
+ padding=True,
+ **kwargs):
+ self.image_shape = image_shape
+ self.infer_mode = infer_mode
+ self.character_dict_path = character_dict_path
+ self.padding = padding
+
+ def __call__(self, data):
+ img = data['image']
+ norm_img = resize_norm_img_svtr(img, self.image_shape, self.padding)
+ data['image'] = norm_img
+ return data
+
+
def resize_norm_img_sar(img, image_shape, width_downsample_ratio=0.25):
imgC, imgH, imgW_min, imgW_max = image_shape
h = img.shape[0]
@@ -324,6 +344,62 @@ def resize_norm_img_srn(img, image_shape):
return np.reshape(img_black, (c, row, col)).astype(np.float32)
+def resize_norm_img_svtr(img, image_shape, padding=False):
+ imgC, imgH, imgW = image_shape
+ h = img.shape[0]
+ w = img.shape[1]
+ if not padding:
+ if h > 2.0 * w:
+ image = Image.fromarray(img)
+ image1 = image.rotate(90, expand=True)
+ image2 = image.rotate(-90, expand=True)
+ img1 = np.array(image1)
+ img2 = np.array(image2)
+ else:
+ img1 = copy.deepcopy(img)
+ img2 = copy.deepcopy(img)
+
+ resized_image = cv2.resize(
+ img, (imgW, imgH), interpolation=cv2.INTER_LINEAR)
+ resized_image1 = cv2.resize(
+ img1, (imgW, imgH), interpolation=cv2.INTER_LINEAR)
+ resized_image2 = cv2.resize(
+ img2, (imgW, imgH), interpolation=cv2.INTER_LINEAR)
+ resized_w = imgW
+    else:
+        ratio = w / float(h)
+        if math.ceil(imgH * ratio) > imgW:
+            resized_w = imgW
+        else:
+            resized_w = int(math.ceil(imgH * ratio))
+        resized_image = cv2.resize(img, (resized_w, imgH))
+        # the padding branch builds no rotated views; reuse the same resize
+        resized_image1 = resized_image.copy()
+        resized_image2 = resized_image.copy()
+    resized_image = resized_image.astype('float32')
+    resized_image1 = resized_image1.astype('float32')
+    resized_image2 = resized_image2.astype('float32')
+    if image_shape[0] == 1:
+        resized_image = resized_image[np.newaxis, :] / 255
+        resized_image1 = resized_image1[np.newaxis, :] / 255
+        resized_image2 = resized_image2[np.newaxis, :] / 255
+ else:
+ resized_image = resized_image.transpose((2, 0, 1)) / 255
+ resized_image1 = resized_image1.transpose((2, 0, 1)) / 255
+ resized_image2 = resized_image2.transpose((2, 0, 1)) / 255
+ resized_image -= 0.5
+ resized_image /= 0.5
+ resized_image1 -= 0.5
+ resized_image1 /= 0.5
+ resized_image2 -= 0.5
+ resized_image2 /= 0.5
+ padding_im = np.zeros((3, imgC, imgH, imgW), dtype=np.float32)
+ padding_im[0, :, :, 0:resized_w] = resized_image
+ padding_im[1, :, :, 0:resized_w] = resized_image1
+ padding_im[2, :, :, 0:resized_w] = resized_image2
+ return padding_im
+
+
def srn_other_inputs(image_shape, num_heads, max_text_length):
imgC, imgH, imgW = image_shape
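A quick shape check of the new helper (dummy input; for crops with h <= 2w the two extra views are plain copies of the original):

```python
import numpy as np
from ppocr.data.imaug.rec_img_aug import resize_norm_img_svtr

img = np.random.randint(0, 255, (48, 320, 3), dtype=np.uint8)  # HWC, BGR
out = resize_norm_img_svtr(img, [3, 64, 256], padding=False)
print(out.shape)  # (3, 3, 64, 256): [original, +90deg, -90deg] x CHW
```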
diff --git a/ppocr/data/imaug/ssl_img_aug.py b/ppocr/data/imaug/ssl_img_aug.py
new file mode 100644
index 0000000000000000000000000000000000000000..f9ed6ac3e230ae85754bf40189c392c7e6e29b63
--- /dev/null
+++ b/ppocr/data/imaug/ssl_img_aug.py
@@ -0,0 +1,60 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+import cv2
+import numpy as np
+import random
+from PIL import Image
+
+from .rec_img_aug import resize_norm_img
+
+
+class SSLRotateResize(object):
+ def __init__(self,
+ image_shape,
+ padding=False,
+ select_all=True,
+ mode="train",
+ **kwargs):
+ self.image_shape = image_shape
+ self.padding = padding
+ self.select_all = select_all
+ self.mode = mode
+
+ def __call__(self, data):
+ img = data["image"]
+
+ data["image_r90"] = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
+ data["image_r180"] = cv2.rotate(data["image_r90"],
+ cv2.ROTATE_90_CLOCKWISE)
+ data["image_r270"] = cv2.rotate(data["image_r180"],
+ cv2.ROTATE_90_CLOCKWISE)
+
+ images = []
+ for key in ["image", "image_r90", "image_r180", "image_r270"]:
+ images.append(
+ resize_norm_img(
+ data.pop(key),
+ image_shape=self.image_shape,
+ padding=self.padding)[0])
+ data["image"] = np.stack(images, axis=0)
+ data["label"] = np.array(list(range(4)))
+ if not self.select_all:
+ data["image"] = data["image"][0::2] # just choose 0 and 180
+ data["label"] = data["label"][0:2] # label needs to be continuous
+ if self.mode == "test":
+ data["image"] = data["image"][0]
+ data["label"] = data["label"][0]
+ return data
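A minimal usage sketch of the transform (dummy image; shapes follow the rotnet config):

```python
import numpy as np
from ppocr.data.imaug.ssl_img_aug import SSLRotateResize

op = SSLRotateResize(image_shape=[3, 48, 320])
data = op({"image": np.random.randint(0, 255, (48, 320, 3), dtype=np.uint8)})
print(data["image"].shape)  # (4, 3, 48, 320): 0/90/180/270 degree views
print(data["label"])        # [0 1 2 3]
```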
diff --git a/ppocr/modeling/backbones/rec_svtrnet.py b/ppocr/modeling/backbones/rec_svtrnet.py
index bef8f3688d8efaa113a81d1acb135273746e6a93..5ded74378c60e6f08a4adf68671afaa1168737b6 100644
--- a/ppocr/modeling/backbones/rec_svtrnet.py
+++ b/ppocr/modeling/backbones/rec_svtrnet.py
@@ -296,47 +296,47 @@ class PatchEmbed(nn.Layer):
if sub_num == 2:
self.proj = nn.Sequential(
ConvBNLayer(
- in_channels,
- embed_dim // 2,
- 3,
- 2,
- 1,
+ in_channels=in_channels,
+ out_channels=embed_dim // 2,
+ kernel_size=3,
+ stride=2,
+ padding=1,
act=nn.GELU,
bias_attr=None),
ConvBNLayer(
- embed_dim // 2,
- embed_dim,
- 3,
- 2,
- 1,
+ in_channels=embed_dim // 2,
+ out_channels=embed_dim,
+ kernel_size=3,
+ stride=2,
+ padding=1,
act=nn.GELU,
bias_attr=None))
if sub_num == 3:
self.proj = nn.Sequential(
ConvBNLayer(
- in_channels,
- embed_dim // 4,
- 3,
- 2,
- 1,
+ in_channels=in_channels,
+ out_channels=embed_dim // 4,
+ kernel_size=3,
+ stride=2,
+ padding=1,
act=nn.GELU,
bias_attr=None),
ConvBNLayer(
- embed_dim // 4,
- embed_dim // 2,
- 3,
- 2,
- 1,
+ in_channels=embed_dim // 4,
+ out_channels=embed_dim // 2,
+ kernel_size=3,
+ stride=2,
+ padding=1,
act=nn.GELU,
bias_attr=None),
ConvBNLayer(
-                embed_dim // 2,
-                embed_dim,
- 3,
- 2,
- 1,
+ in_channels=embed_dim // 2,
+ out_channels=embed_dim,
+ kernel_size=3,
+ stride=2,
+ padding=1,
act=nn.GELU,
- bias_attr=None), )
+ bias_attr=None))
def forward(self, x):
B, C, H, W = x.shape
@@ -455,7 +457,7 @@ class SVTRNet(nn.Layer):
qkv_bias=qkv_bias,
qk_scale=qk_scale,
drop=drop_rate,
- act_layer=nn.Swish,
+ act_layer=eval(act),
attn_drop=attn_drop_rate,
drop_path=dpr[0:depth[0]][i],
norm_layer=norm_layer,
diff --git a/ppocr/modeling/transforms/stn.py b/ppocr/modeling/transforms/stn.py
index 6f2bdda050f217d8253740001901fbff4065782a..1b15d5b8a7b7a1b1ab686d20acea750437463939 100644
--- a/ppocr/modeling/transforms/stn.py
+++ b/ppocr/modeling/transforms/stn.py
@@ -128,6 +128,10 @@ class STN_ON(nn.Layer):
self.out_channels = in_channels
def forward(self, image):
+        if len(image.shape) == 5:
+            # fold the per-image views into the batch dimension
+            image = image.reshape(
+                [-1, image.shape[-3], image.shape[-2], image.shape[-1]])
stn_input = paddle.nn.functional.interpolate(
image, self.tps_inputsize, mode="bilinear", align_corners=True)
stn_img_feat, ctrl_points = self.stn_head(stn_input)
diff --git a/ppocr/modeling/transforms/tps_spatial_transformer.py b/ppocr/modeling/transforms/tps_spatial_transformer.py
index 043bb56b8a526c12b2e0799bf41e128c6499c1fc..cb1cb10aaa98dffa2f720dc81afdf82d25e071ca 100644
--- a/ppocr/modeling/transforms/tps_spatial_transformer.py
+++ b/ppocr/modeling/transforms/tps_spatial_transformer.py
@@ -138,9 +138,9 @@ class TPSSpatialTransformer(nn.Layer):
assert source_control_points.shape[2] == 2
batch_size = paddle.shape(source_control_points)[0]
- self.padding_matrix = paddle.expand(
+ padding_matrix = paddle.expand(
self.padding_matrix, shape=[batch_size, 3, 2])
- Y = paddle.concat([source_control_points, self.padding_matrix], 1)
+ Y = paddle.concat([source_control_points, padding_matrix], 1)
mapping_matrix = paddle.matmul(self.inverse_kernel, Y)
source_coordinate = paddle.matmul(self.target_coordinate_repr,
mapping_matrix)
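The fix keeps the registered 3x2 buffer intact instead of overwriting it with its batch-expanded copy, which previously left a batch-sized tensor behind and broke the next call with a different batch size. The pattern in isolation (a sketch, not repo code):

```python
import paddle

padding_matrix = paddle.zeros([3, 2])  # created once, batch-independent

def forward_step(source_control_points):
    batch_size = paddle.shape(source_control_points)[0]
    # expand into a local variable; the stored 3x2 buffer stays untouched
    expanded = paddle.expand(padding_matrix, shape=[batch_size, 3, 2])
    return paddle.concat([source_control_points, expanded], 1)

print(forward_step(paddle.rand([8, 20, 2])).shape)  # [8, 23, 2]
```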
diff --git a/ppocr/optimizer/__init__.py b/ppocr/optimizer/__init__.py
index 4110fb47678583cff826a9bc855b3fb378a533f9..a6bd2ebb4a81427245dc10e446cd2da101d53bd4 100644
--- a/ppocr/optimizer/__init__.py
+++ b/ppocr/optimizer/__init__.py
@@ -30,7 +30,7 @@ def build_lr_scheduler(lr_config, epochs, step_each_epoch):
return lr
-def build_optimizer(config, epochs, step_each_epoch, parameters):
+def build_optimizer(config, epochs, step_each_epoch, model):
from . import regularizer, optimizer
config = copy.deepcopy(config)
# step1 build lr
@@ -43,6 +43,8 @@ def build_optimizer(config, epochs, step_each_epoch, parameters):
if not hasattr(regularizer, reg_name):
reg_name += 'Decay'
reg = getattr(regularizer, reg_name)(**reg_config)()
+ elif 'weight_decay' in config:
+ reg = config.pop('weight_decay')
else:
reg = None
@@ -57,4 +59,4 @@ def build_optimizer(config, epochs, step_each_epoch, parameters):
weight_decay=reg,
grad_clip=grad_clip,
**config)
- return optim(parameters), lr
+ return optim(model), lr
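Call sites now hand over the model itself so the optimizer wrappers can inspect named parameters (needed by the AdamW no-decay lists below). A sketch of the new call, assuming a PaddleOCR checkout on the path:

```python
import paddle
from ppocr.optimizer import build_optimizer

model = paddle.nn.Linear(8, 8)
opt_config = {
    'name': 'Adam',
    'beta1': 0.9,
    'beta2': 0.999,
    'lr': {'name': 'Cosine', 'learning_rate': 0.001},
}
optimizer, lr_scheduler = build_optimizer(
    opt_config, epochs=100, step_each_epoch=100,
    model=model)  # was: parameters=model.parameters()
```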
diff --git a/ppocr/optimizer/optimizer.py b/ppocr/optimizer/optimizer.py
index b98081227e180edbf023a8b5b7a0b82bb7c631e5..c450a3a3684eb44cdc758a2b27783b5a81945c38 100644
--- a/ppocr/optimizer/optimizer.py
+++ b/ppocr/optimizer/optimizer.py
@@ -42,13 +42,13 @@ class Momentum(object):
self.weight_decay = weight_decay
self.grad_clip = grad_clip
- def __call__(self, parameters):
+ def __call__(self, model):
opt = optim.Momentum(
learning_rate=self.learning_rate,
momentum=self.momentum,
weight_decay=self.weight_decay,
grad_clip=self.grad_clip,
- parameters=parameters)
+ parameters=model.parameters())
return opt
@@ -75,7 +75,7 @@ class Adam(object):
self.name = name
self.lazy_mode = lazy_mode
- def __call__(self, parameters):
+ def __call__(self, model):
opt = optim.Adam(
learning_rate=self.learning_rate,
beta1=self.beta1,
@@ -85,7 +85,7 @@ class Adam(object):
grad_clip=self.grad_clip,
name=self.name,
lazy_mode=self.lazy_mode,
- parameters=parameters)
+ parameters=model.parameters())
return opt
@@ -117,7 +117,7 @@ class RMSProp(object):
self.weight_decay = weight_decay
self.grad_clip = grad_clip
- def __call__(self, parameters):
+ def __call__(self, model):
opt = optim.RMSProp(
learning_rate=self.learning_rate,
momentum=self.momentum,
@@ -125,7 +125,7 @@ class RMSProp(object):
epsilon=self.epsilon,
weight_decay=self.weight_decay,
grad_clip=self.grad_clip,
- parameters=parameters)
+ parameters=model.parameters())
return opt
@@ -148,7 +148,7 @@ class Adadelta(object):
self.grad_clip = grad_clip
self.name = name
- def __call__(self, parameters):
+ def __call__(self, model):
opt = optim.Adadelta(
learning_rate=self.learning_rate,
epsilon=self.epsilon,
@@ -156,7 +156,7 @@ class Adadelta(object):
weight_decay=self.weight_decay,
grad_clip=self.grad_clip,
name=self.name,
- parameters=parameters)
+ parameters=model.parameters())
return opt
@@ -165,31 +165,55 @@ class AdamW(object):
learning_rate=0.001,
beta1=0.9,
beta2=0.999,
- epsilon=1e-08,
+ epsilon=1e-8,
weight_decay=0.01,
+ multi_precision=False,
grad_clip=None,
+ no_weight_decay_name=None,
+ one_dim_param_no_weight_decay=False,
name=None,
lazy_mode=False,
- **kwargs):
+ **args):
+ super().__init__()
self.learning_rate = learning_rate
self.beta1 = beta1
self.beta2 = beta2
self.epsilon = epsilon
-        self.learning_rate = learning_rate
self.weight_decay = 0.01 if weight_decay is None else weight_decay
self.grad_clip = grad_clip
self.name = name
self.lazy_mode = lazy_mode
-
- def __call__(self, parameters):
+ self.multi_precision = multi_precision
+ self.no_weight_decay_name_list = no_weight_decay_name.split(
+ ) if no_weight_decay_name else []
+ self.one_dim_param_no_weight_decay = one_dim_param_no_weight_decay
+
+ def __call__(self, model):
+ parameters = model.parameters()
+
+        self.no_weight_decay_param_name_list = [
+            p.name for n, p in model.named_parameters()
+            if any(nd in n for nd in self.no_weight_decay_name_list)
+        ]
+
+ if self.one_dim_param_no_weight_decay:
+ self.no_weight_decay_param_name_list += [
+ p.name for n, p in model.named_parameters() if len(p.shape) == 1
+ ]
+
opt = optim.AdamW(
learning_rate=self.learning_rate,
beta1=self.beta1,
beta2=self.beta2,
epsilon=self.epsilon,
+ parameters=parameters,
weight_decay=self.weight_decay,
+ multi_precision=self.multi_precision,
grad_clip=self.grad_clip,
name=self.name,
lazy_mode=self.lazy_mode,
- parameters=parameters)
+ apply_decay_param_fun=self._apply_decay_param_fun)
return opt
+
+ def _apply_decay_param_fun(self, name):
+ return name not in self.no_weight_decay_param_name_list
\ No newline at end of file
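With the rec_svtrnet.yml settings above (`no_weight_decay_name: norm pos_embed`, `one_dim_param_no_weight_decay: true`), the wrapper exempts every parameter whose structured name contains 'norm' or 'pos_embed', plus all 1-D parameters (biases, norm scales), from weight decay. A minimal sketch of the selection logic:

```python
import paddle
from ppocr.optimizer.optimizer import AdamW

model = paddle.nn.Sequential(
    ('fc', paddle.nn.Linear(8, 8)), ('norm', paddle.nn.LayerNorm(8)))

builder = AdamW(
    learning_rate=5e-4,
    weight_decay=0.05,
    no_weight_decay_name='norm pos_embed',
    one_dim_param_no_weight_decay=True)
opt = builder(model)

# Decay is applied only where _apply_decay_param_fun returns True:
for n, p in model.named_parameters():
    print(n, builder._apply_decay_param_fun(p.name))
# fc.weight -> True; fc.bias and both norm parameters -> False
```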
diff --git a/ppocr/postprocess/__init__.py b/ppocr/postprocess/__init__.py
index f50b5f1c5f8e617066bb47636c8f4d2b171b6ecb..390f6f4560f9814a3af757a4fd16c55fe93d01f9 100644
--- a/ppocr/postprocess/__init__.py
+++ b/ppocr/postprocess/__init__.py
@@ -27,7 +27,7 @@ from .sast_postprocess import SASTPostProcess
from .fce_postprocess import FCEPostProcess
from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, \
DistillationCTCLabelDecode, TableLabelDecode, NRTRLabelDecode, SARLabelDecode, \
- SEEDLabelDecode, PRENLabelDecode
+ SEEDLabelDecode, PRENLabelDecode, SVTRLabelDecode
from .cls_postprocess import ClsPostProcess
from .pg_postprocess import PGPostProcess
from .vqa_token_ser_layoutlm_postprocess import VQASerTokenLayoutLMPostProcess
@@ -42,7 +42,7 @@ def build_post_process(config, global_config=None):
'DistillationDBPostProcess', 'NRTRLabelDecode', 'SARLabelDecode',
'SEEDLabelDecode', 'VQASerTokenLayoutLMPostProcess',
'VQAReTokenLayoutLMPostProcess', 'PRENLabelDecode',
- 'DistillationSARLabelDecode'
+ 'DistillationSARLabelDecode', 'SVTRLabelDecode'
]
if config['name'] == 'PSEPostProcess':
diff --git a/ppocr/postprocess/cls_postprocess.py b/ppocr/postprocess/cls_postprocess.py
index 77e7f46d6f774ffb81f8e9cbd6b100c780665dca..9a27ba0831358564d99a6ec698a5019eae1c25f7 100644
--- a/ppocr/postprocess/cls_postprocess.py
+++ b/ppocr/postprocess/cls_postprocess.py
@@ -17,17 +17,26 @@ import paddle
class ClsPostProcess(object):
""" Convert between text-label and text-index """
- def __init__(self, label_list, **kwargs):
+ def __init__(self, label_list=None, key=None, **kwargs):
super(ClsPostProcess, self).__init__()
self.label_list = label_list
+ self.key = key
def __call__(self, preds, label=None, *args, **kwargs):
+ if self.key is not None:
+ preds = preds[self.key]
+
+ label_list = self.label_list
+ if label_list is None:
+ label_list = {idx: idx for idx in range(preds.shape[-1])}
+
if isinstance(preds, paddle.Tensor):
preds = preds.numpy()
+
pred_idxs = preds.argmax(axis=1)
- decode_out = [(self.label_list[idx], preds[i, idx])
+ decode_out = [(label_list[idx], preds[i, idx])
for i, idx in enumerate(pred_idxs)]
if label is None:
return decode_out
- label = [(self.label_list[idx], 1.0) for idx in label]
+ label = [(label_list[idx], 1.0) for idx in label]
return decode_out, label
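The reworked post-process defaults `label_list` to the raw class indices (exactly what the 4-way rotation task needs) and can pick a tensor out of a dict-shaped prediction via `key`. A small usage sketch:

```python
import numpy as np
from ppocr.postprocess.cls_postprocess import ClsPostProcess

post = ClsPostProcess()  # no label_list: labels default to 0..C-1
print(post(np.array([[0.1, 0.7, 0.1, 0.1]])))  # [(1, 0.7)]

post = ClsPostProcess(label_list=['0', '180'])
print(post(np.array([[0.2, 0.8]])))            # [('180', 0.8)]
```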
diff --git a/ppocr/postprocess/rec_postprocess.py b/ppocr/postprocess/rec_postprocess.py
index bf0fd890bf25949361665d212bf8e1a657054e5b..50f11f899fb4dd49da75199095772a92cc4a8d7b 100644
--- a/ppocr/postprocess/rec_postprocess.py
+++ b/ppocr/postprocess/rec_postprocess.py
@@ -752,3 +752,40 @@ class PRENLabelDecode(BaseRecLabelDecode):
return text
label = self.decode(label)
return text, label
+
+
+class SVTRLabelDecode(BaseRecLabelDecode):
+ """ Convert between text-label and text-index """
+
+ def __init__(self, character_dict_path=None, use_space_char=False,
+ **kwargs):
+ super(SVTRLabelDecode, self).__init__(character_dict_path,
+ use_space_char)
+
+ def __call__(self, preds, label=None, *args, **kwargs):
+ if isinstance(preds, tuple):
+ preds = preds[-1]
+ if isinstance(preds, paddle.Tensor):
+ preds = preds.numpy()
+ preds_idx = preds.argmax(axis=-1)
+ preds_prob = preds.max(axis=-1)
+
+ text = self.decode(preds_idx, preds_prob, is_remove_duplicate=True)
+ return_text = []
+ for i in range(0, len(text), 3):
+ text0 = text[i]
+ text1 = text[i + 1]
+ text2 = text[i + 2]
+
+ text_pred = [text0[0], text1[0], text2[0]]
+ text_prob = [text0[1], text1[1], text2[1]]
+ id_max = text_prob.index(max(text_prob))
+ return_text.append((text_pred[id_max], text_prob[id_max]))
+ if label is None:
+ return return_text
+ label = self.decode(label)
+ return return_text, label
+
+ def add_special_char(self, dict_character):
+ dict_character = ['blank'] + dict_character
+ return dict_character
\ No newline at end of file
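Since resize_norm_img_svtr feeds three normalized views per input image, the decoded list holds three consecutive candidates per image; the decoder walks it in strides of three and keeps the highest-probability one. A worked sketch of that vote (dummy decode output):

```python
text = [("hello", 0.61), ("olleh", 0.20), ("he11o", 0.45),   # image 0
        ("world", 0.30), ("w0rld", 0.72), ("wor1d", 0.10)]   # image 1

return_text = []
for i in range(0, len(text), 3):
    candidates = text[i:i + 3]
    return_text.append(max(candidates, key=lambda t: t[1]))
print(return_text)  # [('hello', 0.61), ('w0rld', 0.72)]
```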
diff --git a/ppocr/utils/dict/ka_dict.txt b/ppocr/utils/dict/ka_dict.txt
index 33d605c4de106c3c4b2504f5b3c42cdadd076dd8..d506b691bd1a6c55299ad89a72cf3a69a2c879a9 100644
--- a/ppocr/utils/dict/ka_dict.txt
+++ b/ppocr/utils/dict/ka_dict.txt
@@ -21,7 +21,7 @@ l
8
.
j
-p
+p
ಗ
ು
ಣ
diff --git a/ppocr/utils/dict/ta_dict.txt b/ppocr/utils/dict/ta_dict.txt
index d1bae501ad2556bb59b16a6c4b27a27091a6cbcf..19d81892c205627f296adbf8b20ea41aba2de5d0 100644
--- a/ppocr/utils/dict/ta_dict.txt
+++ b/ppocr/utils/dict/ta_dict.txt
@@ -22,7 +22,7 @@ l
8
.
j
-p
+p
ப
ூ
த
diff --git a/test_tipc/configs/ch_PP-OCRv2_det/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_PP-OCRv2_det/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
new file mode 100644
index 0000000000000000000000000000000000000000..033d40a80a3569f8bfd408cdb6df37e7ba5ecd0c
--- /dev/null
+++ b/test_tipc/configs/ch_PP-OCRv2_det/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:ch_PPOCRv2_det
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:amp
+Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=500
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
+null:null
+##
+trainer:norm_train
+norm_train:tools/train.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o
+pact_train:null
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:null
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.checkpoints:
+norm_export:tools/export_model.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o
+quant_export:null
+fpgm_export:
+distill_export:null
+export1:null
+export2:null
+inference_dir:Student
+infer_model:./inference/ch_PP-OCRv2_det_infer/
+infer_export:null
+infer_quant:False
+inference:tools/infer/predict_det.py
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1
+--use_tensorrt:False|True
+--precision:fp32|fp16|int8
+--det_model_dir:
+--image_dir:./inference/ch_det_data_50/all-sum-510/
+null:null
+--benchmark:True
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,640,640]}];[{float32,[3,960,960]}]
diff --git a/test_tipc/configs/ch_PP-OCRv2_det_PACT/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_PP-OCRv2_det_PACT/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d922a4a5dad67da81e3c9cf7bed48a0431a88b84
--- /dev/null
+++ b/test_tipc/configs/ch_PP-OCRv2_det_PACT/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:ch_PPOCRv2_det_PACT
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:amp
+Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=500
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
+null:null
+##
+trainer:pact_train
+norm_train:null
+pact_train:deploy/slim/quantization/quant.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:null
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.checkpoints:
+norm_export:null
+quant_export:deploy/slim/quantization/export_model.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o
+fpgm_export:
+distill_export:null
+export1:null
+export2:null
+inference_dir:Student
+infer_model:./inference/ch_PP-OCRv2_det_infer/
+infer_export:null
+infer_quant:False
+inference:tools/infer/predict_det.py
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1
+--use_tensorrt:False|True
+--precision:fp32|fp16|int8
+--det_model_dir:
+--image_dir:./inference/ch_det_data_50/all-sum-510/
+null:null
+--benchmark:True
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,640,640]}];[{float32,[3,960,960]}]
diff --git a/test_tipc/configs/ch_PP-OCRv2_rec/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_PP-OCRv2_rec/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7c438cb8a3b6907c9ca352e90605d8b4f6fb17fd
--- /dev/null
+++ b/test_tipc/configs/ch_PP-OCRv2_rec/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:PPOCRv2_ocr_rec
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:amp
+Global.epoch_num:lite_train_lite_infer=3|whole_train_whole_infer=300
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=16|whole_train_whole_infer=128
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./inference/rec_inference
+null:null
+##
+trainer:norm_train
+norm_train:tools/train.py -c test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml -o
+pact_train:null
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:null
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.checkpoints:
+norm_export:tools/export_model.py -c test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml -o
+quant_export:
+fpgm_export:
+distill_export:null
+export1:null
+export2:null
+inference_dir:Student
+infer_model:./inference/ch_PP-OCRv2_rec_infer
+infer_export:null
+infer_quant:False
+inference:tools/infer/predict_rec.py
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1|6
+--use_tensorrt:False|True
+--precision:fp32|int8
+--rec_model_dir:
+--image_dir:./inference/rec_inference
+null:null
+--benchmark:True
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,32,320]}]
diff --git a/test_tipc/configs/ch_PP-OCRv2_rec_PACT/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_PP-OCRv2_rec_PACT/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e22d8a564b008206611469048b424b528dd379bd
--- /dev/null
+++ b/test_tipc/configs/ch_PP-OCRv2_rec_PACT/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:ch_PPOCRv2_rec_PACT
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:amp
+Global.epoch_num:lite_train_lite_infer=3|whole_train_whole_infer=300
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=16|whole_train_whole_infer=128
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./inference/rec_inference
+null:null
+##
+trainer:pact_train
+norm_train:null
+pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml -o
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:null
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.checkpoints:
+norm_export:null
+quant_export:deploy/slim/quantization/export_model.py -c test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml -o
+fpgm_export:null
+distill_export:null
+export1:null
+export2:null
+inference_dir:Student
+infer_model:./inference/ch_PP-OCRv2_rec_slim_quant_infer
+infer_export:null
+infer_quant:True
+inference:tools/infer/predict_rec.py
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1|6
+--use_tensorrt:False|True
+--precision:fp32|int8
+--rec_model_dir:
+--image_dir:./inference/rec_inference
+null:null
+--benchmark:True
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,32,320]}]
diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_det/det_mv3_db.yml b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/det_mv3_db.yml
deleted file mode 100644
index 5eada6d53dd3364238bdfc6a3c40515ca0726688..0000000000000000000000000000000000000000
--- a/test_tipc/configs/ch_ppocr_mobile_v2.0_det/det_mv3_db.yml
+++ /dev/null
@@ -1,126 +0,0 @@
-Global:
- use_gpu: false
- epoch_num: 5
- log_smooth_window: 20
- print_batch_step: 1
- save_model_dir: ./output/db_mv3/
- save_epoch_step: 1200
- # evaluation is run every 2000 iterations
- eval_batch_step: [0, 400]
- cal_metric_during_train: False
- pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
- checkpoints:
- save_inference_dir:
- use_visualdl: False
- infer_img: doc/imgs_en/img_10.jpg
- save_res_path: ./output/det_db/predicts_db.txt
-
-Architecture:
- model_type: det
- algorithm: DB
- Transform:
- Backbone:
- name: MobileNetV3
- scale: 0.5
- model_name: large
- disable_se: False
- Neck:
- name: DBFPN
- out_channels: 256
- Head:
- name: DBHead
- k: 50
-
-Loss:
- name: DBLoss
- balance_loss: true
- main_loss_type: DiceLoss
- alpha: 5
- beta: 10
- ohem_ratio: 3
-
-Optimizer:
- name: Adam #Momentum
- #momentum: 0.9
- beta1: 0.9
- beta2: 0.999
- lr:
- learning_rate: 0.001
- regularizer:
- name: 'L2'
- factor: 0
-
-PostProcess:
- name: DBPostProcess
- thresh: 0.3
- box_thresh: 0.6
- max_candidates: 1000
- unclip_ratio: 1.5
-
-Metric:
- name: DetMetric
- main_indicator: hmean
-
-Train:
- dataset:
- name: SimpleDataSet
- data_dir: ./train_data/icdar2015/text_localization/
- label_file_list:
- - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
- ratio_list: [1.0]
- transforms:
- - DecodeImage: # load image
- img_mode: BGR
- channel_first: False
- - DetLabelEncode: # Class handling label
- - Resize:
- size: [640, 640]
- - MakeBorderMap:
- shrink_ratio: 0.4
- thresh_min: 0.3
- thresh_max: 0.7
- - MakeShrinkMap:
- shrink_ratio: 0.4
- min_text_size: 8
- - NormalizeImage:
- scale: 1./255.
- mean: [0.485, 0.456, 0.406]
- std: [0.229, 0.224, 0.225]
- order: 'hwc'
- - ToCHWImage:
- - KeepKeys:
- keep_keys: ['image', 'threshold_map', 'threshold_mask', 'shrink_map', 'shrink_mask'] # the order of the dataloader list
- loader:
- shuffle: False
- drop_last: False
- batch_size_per_card: 1
- num_workers: 0
- use_shared_memory: False
-
-Eval:
- dataset:
- name: SimpleDataSet
- data_dir: ./train_data/icdar2015/text_localization/
- label_file_list:
- - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
- transforms:
- - DecodeImage: # load image
- img_mode: BGR
- channel_first: False
- - DetLabelEncode: # Class handling label
- - DetResizeForTest:
- image_shape: [736, 1280]
- - NormalizeImage:
- scale: 1./255.
- mean: [0.485, 0.456, 0.406]
- std: [0.229, 0.224, 0.225]
- order: 'hwc'
- - ToCHWImage:
- - KeepKeys:
- keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
- loader:
- shuffle: False
- drop_last: False
- batch_size_per_card: 1 # must be 1
- num_workers: 0
- use_shared_memory: False
diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
index ff1c7432df75f78fd6c45d995f50a9642d44637c..593e7ec7ed42af9b65c520852ff6372f89890170 100644
--- a/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
+++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
@@ -1,10 +1,10 @@
===========================train_params===========================
-model_name:ocr_det
+model_name:ch_ppocr_mobile_v2.0_det
python:python3.7
gpu_list:0|0,1
Global.use_gpu:True|True
Global.auto_cast:amp
-Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300
+Global.epoch_num:lite_train_lite_infer=100|whole_train_whole_infer=300
Global.save_model_dir:./output/
Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4
Global.pretrained_model:null
@@ -12,10 +12,10 @@ train_model_name:latest
train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
null:null
##
-trainer:norm_train|pact_train|fpgm_train
-norm_train:tools/train.py -c test_tipc/configs/ppocr_det_mobile/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained
-pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/ppocr_det_mobile/det_mv3_db.yml -o
-fpgm_train:deploy/slim/prune/sensitivity_anal.py -c test_tipc/configs/ppocr_det_mobile/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy
+trainer:norm_train
+norm_train:tools/train.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained
+pact_train:null
+fpgm_train:null
distill_train:null
null:null
null:null
@@ -26,10 +26,10 @@ null:null
##
===========================infer_params===========================
Global.save_inference_dir:./output/
-Global.pretrained_model:
-norm_export:tools/export_model.py -c test_tipc/configs/ppocr_det_mobile/det_mv3_db.yml -o
-quant_export:deploy/slim/quantization/export_model.py -c test_tipc/configs/ppocr_det_mobile/det_mv3_db.yml -o
-fpgm_export:deploy/slim/prune/export_prune_model.py -c test_tipc/configs/ppocr_det_mobile/det_mv3_db.yml -o
+Global.checkpoints:
+norm_export:tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o
+quant_export:null
+fpgm_export:null
distill_export:null
export1:null
export2:null
@@ -49,3 +49,5 @@ inference:tools/infer/predict_det.py
null:null
--benchmark:True
null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,640,640]}];[{float32,[3,960,960]}]
\ No newline at end of file
diff --git a/test_tipc/configs/ch_ppocr_mobile_V2.0_det_FPGM/train_infer_python.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det_FPGM/train_infer_python.txt
similarity index 97%
rename from test_tipc/configs/ch_ppocr_mobile_V2.0_det_FPGM/train_infer_python.txt
rename to test_tipc/configs/ch_ppocr_mobile_v2.0_det_FPGM/train_infer_python.txt
index 331d6bdb7103294eb1b33b9978e5f99c2212195b..47ccf2e69e75bc8c215be8d1837e5248d1b4b513 100644
--- a/test_tipc/configs/ch_ppocr_mobile_V2.0_det_FPGM/train_infer_python.txt
+++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_det_FPGM/train_infer_python.txt
@@ -1,5 +1,5 @@
===========================train_params===========================
-model_name:ocr_det
+model_name:ch_ppocr_mobile_v2.0_det_FPGM
python:python3.7
gpu_list:0|0,1
Global.use_gpu:True|True
diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_det_FPGM/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det_FPGM/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5a95f026850b750bfadb85e0955f7426e5e73cb6
--- /dev/null
+++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_det_FPGM/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:ch_ppocr_mobile_v2.0_det_FPGM
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:amp
+Global.epoch_num:lite_train_lite_infer=5|whole_train_whole_infer=300
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
+null:null
+##
+trainer:fpgm_train
+norm_train:null
+pact_train:null
+fpgm_train:deploy/slim/prune/sensitivity_anal.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:null
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.checkpoints:
+norm_export:null
+quant_export:null
+fpgm_export:deploy/slim/prune/export_prune_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o
+distill_export:null
+export1:null
+export2:null
+inference_dir:null
+train_model:null
+infer_export:null
+infer_quant:False
+inference:tools/infer/predict_det.py
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1
+--use_tensorrt:False|True
+--precision:fp32|fp16|int8
+--det_model_dir:
+--image_dir:./inference/ch_det_data_50/all-sum-510/
+null:null
+--benchmark:True
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,640,640]}];[{float32,[3,960,960]}]
\ No newline at end of file
diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_det_PACT/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det_PACT/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1f9bec12ada6894fcffbe697ae4da2f0df95cc62
--- /dev/null
+++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_det_PACT/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:ch_ppocr_mobile_v2.0_det_PACT
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:amp
+Global.epoch_num:lite_train_lite_infer=20|whole_train_whole_infer=300
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
+null:null
+##
+trainer:pact_train
+norm_train:null
+pact_train:deploy/slim/quantization/quant.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:null
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.checkpoints:
+norm_export:null
+quant_export:deploy/slim/quantization/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o
+fpgm_export:null
+distill_export:null
+export1:null
+export2:null
+inference_dir:null
+train_model:./inference/ch_ppocr_mobile_v2.0_det_prune_infer/
+infer_export:null
+infer_quant:False
+inference:tools/infer/predict_det.py
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1
+--use_tensorrt:False|True
+--precision:fp32|fp16|int8
+--det_model_dir:
+--image_dir:./inference/ch_det_data_50/all-sum-510/
+null:null
+--benchmark:True
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,640,640]}];[{float32,[3,960,960]}]
diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_rec/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
new file mode 100644
index 0000000000000000000000000000000000000000..30fb939bff646adf301191f88a9a499acf9c61de
--- /dev/null
+++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:ch_ppocr_mobile_v2.0_rec
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:amp
+Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=300
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./inference/rec_inference
+null:null
+##
+trainer:norm_train
+norm_train:tools/train.py -c configs/rec/rec_icdar15_train.yml -o
+pact_train:null
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:tools/eval.py -c configs/rec/rec_icdar15_train.yml -o
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.checkpoints:
+norm_export:tools/export_model.py -c configs/rec/rec_icdar15_train.yml -o
+quant_export:null
+fpgm_export:null
+distill_export:null
+export1:null
+export2:null
+##
+train_model:./inference/ch_ppocr_mobile_v2.0_rec_train/best_accuracy
+infer_export:tools/export_model.py -c configs/rec/rec_icdar15_train.yml -o
+infer_quant:False
+inference:tools/infer/predict_rec.py
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1|6
+--use_tensorrt:True|False
+--precision:fp32|int8
+--rec_model_dir:
+--image_dir:./inference/rec_inference
+--save_log_path:./test/output/
+--benchmark:True
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,32,100]}]
diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_FPGM/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_FPGM/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fda9cf4ddec6d3ab64045a4a7fdbb62183212021
--- /dev/null
+++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_FPGM/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:ch_ppocr_mobile_v2.0_rec_FPGM
+python:python3.7
+gpu_list:0
+Global.use_gpu:True|True
+Global.auto_cast:amp
+Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./train_data/ic15_data/test/word_1.png
+null:null
+##
+trainer:fpgm_train
+norm_train:null
+pact_train:null
+fpgm_train:deploy/slim/prune/sensitivity_anal.py -c test_tipc/configs/ch_ppocr_mobile_v2.0_rec_FPGM/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model=./pretrain_models/ch_ppocr_mobile_v2.0_rec_train/best_accuracy
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:null
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.checkpoints:
+norm_export:null
+quant_export:null
+fpgm_export:deploy/slim/prune/export_prune_model.py -c test_tipc/configs/ch_ppocr_mobile_v2.0_rec_FPGM/rec_chinese_lite_train_v2.0.yml -o
+distill_export:null
+export1:null
+export2:null
+inference_dir:null
+train_model:null
+infer_export:null
+infer_quant:False
+inference:tools/infer/predict_rec.py
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1
+--use_tensorrt:False|True
+--precision:fp32|int8
+--rec_model_dir:
+--image_dir:./inference/rec_inference
+null:null
+--benchmark:True
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,32,320]}]
\ No newline at end of file
diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
new file mode 100644
index 0000000000000000000000000000000000000000..abed3cfba9b3f8c0ed626dbfcbda8621d8787001
--- /dev/null
+++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:ch_ppocr_mobile_v2.0_rec_PACT
+python:python3.7
+gpu_list:0
+Global.use_gpu:True|True
+Global.auto_cast:amp
+Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./train_data/ic15_data/test/word_1.png
+null:null
+##
+trainer:pact_train
+norm_train:null
+pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/rec_chinese_lite_train_v2.0.yml -o
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:null
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.checkpoints:
+norm_export:null
+quant_export:deploy/slim/quantization/export_model.py -c test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/rec_chinese_lite_train_v2.0.yml -o
+fpgm_export:null
+distill_export:null
+export1:null
+export2:null
+inference_dir:null
+train_model:./inference/ch_ppocr_mobile_v2.0_rec_slim_infer/
+infer_export:null
+infer_quant:False
+inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ppocr_keys_v1.txt --rec_image_shape="3,32,100"
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1|6
+--use_tensorrt:False|True
+--precision:fp32|int8
+--rec_model_dir:
+--image_dir:./inference/rec_inference
+--save_log_path:./test/output/
+--benchmark:True
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,32,320]}]
diff --git a/test_tipc/configs/ch_ppocr_server_v2.0_det/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_server_v2.0_det/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3e3764e8c6f62c72ffb8ceb268c8ceee660d02de
--- /dev/null
+++ b/test_tipc/configs/ch_ppocr_server_v2.0_det/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:ch_ppocr_server_v2.0_det
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:amp
+Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=300
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
+null:null
+##
+trainer:norm_train
+norm_train:tools/train.py -c test_tipc/configs/ch_ppocr_server_v2.0_det/det_r50_vd_db.yml -o
+pact_train:null
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:tools/eval.py -c test_tipc/configs/ch_ppocr_server_v2.0_det/det_r50_vd_db.yml -o
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.checkpoints:
+norm_export:tools/export_model.py -c test_tipc/configs/ch_ppocr_server_v2.0_det/det_r50_vd_db.yml -o
+quant_export:null
+fpgm_export:null
+distill_export:null
+export1:null
+export2:null
+##
+train_model:./inference/ch_ppocr_server_v2.0_det_train/best_accuracy
+infer_export:tools/export_model.py -c test_tipc/configs/ch_ppocr_server_v2.0_det/det_r50_vd_db.yml -o
+infer_quant:False
+inference:tools/infer/predict_det.py
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1
+--use_tensorrt:False|True
+--precision:fp32|fp16|int8
+--det_model_dir:
+--image_dir:./inference/ch_det_data_50/all-sum-510/
+--save_log_path:null
+--benchmark:True
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,640,640]}];[{float32,[3,960,960]}]
\ No newline at end of file
diff --git a/test_tipc/configs/ch_ppocr_server_v2.0_rec/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_server_v2.0_rec/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
new file mode 100644
index 0000000000000000000000000000000000000000..78c15047fb522127075591cc9687392af77a300a
--- /dev/null
+++ b/test_tipc/configs/ch_ppocr_server_v2.0_rec/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:ch_ppocr_server_v2.0_rec
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:amp
+Global.epoch_num:lite_train_lite_infer=5|whole_train_whole_infer=100
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./inference/rec_inference
+null:null
+##
+trainer:norm_train
+norm_train:tools/train.py -c test_tipc/configs/ch_ppocr_server_v2.0_rec/rec_icdar15_train.yml -o
+pact_train:null
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:tools/eval.py -c test_tipc/configs/ch_ppocr_server_v2.0_rec/rec_icdar15_train.yml -o
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.checkpoints:
+norm_export:tools/export_model.py -c test_tipc/configs/ch_ppocr_server_v2.0_rec/rec_icdar15_train.yml -o
+quant_export:null
+fpgm_export:null
+distill_export:null
+export1:null
+export2:null
+##
+train_model:./inference/ch_ppocr_server_v2.0_rec_train/best_accuracy
+infer_export:tools/export_model.py -c test_tipc/configs/ch_ppocr_server_v2.0_rec/rec_icdar15_train.yml -o
+infer_quant:False
+inference:tools/infer/predict_rec.py
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1|6
+--use_tensorrt:True|False
+--precision:fp32|int8
+--rec_model_dir:
+--image_dir:./inference/rec_inference
+--save_log_path:./test/output/
+--benchmark:True
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,32,100]}]
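
The infer_benchmark_params line encodes the random tensors fed to the predictor during benchmarking: each ";"-separated group is a {dtype,[shape]} spec, with two spatial sizes for the detection configs and one fixed shape for recognition. A hedged parsing sketch of the format:

    # Illustrative only: extract (dtype, shape) pairs from a
    # random_infer_input value such as the detection configs'
    # "[{float32,[3,640,640]}];[{float32,[3,960,960]}]".
    import re

    def parse_random_infer_input(value):
        return [(dtype, [int(d) for d in dims.split(",")])
                for dtype, dims in re.findall(r"\{(\w+),\[([\d,]+)\]\}", value)]

    print(parse_random_infer_input("[{float32,[3,640,640]}];[{float32,[3,960,960]}]"))
    # [('float32', [3, 640, 640]), ('float32', [3, 960, 960])]
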
diff --git a/tools/eval.py b/tools/eval.py
index 1038090ab4e8da139ff180feec2495a5f401fc54..7fd4fa7ada7b1550bcca8766f5acb9b4d4ed2049 100755
--- a/tools/eval.py
+++ b/tools/eval.py
@@ -74,9 +74,11 @@ def main():
model = build_model(config['Architecture'])
extra_input_models = ["SRN", "NRTR", "SAR", "SEED", "SVTR"]
+ extra_input = False
if config['Architecture']['algorithm'] == 'Distillation':
- extra_input = config['Architecture']['Models']['Teacher'][
- 'algorithm'] in extra_input_models
+ for key in config['Architecture']["Models"]:
+ extra_input = extra_input or config['Architecture']['Models'][key][
+ 'algorithm'] in extra_input_models
else:
extra_input = config['Architecture']['algorithm'] in extra_input_models
if "model_type" in config['Architecture'].keys():
diff --git a/tools/export_model.py b/tools/export_model.py
index 96cc05a2449ca005a4ea5767fc64c777a90d6114..1f9f29e396fe4960914ae802769b65d20c103bd3 100755
--- a/tools/export_model.py
+++ b/tools/export_model.py
@@ -31,7 +31,7 @@ from ppocr.utils.logging import get_logger
from tools.program import load_config, merge_config, ArgsParser
-def export_single_model(model, arch_config, save_path, logger):
+def export_single_model(model, arch_config, save_path, logger, quanter=None):
if arch_config["algorithm"] == "SRN":
max_text_length = arch_config["Head"]["max_text_length"]
other_shape = [
@@ -61,6 +61,11 @@ def export_single_model(model, arch_config, save_path, logger):
paddle.static.InputSpec(
shape=[None, 3, 48, -1], dtype="float32"),
]
+ else:
+ other_shape = [
+ paddle.static.InputSpec(
+ shape=[None, 3, 64, 256], dtype="float32"),
+ ]
model = to_static(model, input_spec=other_shape)
elif arch_config["algorithm"] == "PREN":
other_shape = [
@@ -90,7 +95,10 @@ def export_single_model(model, arch_config, save_path, logger):
shape=[None] + infer_shape, dtype="float32")
])
- paddle.jit.save(model, save_path)
+ if quanter is None:
+ paddle.jit.save(model, save_path)
+ else:
+ quanter.save_quantized_model(model, save_path)
logger.info("inference model is saved to {}".format(save_path))
return
@@ -120,7 +128,6 @@ def main():
char_num = char_num - 2
out_channels_list['CTCLabelDecode'] = char_num
out_channels_list['SARLabelDecode'] = char_num + 2
- loss_list = config['Loss']['loss_config_list']
config['Architecture']['Models'][key]['Head'][
'out_channels_list'] = out_channels_list
else:
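
The new optional quanter argument lets quantization-aware exports reuse export_single_model: when a PaddleSlim QAT wrapper is present, its save_quantized_model replaces paddle.jit.save. A hedged sketch of the call site follows; the real consumer is deploy/slim/quantization/export_model.py, and the quant_config values here are assumptions, with model, arch_config, save_path, and logger supplied by the surrounding script:

    # Hedged sketch, not the shipped script: wrap the model with PaddleSlim
    # QAT, then export through the new quanter branch.
    from paddleslim.dygraph.quant import QAT

    quant_config = {                      # assumed minimal QAT settings
        "weight_quantize_type": "channel_wise_abs_max",
        "activation_quantize_type": "moving_average_abs_max",
        "quantizable_layer_type": ["Conv2D", "Linear"],
    }
    quanter = QAT(config=quant_config)
    quanter.quantize(model)               # insert fake-quant ops in place
    export_single_model(model, arch_config, save_path, logger,
                        quanter=quanter)  # -> quanter.save_quantized_model(...)
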
diff --git a/tools/infer/predict_rec.py b/tools/infer/predict_rec.py
index d4fbc3888ce4b42138fa3eda7774156e7e751fcd..2abc0220937175f95ee4c1e4b0b949d24d5fa3e8 100755
--- a/tools/infer/predict_rec.py
+++ b/tools/infer/predict_rec.py
@@ -131,6 +131,17 @@ class TextRecognizer(object):
padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
padding_im[:, :, 0:resized_w] = resized_image
return padding_im
+
+ def resize_norm_img_svtr(self, img, image_shape):
+ imgC, imgH, imgW = image_shape
+ resized_image = cv2.resize(
+ img, (imgW, imgH), interpolation=cv2.INTER_LINEAR)
+ resized_image = resized_image.astype('float32')
+ resized_image = resized_image.transpose((2, 0, 1)) / 255
+ resized_image -= 0.5
+ resized_image /= 0.5
+ return resized_image
def resize_norm_img_srn(self, img, image_shape):
imgC, imgH, imgW = image_shape
@@ -263,12 +274,8 @@ class TextRecognizer(object):
wh_ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, wh_ratio)
for ino in range(beg_img_no, end_img_no):
- if self.rec_algorithm != "SRN" and self.rec_algorithm != "SAR":
- norm_img = self.resize_norm_img(img_list[indices[ino]],
- max_wh_ratio)
- norm_img = norm_img[np.newaxis, :]
- norm_img_batch.append(norm_img)
- elif self.rec_algorithm == "SAR":
+
+ if self.rec_algorithm == "SAR":
norm_img, _, _, valid_ratio = self.resize_norm_img_sar(
img_list[indices[ino]], self.rec_image_shape)
norm_img = norm_img[np.newaxis, :]
@@ -276,7 +283,7 @@ class TextRecognizer(object):
valid_ratios = []
valid_ratios.append(valid_ratio)
norm_img_batch.append(norm_img)
- else:
+ elif self.rec_algorithm == "SRN":
norm_img = self.process_image_srn(
img_list[indices[ino]], self.rec_image_shape, 8, 25)
encoder_word_pos_list = []
@@ -288,6 +295,16 @@ class TextRecognizer(object):
gsrm_slf_attn_bias1_list.append(norm_img[3])
gsrm_slf_attn_bias2_list.append(norm_img[4])
norm_img_batch.append(norm_img[0])
+ elif self.rec_algorithm == "SVTR":
+ norm_img = self.resize_norm_img_svtr(
+ img_list[indices[ino]], self.rec_image_shape)
+ norm_img = norm_img[np.newaxis, :]
+ norm_img_batch.append(norm_img)
+ else:
+ norm_img = self.resize_norm_img(img_list[indices[ino]],
+ max_wh_ratio)
+ norm_img = norm_img[np.newaxis, :]
+ norm_img_batch.append(norm_img)
norm_img_batch = np.concatenate(norm_img_batch)
norm_img_batch = norm_img_batch.copy()
if self.benchmark:
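
Note the preprocessing contrast: the default CRNN path keeps the aspect ratio and pads up to max_wh_ratio, while the new SVTR branch resizes to a fixed H×W and maps pixels from [0, 255] to [-1, 1]. A standalone mirror of resize_norm_img_svtr for experimentation, where the (3, 64, 256) default is taken from the fallback InputSpec added in tools/export_model.py:

    import cv2
    import numpy as np

    def svtr_preprocess(img, image_shape=(3, 64, 256)):
        """Fixed-size resize + [-1, 1] normalization, as in the patch above."""
        imgC, imgH, imgW = image_shape
        resized = cv2.resize(img, (imgW, imgH), interpolation=cv2.INTER_LINEAR)
        chw = resized.astype("float32").transpose((2, 0, 1)) / 255
        return (chw - 0.5) / 0.5          # same as -= 0.5 then /= 0.5

    dummy = np.random.randint(0, 256, (48, 320, 3), dtype=np.uint8)
    out = svtr_preprocess(dummy)
    print(out.shape, float(out.min()), float(out.max()))  # (3, 64, 256), within [-1, 1]
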
diff --git a/tools/infer/utility.py b/tools/infer/utility.py
index b16aecd496ec291fcbe9c66dccf3ec04bb662034..c92e8e152a9ee4d86d269aec7ff5645f23cad443 100644
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -271,9 +271,10 @@ def create_predictor(args, mode, logger):
elif mode == "rec":
if args.rec_algorithm != "CRNN":
use_dynamic_shape = False
- min_input_shape = {"x": [1, 3, 32, 10]}
- max_input_shape = {"x": [args.rec_batch_num, 3, 32, 1536]}
- opt_input_shape = {"x": [args.rec_batch_num, 3, 32, 320]}
+ imgH = int(args.rec_image_shape.split(',')[-2])
+ min_input_shape = {"x": [1, 3, imgH, 10]}
+ max_input_shape = {"x": [args.rec_batch_num, 3, imgH, 1536]}
+ opt_input_shape = {"x": [args.rec_batch_num, 3, imgH, 320]}
elif mode == "cls":
min_input_shape = {"x": [1, 3, 48, 10]}
max_input_shape = {"x": [args.rec_batch_num, 3, 48, 1024]}
@@ -300,8 +301,8 @@ def create_predictor(args, mode, logger):
# enable memory optim
config.enable_memory_optim()
config.disable_glog_info()
-
config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
+ config.delete_pass("matmul_transpose_reshape_fuse_pass")
if mode == 'table':
config.delete_pass("fc_fuse_pass") # not supported for table
config.switch_use_feed_fetch_ops(False)
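
Deriving imgH from --rec_image_shape is what lets recognizers with non-32-pixel input heights (SVTR uses 64) get consistent TensorRT dynamic-shape bounds instead of the previously hardcoded 32. The derivation itself is just:

    # "C,H,W" -> H: second-to-last comma-separated field.
    rec_image_shape = "3,64,256"          # example --rec_image_shape value
    imgH = int(rec_image_shape.split(",")[-2])
    print(imgH, {"x": [1, 3, imgH, 10]})  # 64 {'x': [1, 3, 64, 10]}
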
diff --git a/tools/infer_cls.py b/tools/infer_cls.py
index 4be30bbb3c2f8bbf6a59179220faa942e6cc27b8..7fd6b536fbe50fb1240d84ca3a5e87236940c0f5 100755
--- a/tools/infer_cls.py
+++ b/tools/infer_cls.py
@@ -57,6 +57,8 @@ def main():
continue
elif op_name == 'KeepKeys':
op[op_name]['keep_keys'] = ['image']
+ elif op_name == "SSLRotateResize":
+ op[op_name]["mode"] = "test"
transforms.append(op)
global_config['infer_mode'] = True
ops = create_operators(transforms, global_config)
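
As with KeepKeys, the SSLRotateResize entry is patched in place before create_operators builds the pipeline, so inference skips the training-only behavior. A sketch of the same mutation on a hand-written transform list; reading mode="test" as "resize only, no 4-way rotation" is an assumption based on the RotNet pretraining setup:

    transforms_cfg = [
        {"DecodeImage": {"img_mode": "BGR", "channel_first": False}},
        {"SSLRotateResize": {"image_shape": [3, 48, 320]}},
        {"KeepKeys": {"keep_keys": ["image", "label"]}},
    ]
    for op in transforms_cfg:
        name = list(op)[0]
        if name == "KeepKeys":
            op[name]["keep_keys"] = ["image"]
        elif name == "SSLRotateResize":
            op[name]["mode"] = "test"     # resize only at inference (assumed)
    print(transforms_cfg[1])  # {'SSLRotateResize': {'image_shape': [3, 48, 320], 'mode': 'test'}}
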
diff --git a/tools/program.py b/tools/program.py
index 1742f6c9557929accd52a1748add68f0e569a6b9..90fd309ae9e1ae23723d8e67c62a905e79a073d3 100755
--- a/tools/program.py
+++ b/tools/program.py
@@ -202,9 +202,11 @@ def train(config,
use_srn = config['Architecture']['algorithm'] == "SRN"
extra_input_models = ["SRN", "NRTR", "SAR", "SEED", "SVTR"]
+ extra_input = False
if config['Architecture']['algorithm'] == 'Distillation':
- extra_input = config['Architecture']['Models']['Teacher'][
- 'algorithm'] in extra_input_models
+ for key in config['Architecture']["Models"]:
+ extra_input = extra_input or config['Architecture']['Models'][key][
+ 'algorithm'] in extra_input_models
else:
extra_input = config['Architecture']['algorithm'] in extra_input_models
try:
diff --git a/tools/train.py b/tools/train.py
index 77e600ab6641f0baade072bb853db0d6d44052a4..42aba548d6bf5fc35f033ef2baca0fb54d79e75a 100755
--- a/tools/train.py
+++ b/tools/train.py
@@ -129,7 +129,7 @@ def main(config, device, logger, vdl_writer):
config['Optimizer'],
epochs=config['Global']['epoch_num'],
step_each_epoch=len(train_dataloader),
- parameters=model.parameters())
+ model=model)
# build metric
eval_class = build_metric(config['Metric'])
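
Passing the model instead of model.parameters() gives build_optimizer access to parameter names and shapes, which the new AdamW options (no_weight_decay_name, one_dim_param_no_weight_decay) need in order to exempt norm and positional-embedding weights from L2 decay. A hedged sketch of that grouping logic, illustrative rather than the repo's actual builder:

    import paddle

    def split_decay_groups(model, skip_keywords=("norm", "pos_embed")):
        """Assumed filtering: 1-D params and name-matched params skip decay."""
        decay, no_decay = [], []
        for name, param in model.named_parameters():
            if param.ndim == 1 or any(k in name for k in skip_keywords):
                no_decay.append(param)    # biases, norms, pos_embed
            else:
                decay.append(param)
        return decay, no_decay
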