diff --git a/configs/cls/ch_PP-OCRv3/ch_PP-OCRv3_rotnet.yml b/configs/cls/ch_PP-OCRv3/ch_PP-OCRv3_rotnet.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1ffeba07995860f964e22b8b9d2538320d80f651
--- /dev/null
+++ b/configs/cls/ch_PP-OCRv3/ch_PP-OCRv3_rotnet.yml
@@ -0,0 +1,99 @@
+Global:
+  debug: false
+  use_gpu: true
+  epoch_num: 100
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: ./output/rec_ppocr_v3_rotnet
+  save_epoch_step: 3
+  eval_batch_step: [0, 2000]
+  cal_metric_during_train: true
+  pretrained_model: null
+  checkpoints: null
+  save_inference_dir: null
+  use_visualdl: false
+  infer_img: doc/imgs_words/ch/word_1.jpg
+  character_dict_path: ppocr/utils/ppocr_keys_v1.txt
+  max_text_length: 25
+  infer_mode: false
+  use_space_char: true
+  save_res_path: ./output/rec/predicts_chinese_lite_v2.0.txt
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+    name: Cosine
+    learning_rate: 0.001
+  regularizer:
+    name: L2
+    factor: 1.0e-05
+Architecture:
+  model_type: cls
+  algorithm: CLS
+  Transform: null
+  Backbone:
+    name: MobileNetV1Enhance
+    scale: 0.5
+    last_conv_stride: [1, 2]
+    last_pool_type: avg
+  Neck:
+  Head:
+    name: ClsHead
+    class_dim: 4
+
+Loss:
+  name: ClsLoss
+  main_indicator: acc
+
+PostProcess:
+  name: ClsPostProcess
+
+Metric:
+  name: ClsMetric
+  main_indicator: acc
+
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data
+    label_file_list:
+      - ./train_data/train_list.txt
+    transforms:
+      - DecodeImage:
+          img_mode: BGR
+          channel_first: false
+      - RecAug:
+          use_tia: False
+      - RandAugment:
+      - SSLRotateResize:
+          image_shape: [3, 48, 320]
+      - KeepKeys:
+          keep_keys: ["image", "label"]
+  loader:
+    collate_fn: "SSLRotateCollate"
+    shuffle: true
+    batch_size_per_card: 32
+    drop_last: true
+    num_workers: 8
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data
+    label_file_list:
+      - ./train_data/val_list.txt
+    transforms:
+      - DecodeImage:
+          img_mode: BGR
+          channel_first: false
+      - SSLRotateResize:
+          image_shape: [3, 48, 320]
+      - KeepKeys:
+          keep_keys: ["image", "label"]
+  loader:
+    collate_fn: "SSLRotateCollate"
+    shuffle: false
+    drop_last: false
+    batch_size_per_card: 64
+    num_workers: 8
+profiler_options: null
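Note: this config drives the RotNet-style pretraining added by this patch: SSLRotateResize (ppocr/data/imaug/ssl_img_aug.py below) stacks four rotated views of each crop with labels 0-3, and SSLRotateCollate (ppocr/data/collate_fn.py below) folds the views into the batch for the 4-class ClsHead. A minimal NumPy/OpenCV sketch of the shapes only; the real transform also normalizes via resize_norm_img:

    import cv2
    import numpy as np

    def rotate_views(img, hw=(48, 320)):
        # four views per crop: 0, 90, 180, 270 degrees, labels 0..3
        r90 = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
        r180 = cv2.rotate(r90, cv2.ROTATE_90_CLOCKWISE)
        r270 = cv2.rotate(r180, cv2.ROTATE_90_CLOCKWISE)
        views = [cv2.resize(v, hw[::-1]).transpose(2, 0, 1)
                 for v in (img, r90, r180, r270)]
        return np.stack(views).astype('float32'), np.arange(4)

    # collate two samples the way SSLRotateCollate does: concatenate views
    batch = [rotate_views(np.zeros((48, 320, 3), np.uint8)) for _ in range(2)]
    images, labels = [np.concatenate(d, axis=0) for d in zip(*batch)]
    print(images.shape, labels.shape)  # (8, 3, 48, 320) (8,)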
diff --git a/configs/rec/rec_svtrnet.yml b/configs/rec/rec_svtrnet.yml
new file mode 100644
index 0000000000000000000000000000000000000000..233d5e276577cad0144456ef7df1e20de99891f9
--- /dev/null
+++ b/configs/rec/rec_svtrnet.yml
@@ -0,0 +1,117 @@
+Global:
+  use_gpu: True
+  epoch_num: 20
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: ./output/rec/svtr/
+  save_epoch_step: 1
+  # evaluation is run every 2000 iterations after the 0th iteration
+  eval_batch_step: [0, 2000]
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints:
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: doc/imgs_words_en/word_10.png
+  # for data or label process
+  character_dict_path:
+  character_type: en
+  max_text_length: 25
+  infer_mode: False
+  use_space_char: False
+  save_res_path: ./output/rec/predicts_svtr_tiny.txt
+
+
+Optimizer:
+  name: AdamW
+  beta1: 0.9
+  beta2: 0.99
+  epsilon: 0.00000008
+  weight_decay: 0.05
+  no_weight_decay_name: norm pos_embed
+  one_dim_param_no_weight_decay: true
+  lr:
+    name: Cosine
+    learning_rate: 0.0005
+    warmup_epoch: 2
+
+Architecture:
+  model_type: rec
+  algorithm: SVTR
+  Transform:
+    name: STN_ON
+    tps_inputsize: [32, 64]
+    tps_outputsize: [32, 100]
+    num_control_points: 20
+    tps_margins: [0.05,0.05]
+    stn_activation: none
+  Backbone:
+    name: SVTRNet
+    img_size: [32, 100]
+    out_char_num: 25
+    out_channels: 192
+    patch_merging: 'Conv'
+    embed_dim: [64, 128, 256]
+    depth: [3, 6, 3]
+    num_heads: [2, 4, 8]
+    mixer: ['Local','Local','Local','Local','Local','Local','Global','Global','Global','Global','Global','Global']
+    local_mixer: [[7, 11], [7, 11], [7, 11]]
+    last_stage: True
+    prenorm: false
+  Neck:
+    name: SequenceEncoder
+    encoder_type: reshape
+  Head:
+    name: CTCHead
+
+Loss:
+  name: CTCLoss
+
+PostProcess:
+  name: CTCLabelDecode
+
+Metric:
+  name: RecMetric
+  main_indicator: acc
+
+Train:
+  dataset:
+    name: LMDBDataSet
+    data_dir: ./train_data/data_lmdb_release/training/
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecResizeImg:
+          character_dict_path:
+          image_shape: [3, 64, 256]
+          padding: False
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    batch_size_per_card: 512
+    drop_last: True
+    num_workers: 4
+
+Eval:
+  dataset:
+    name: LMDBDataSet
+    data_dir: ./train_data/data_lmdb_release/validation/
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecResizeImg:
+          character_dict_path:
+          image_shape: [3, 64, 256]
+          padding: False
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 256
+    num_workers: 2
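Note: the two new Optimizer keys above, `no_weight_decay_name` and `one_dim_param_no_weight_decay`, are consumed by the AdamW wrapper updated later in this patch (ppocr/optimizer/optimizer.py). A hedged sketch of the equivalent selection with a toy model; the model and names here are illustrative, only the Paddle API calls are real:

    import paddle

    model = paddle.nn.Sequential(paddle.nn.Linear(8, 8), paddle.nn.LayerNorm(8))
    no_decay_tokens = "norm pos_embed".split()  # from no_weight_decay_name
    # one_dim_param_no_weight_decay also exempts biases and norm scales
    skip = [p.name for n, p in model.named_parameters()
            if any(t in n for t in no_decay_tokens) or len(p.shape) == 1]

    opt = paddle.optimizer.AdamW(
        learning_rate=0.0005,
        weight_decay=0.05,
        parameters=model.parameters(),
        apply_decay_param_fun=lambda name: name not in skip)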
diff --git a/deploy/README.md b/deploy/README.md
index 033662a7535e10c2c468436d4f01f06d84996fb7..69e2438996a1329e801f842ef78d2d6e115c5831 100644
--- a/deploy/README.md
+++ b/deploy/README.md
@@ -22,9 +22,11 @@ PP-OCR has supported multi deployment schemes. Click the link to get the specific
 
 - [Python Inference](../doc/doc_en/inference_ppocr_en.md)
 - [C++ Inference](./cpp_infer/readme.md)
-- [Serving](./pdserving/README.md)
-- [Paddle-Lite](./lite/readme.md)
+- [Serving (Python/C++)](./pdserving/README.md)
+- [Paddle-Lite (ARM CPU/OpenCL ARM GPU/Metal ARM GPU)](./lite/readme.md)
 - [Paddle.js](./paddlejs/README.md)
+- [Jetson Inference]()
+- [XPU Inference]()
 - [Paddle2ONNX](./paddle2onnx/readme.md)
 
 If you need the deployment tutorial of academic algorithm models other than PP-OCR, please directly enter the main page of corresponding algorithms, [entrance](../doc/doc_en/algorithm_overview_en.md).
\ No newline at end of file
diff --git a/deploy/README_ch.md b/deploy/README_ch.md
index 96b49ddd9b94bff877ae1ac1d0e6f6e90612ec85..63ae59537316480a302dca7c3714db3c1003553e 100644
--- a/deploy/README_ch.md
+++ b/deploy/README_ch.md
@@ -22,9 +22,11 @@ PP-OCR模型已打通多种场景部署方案,点击链接获取具体的使
 
 - [Python 推理](../doc/doc_ch/inference_ppocr.md)
 - [C++ 推理](./cpp_infer/readme_ch.md)
-- [Serving 服务化部署](./pdserving/README_CN.md)
-- [Paddle-Lite 端侧部署](./lite/readme_ch.md)
-- [Paddle.js 服务化部署](./paddlejs/README_ch.md)
+- [Serving 服务化部署(Python/C++)](./pdserving/README_CN.md)
+- [Paddle-Lite 端侧部署(ARM CPU/OpenCL ARM GPU/Metal ARM GPU)](./lite/readme_ch.md)
+- [Paddle.js 部署](./paddlejs/README_ch.md)
+- [Jetson 推理]()
+- [XPU 推理]()
 - [Paddle2ONNX 推理](./paddle2onnx/readme_ch.md)
 
 需要PP-OCR以外的学术算法模型的推理部署,请直接进入相应算法主页面,[入口](../doc/doc_ch/algorithm_overview.md)。
\ No newline at end of file
diff --git a/deploy/slim/quantization/export_model.py b/deploy/slim/quantization/export_model.py
index 822fd5da4c30a934d0e590ab1067f9f9188213c2..90f79dab34a5f20d4556ae4b10ad1d4e1f8b7f0d 100755
--- a/deploy/slim/quantization/export_model.py
+++ b/deploy/slim/quantization/export_model.py
@@ -35,17 +35,7 @@ from ppocr.metrics import build_metric
 import tools.program as program
 from paddleslim.dygraph.quant import QAT
 from ppocr.data import build_dataloader
-
-
-def export_single_model(quanter, model, infer_shape, save_path, logger):
-    quanter.save_quantized_model(
-        model,
-        save_path,
-        input_spec=[
-            paddle.static.InputSpec(
-                shape=[None] + infer_shape, dtype='float32')
-        ])
-    logger.info('inference QAT model is saved to {}'.format(save_path))
+from tools.export_model import export_single_model
 
 
 def main():
@@ -84,17 +74,54 @@ def main():
         config['Global'])
 
     # build model
-    # for rec algorithm
     if hasattr(post_process_class, 'character'):
        char_num = len(getattr(post_process_class, 'character'))
        if config['Architecture']["algorithm"] in ["Distillation", ]:  # distillation model
            for key in config['Architecture']["Models"]:
-                config['Architecture']["Models"][key]["Head"][
-                    'out_channels'] = char_num
+                if config['Architecture']['Models'][key]['Head'][
+                        'name'] == 'MultiHead':  # for multi head
+                    if config['PostProcess'][
+                            'name'] == 'DistillationSARLabelDecode':
+                        char_num = char_num - 2
+                    # update SARLoss params
+                    assert list(config['Loss']['loss_config_list'][-1].keys())[
+                        0] == 'DistillationSARLoss'
+                    config['Loss']['loss_config_list'][-1][
+                        'DistillationSARLoss']['ignore_index'] = char_num + 1
+                    out_channels_list = {}
+                    out_channels_list['CTCLabelDecode'] = char_num
+                    out_channels_list['SARLabelDecode'] = char_num + 2
+                    config['Architecture']['Models'][key]['Head'][
+                        'out_channels_list'] = out_channels_list
+                else:
+                    config['Architecture']["Models"][key]["Head"][
+                        'out_channels'] = char_num
+        elif config['Architecture']['Head'][
+                'name'] == 'MultiHead':  # for multi head
+            if config['PostProcess']['name'] == 'SARLabelDecode':
+                char_num = char_num - 2
+            # update SARLoss params
+            assert list(config['Loss']['loss_config_list'][1].keys())[
+                0] == 'SARLoss'
+            if config['Loss']['loss_config_list'][1]['SARLoss'] is None:
+                config['Loss']['loss_config_list'][1]['SARLoss'] = {
+                    'ignore_index': char_num + 1
+                }
+            else:
+                config['Loss']['loss_config_list'][1]['SARLoss'][
+                    'ignore_index'] = char_num + 1
+            out_channels_list = {}
+            out_channels_list['CTCLabelDecode'] = char_num
+            out_channels_list['SARLabelDecode'] = char_num + 2
+            config['Architecture']['Head'][
+                'out_channels_list'] = out_channels_list
        else:  # base rec model
            config['Architecture']["Head"]['out_channels'] = char_num
 
+        if config['PostProcess']['name'] == 'SARLabelDecode':  # for SAR model
+            config['Loss']['ignore_index'] = char_num - 1
+
     model = build_model(config['Architecture'])
 
     # get QAT model
@@ -120,21 +147,20 @@ def main():
         for k, v in metric.items():
             logger.info('{}:{}'.format(k, v))
 
-    infer_shape = [3, 32, 100] if model_type == "rec" else [3, 640, 640]
-
     save_path = config["Global"]["save_inference_dir"]
     arch_config = config["Architecture"]
+
     if arch_config["algorithm"] in ["Distillation", ]:  # distillation model
+        archs = list(arch_config["Models"].values())
         for idx, name in enumerate(model.model_name_list):
             model.model_list[idx].eval()
             sub_model_save_path = os.path.join(save_path, name, "inference")
-            export_single_model(quanter, model.model_list[idx], infer_shape,
-                                sub_model_save_path, logger)
+            export_single_model(model.model_list[idx], archs[idx],
+                                sub_model_save_path, logger, quanter)
     else:
         save_path = os.path.join(save_path, "inference")
-        model.eval()
-        export_single_model(quanter, model, infer_shape, save_path,
-                            logger)
+        export_single_model(model, arch_config, save_path, logger, quanter)
 
 
 if __name__ == "__main__":
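Note: the MultiHead bookkeeping above is duplicated in quant.py below. It trims the decoded charset by the two symbols the SAR decoder reserves, then restores them on the SAR branch only. A toy sketch of the arithmetic, with a hypothetical charset size:

    # hypothetical charset size; the real count comes from the PostProcess class
    char_num = 6625
    char_num = char_num - 2                  # SAR decode reserves two symbols
    out_channels_list = {
        'CTCLabelDecode': char_num,          # CTC branch width
        'SARLabelDecode': char_num + 2,      # SAR branch restores its specials
    }
    sar_ignore_index = char_num + 1          # index ignored by SARLoss
    print(out_channels_list, sar_ignore_index)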
diff --git a/deploy/slim/quantization/quant.py b/deploy/slim/quantization/quant.py
index 1dffaab0eef35ec41c27c9c6e00f25dda048d490..f7acb185add5d40b749e7442111891869dfaeb22 100755
--- a/deploy/slim/quantization/quant.py
+++ b/deploy/slim/quantization/quant.py
@@ -112,10 +112,48 @@ def main(config, device, logger, vdl_writer):
 
         if config['Architecture']["algorithm"] in ["Distillation", ]:  # distillation model
             for key in config['Architecture']["Models"]:
-                config['Architecture']["Models"][key]["Head"][
-                    'out_channels'] = char_num
+                if config['Architecture']['Models'][key]['Head'][
+                        'name'] == 'MultiHead':  # for multi head
+                    if config['PostProcess'][
+                            'name'] == 'DistillationSARLabelDecode':
+                        char_num = char_num - 2
+                    # update SARLoss params
+                    assert list(config['Loss']['loss_config_list'][-1].keys())[
+                        0] == 'DistillationSARLoss'
+                    config['Loss']['loss_config_list'][-1][
+                        'DistillationSARLoss']['ignore_index'] = char_num + 1
+                    out_channels_list = {}
+                    out_channels_list['CTCLabelDecode'] = char_num
+                    out_channels_list['SARLabelDecode'] = char_num + 2
+                    config['Architecture']['Models'][key]['Head'][
+                        'out_channels_list'] = out_channels_list
+                else:
+                    config['Architecture']["Models"][key]["Head"][
+                        'out_channels'] = char_num
+        elif config['Architecture']['Head'][
+                'name'] == 'MultiHead':  # for multi head
+            if config['PostProcess']['name'] == 'SARLabelDecode':
+                char_num = char_num - 2
+            # update SARLoss params
+            assert list(config['Loss']['loss_config_list'][1].keys())[
+                0] == 'SARLoss'
+            if config['Loss']['loss_config_list'][1]['SARLoss'] is None:
+                config['Loss']['loss_config_list'][1]['SARLoss'] = {
+                    'ignore_index': char_num + 1
+                }
+            else:
+                config['Loss']['loss_config_list'][1]['SARLoss'][
+                    'ignore_index'] = char_num + 1
+            out_channels_list = {}
+            out_channels_list['CTCLabelDecode'] = char_num
+            out_channels_list['SARLabelDecode'] = char_num + 2
+            config['Architecture']['Head'][
+                'out_channels_list'] = out_channels_list
        else:  # base rec model
            config['Architecture']["Head"]['out_channels'] = char_num
+
+        if config['PostProcess']['name'] == 'SARLabelDecode':  # for SAR model
+            config['Loss']['ignore_index'] = char_num - 1
 
     model = build_model(config['Architecture'])
 
     pre_best_model_dict = dict()
@@ -137,7 +175,7 @@ def main(config, device, logger, vdl_writer):
         config['Optimizer'],
         epochs=config['Global']['epoch_num'],
         step_each_epoch=len(train_dataloader),
-        parameters=model.parameters())
+        model=model)
 
     # resume PACT training process
     if config["Global"]["checkpoints"] is not None:
diff --git a/doc/doc_ch/algorithm_det_db.md b/doc/doc_ch/algorithm_det_db.md
index 7f94ceaee06ac41a42c785f26bffa30005a98355..90837c2ac1ebbc04ee47cbb74ed6466352710e88 100644
--- a/doc/doc_ch/algorithm_det_db.md
+++ b/doc/doc_ch/algorithm_det_db.md
@@ -25,8 +25,8 @@
 
 |模型|骨干网络|配置文件|precision|recall|Hmean|下载链接|
 | --- | --- | --- | --- | --- | --- | --- |
-|DB|ResNet50_vd|configs/det/det_r50_vd_db.yml|86.41%|78.72%|82.38%|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)|
-|DB|MobileNetV3|configs/det/det_mv3_db.yml|77.29%|73.08%|75.12%|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)|
+|DB|ResNet50_vd|[configs/det/det_r50_vd_db.yml](../../configs/det/det_r50_vd_db.yml)|86.41%|78.72%|82.38%|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)|
+|DB|MobileNetV3|[configs/det/det_mv3_db.yml](../../configs/det/det_mv3_db.yml)|77.29%|73.08%|75.12%|[训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)|
diff --git a/doc/doc_ch/dataset/layout_datasets.md b/doc/doc_ch/dataset/layout_datasets.md
index 45ac3a1127014eed420bb816f9ee07963efff533..e7055b4e607aae358a9ec1e93f3640b2b68ea4a1 100644
--- a/doc/doc_ch/dataset/layout_datasets.md
+++ b/doc/doc_ch/dataset/layout_datasets.md
@@ -27,7 +27,7 @@
 #### 2、CDLA数据集
 - **数据来源**:https://github.com/buptlihang/CDLA
-- **数据简介**:publaynet数据集的训练集合中包含5000张图像,验证集合中包含1000张图像。总共包含10个类别,分别是: `Text, Title, Figure, Figure caption, Table, Table caption, Header, Footer, Reference, Equation`。部分图像以及标注框可视化如下所示。
+- **数据简介**:CDLA数据集的训练集合中包含5000张图像,验证集合中包含1000张图像。总共包含10个类别,分别是: `Text, Title, Figure, Figure caption, Table, Table caption, Header, Footer, Reference, Equation`。部分图像以及标注框可视化如下所示。
diff --git a/doc/doc_ch/ppocr_introduction.md b/doc/doc_ch/ppocr_introduction.md
index d9b5a4e0231dcec271c12942dfdb108854b530ae..2e25ebc9501d2e916b86867bf265490aa0971be0 100644
--- a/doc/doc_ch/ppocr_introduction.md
+++ b/doc/doc_ch/ppocr_introduction.md
@@ -17,6 +17,8 @@
 
 PP-OCR是PaddleOCR自研的实用的超轻量OCR系统。在实现[前沿算法](algorithm.md)的基础上,考虑精度与速度的平衡,进行**模型瘦身**和**深度优化**,使其尽可能满足产业落地需求。
 
+#### PP-OCR
+
 PP-OCR是一个两阶段的OCR系统,其中文本检测算法选用[DB](algorithm_det_db.md),文本识别算法选用[CRNN](algorithm_rec_crnn.md),并在检测和识别模块之间添加[文本方向分类器](angle_class.md),以应对不同方向的文本识别。
 
 PP-OCR系统pipeline如下:
@@ -28,9 +30,13 @@ PP-OCR系统pipeline如下:
 
 PP-OCR系统在持续迭代优化,目前已发布PP-OCR和PP-OCRv2两个版本:
 
-[1] PP-OCR从骨干网络选择和调整、预测头部的设计、数据增强、学习率变换策略、正则化参数选择、预训练模型使用以及模型自动裁剪量化8个方面,采用19个有效策略,对各个模块的模型进行效果调优和瘦身(如绿框所示),最终得到整体大小为3.5M的超轻量中英文OCR和2.8M的英文数字OCR。更多细节请参考PP-OCR技术方案 https://arxiv.org/abs/2009.09941
+PP-OCR从骨干网络选择和调整、预测头部的设计、数据增强、学习率变换策略、正则化参数选择、预训练模型使用以及模型自动裁剪量化8个方面,采用19个有效策略,对各个模块的模型进行效果调优和瘦身(如绿框所示),最终得到整体大小为3.5M的超轻量中英文OCR和2.8M的英文数字OCR。更多细节请参考PP-OCR技术方案 https://arxiv.org/abs/2009.09941
+
+#### PP-OCRv2
+
+PP-OCRv2在PP-OCR的基础上,进一步在5个方面重点优化,检测模型采用CML协同互学习知识蒸馏策略和CopyPaste数据增广策略;识别模型采用LCNet轻量级骨干网络、UDML 改进知识蒸馏策略和[Enhanced CTC loss](./enhanced_ctc_loss.md)损失函数改进(如上图红框所示),进一步在推理速度和预测效果上取得明显提升。更多细节请参考PP-OCRv2[技术报告](https://arxiv.org/abs/2109.03144)。
 
-[2] PP-OCRv2在PP-OCR的基础上,进一步在5个方面重点优化,检测模型采用CML协同互学习知识蒸馏策略和CopyPaste数据增广策略;识别模型采用LCNet轻量级骨干网络、UDML 改进知识蒸馏策略和[Enhanced CTC loss](./doc/doc_ch/enhanced_ctc_loss.md)损失函数改进(如上图红框所示),进一步在推理速度和预测效果上取得明显提升。更多细节请参考PP-OCRv2[技术报告](https://arxiv.org/abs/2109.03144)。
+#### PP-OCRv3
diff --git a/doc/doc_en/algorithm_det_db_en.md b/doc/doc_en/algorithm_det_db_en.md
index b387a8ec217b351164d7cac878539bab19157a6e..f5f333a039acded88f0f28d302821c5eb10d7402 100644
--- a/doc/doc_en/algorithm_det_db_en.md
+++ b/doc/doc_en/algorithm_det_db_en.md
@@ -25,8 +25,8 @@ On the ICDAR2015 dataset, the text detection result is as follows:
 
 |Model|Backbone|Configuration|Precision|Recall|Hmean|Download|
 | --- | --- | --- | --- | --- | --- | --- |
-|DB|ResNet50_vd|configs/det/det_r50_vd_db.yml|86.41%|78.72%|82.38%|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)|
-|DB|MobileNetV3|configs/det/det_mv3_db.yml|77.29%|73.08%|75.12%|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)|
+|DB|ResNet50_vd|[configs/det/det_r50_vd_db.yml](../../configs/det/det_r50_vd_db.yml)|86.41%|78.72%|82.38%|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)|
+|DB|MobileNetV3|[configs/det/det_mv3_db.yml](../../configs/det/det_mv3_db.yml)|77.29%|73.08%|75.12%|[trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)|
diff --git a/doc/joinus.PNG b/doc/joinus.PNG
index 133e42b8de6c4ec2c0ae4f85aab0d4a7fb425526..6eacac65d268a17dd717fff1790d5c0c84acc5ea 100644
Binary files a/doc/joinus.PNG and b/doc/joinus.PNG differ
diff --git a/ppocr/data/__init__.py b/ppocr/data/__init__.py
index 60ab7bd0b4ceab846982c8744d5b277ee17185df..78c3279656e184a3a34bff3847d3936b5e8977b6 100644
--- a/ppocr/data/__init__.py
+++ b/ppocr/data/__init__.py
@@ -72,6 +72,7 @@ def build_dataloader(config, mode, device, logger, seed=None):
         use_shared_memory = loader_config['use_shared_memory']
     else:
         use_shared_memory = True
+
     if mode == "Train":
         # Distribute data to multiple cards
         batch_sampler = DistributedBatchSampler(
diff --git a/ppocr/data/collate_fn.py b/ppocr/data/collate_fn.py
index 89c6b4fd5ae151e1d703ea5c59abf0177dfc3a8b..0da6060f042a0e60cdf211d8bc13aede32d5930a 100644
--- a/ppocr/data/collate_fn.py
+++ b/ppocr/data/collate_fn.py
@@ -56,3 +56,17 @@ class ListCollator(object):
         for idx in to_tensor_idxs:
             data_dict[idx] = paddle.to_tensor(data_dict[idx])
         return list(data_dict.values())
+
+
+class SSLRotateCollate(object):
+    """
+    batch: [
+        [(4*3xH*W), (4,)]
+        [(4*3xH*W), (4,)]
+        ...
+    ]
+    """
+
+    def __call__(self, batch):
+        output = [np.concatenate(d, axis=0) for d in zip(*batch)]
+        return output
diff --git a/ppocr/data/imaug/__init__.py b/ppocr/data/imaug/__init__.py
index c24886aa89dbd0957a313dded862de5893fb6817..20aaf48e119d68e6c37ce9246a87701fb149d5e7 100644
--- a/ppocr/data/imaug/__init__.py
+++ b/ppocr/data/imaug/__init__.py
@@ -23,7 +23,8 @@ from .random_crop_data import EastRandomCropData, RandomCropImgMask
 from .make_pse_gt import MakePseGt
 
 from .rec_img_aug import RecAug, RecConAug, RecResizeImg, ClsResizeImg, \
-    SRNRecResizeImg, NRTRRecResizeImg, SARRecResizeImg, PRENResizeImg
+    SRNRecResizeImg, NRTRRecResizeImg, SARRecResizeImg, PRENResizeImg, SVTRRecResizeImg
+from .ssl_img_aug import SSLRotateResize
 from .randaugment import RandAugment
 from .copy_paste import CopyPaste
 from .ColorJitter import ColorJitter
diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py
index 86366d7a4f2e908426e67015b4338e44350da492..c9bc2e7722e8027ce870e4969bfcdab720495c28 100644
--- a/ppocr/data/imaug/label_ops.py
+++ b/ppocr/data/imaug/label_ops.py
@@ -113,14 +113,14 @@ class BaseRecLabelEncode(object):
             dict_character = list(self.character_str)
             self.lower = True
         else:
-            self.character_str = ""
+            self.character_str = []
             with open(character_dict_path, "rb") as fin:
                 lines = fin.readlines()
                 for line in lines:
                     line = line.decode('utf-8').strip("\n").strip("\r\n")
-                    self.character_str += line
+                    self.character_str.append(line)
             if use_space_char:
-                self.character_str += " "
+                self.character_str.append(" ")
             dict_character = list(self.character_str)
         dict_character = self.add_special_char(dict_character)
         self.dict = {}
diff --git a/ppocr/data/imaug/rec_img_aug.py b/ppocr/data/imaug/rec_img_aug.py
index 960a11be16a9090d80b5c5a27069246d1bcaa3e7..2f70b51a3b88422274353046209c6d0d4dc79489 100644
--- a/ppocr/data/imaug/rec_img_aug.py
+++ b/ppocr/data/imaug/rec_img_aug.py
@@ -16,6 +16,7 @@ import math
 import cv2
 import numpy as np
 import random
+import copy
 from PIL import Image
 
 from .text_image_aug import tia_perspective, tia_stretch, tia_distort
 
@@ -81,7 +82,7 @@ class ClsResizeImg(object):
 
     def __call__(self, data):
         img = data['image']
-        norm_img = resize_norm_img(img, self.image_shape)
+        norm_img, _ = resize_norm_img(img, self.image_shape)
         data['image'] = norm_img
         return data
 
@@ -206,6 +207,25 @@ class PRENResizeImg(object):
         return data
 
 
+class SVTRRecResizeImg(object):
+    def __init__(self,
+                 image_shape,
+                 infer_mode=False,
+                 character_dict_path='./ppocr/utils/ppocr_keys_v1.txt',
+                 padding=True,
+                 **kwargs):
+        self.image_shape = image_shape
+        self.infer_mode = infer_mode
+        self.character_dict_path = character_dict_path
+        self.padding = padding
+
+    def __call__(self, data):
+        img = data['image']
+        norm_img = resize_norm_img_svtr(img, self.image_shape, self.padding)
+        data['image'] = norm_img
+        return data
+
+
 def resize_norm_img_sar(img, image_shape, width_downsample_ratio=0.25):
     imgC, imgH, imgW_min, imgW_max = image_shape
     h = img.shape[0]
@@ -324,6 +344,62 @@ def resize_norm_img_srn(img, image_shape):
 
     return np.reshape(img_black, (c, row, col)).astype(np.float32)
 
 
+def resize_norm_img_svtr(img, image_shape, padding=False):
+    imgC, imgH, imgW = image_shape
+    h = img.shape[0]
+    w = img.shape[1]
+    if not padding:
+        if h > 2.0 * w:
+            image = Image.fromarray(img)
+            image1 = image.rotate(90, expand=True)
+            image2 = image.rotate(-90, expand=True)
+            img1 = np.array(image1)
+            img2 = np.array(image2)
+        else:
+            img1 = copy.deepcopy(img)
+            img2 = copy.deepcopy(img)
+
+        resized_image = cv2.resize(
+            img, (imgW, imgH), interpolation=cv2.INTER_LINEAR)
+        resized_image1 = cv2.resize(
+            img1, (imgW, imgH), interpolation=cv2.INTER_LINEAR)
+        resized_image2 = cv2.resize(
+            img2, (imgW, imgH), interpolation=cv2.INTER_LINEAR)
+        resized_w = imgW
+    else:
+        ratio = w / float(h)
+        if math.ceil(imgH * ratio) > imgW:
+            resized_w = imgW
+        else:
+            resized_w = int(math.ceil(imgH * ratio))
+        resized_image = cv2.resize(img, (resized_w, imgH))
+        # the padding branch produces a single view; duplicate it so the
+        # three-view normalization below does not hit undefined names
+        resized_image1 = resized_image.copy()
+        resized_image2 = resized_image.copy()
+    resized_image = resized_image.astype('float32')
+    resized_image1 = resized_image1.astype('float32')
+    resized_image2 = resized_image2.astype('float32')
+    if image_shape[0] == 1:
+        resized_image = resized_image / 255
+        resized_image = resized_image[np.newaxis, :]
+    else:
+        resized_image = resized_image.transpose((2, 0, 1)) / 255
+        resized_image1 = resized_image1.transpose((2, 0, 1)) / 255
+        resized_image2 = resized_image2.transpose((2, 0, 1)) / 255
+    resized_image -= 0.5
+    resized_image /= 0.5
+    resized_image1 -= 0.5
+    resized_image1 /= 0.5
+    resized_image2 -= 0.5
+    resized_image2 /= 0.5
+    padding_im = np.zeros((3, imgC, imgH, imgW), dtype=np.float32)
+    padding_im[0, :, :, 0:resized_w] = resized_image
+    padding_im[1, :, :, 0:resized_w] = resized_image1
+    padding_im[2, :, :, 0:resized_w] = resized_image2
+    return padding_im
+
+
 def srn_other_inputs(image_shape, num_heads, max_text_length):
 
     imgC, imgH, imgW = image_shape
diff --git a/ppocr/data/imaug/ssl_img_aug.py b/ppocr/data/imaug/ssl_img_aug.py
new file mode 100644
index 0000000000000000000000000000000000000000..f9ed6ac3e230ae85754bf40189c392c7e6e29b63
--- /dev/null
+++ b/ppocr/data/imaug/ssl_img_aug.py
@@ -0,0 +1,60 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+import cv2
+import numpy as np
+import random
+from PIL import Image
+
+from .rec_img_aug import resize_norm_img
+
+
+class SSLRotateResize(object):
+    def __init__(self,
+                 image_shape,
+                 padding=False,
+                 select_all=True,
+                 mode="train",
+                 **kwargs):
+        self.image_shape = image_shape
+        self.padding = padding
+        self.select_all = select_all
+        self.mode = mode
+
+    def __call__(self, data):
+        img = data["image"]
+
+        data["image_r90"] = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
+        data["image_r180"] = cv2.rotate(data["image_r90"],
+                                        cv2.ROTATE_90_CLOCKWISE)
+        data["image_r270"] = cv2.rotate(data["image_r180"],
+                                        cv2.ROTATE_90_CLOCKWISE)
+
+        images = []
+        for key in ["image", "image_r90", "image_r180", "image_r270"]:
+            images.append(
+                resize_norm_img(
+                    data.pop(key),
+                    image_shape=self.image_shape,
+                    padding=self.padding)[0])
+        data["image"] = np.stack(images, axis=0)
+        data["label"] = np.array(list(range(4)))
+        if not self.select_all:
+            data["image"] = data["image"][0::2]  # just choose 0 and 180
+            data["label"] = data["label"][0:2]  # label needs to be continuous
+        if self.mode == "test":
+            data["image"] = data["image"][0]
+            data["label"] = data["label"][0]
+        return data
diff --git a/ppocr/modeling/backbones/rec_svtrnet.py b/ppocr/modeling/backbones/rec_svtrnet.py
index bef8f3688d8efaa113a81d1acb135273746e6a93..5ded74378c60e6f08a4adf68671afaa1168737b6 100644
--- a/ppocr/modeling/backbones/rec_svtrnet.py
+++ b/ppocr/modeling/backbones/rec_svtrnet.py
@@ -296,47 +296,47 @@ class PatchEmbed(nn.Layer):
         if sub_num == 2:
             self.proj = nn.Sequential(
                 ConvBNLayer(
-                    in_channels,
-                    embed_dim // 2,
-                    3,
-                    2,
-                    1,
+                    in_channels=in_channels,
+                    out_channels=embed_dim // 2,
+                    kernel_size=3,
+                    stride=2,
+                    padding=1,
                     act=nn.GELU,
                     bias_attr=None),
                 ConvBNLayer(
-                    embed_dim // 2,
-                    embed_dim,
-                    3,
-                    2,
-                    1,
+                    in_channels=embed_dim // 2,
+                    out_channels=embed_dim,
+                    kernel_size=3,
+                    stride=2,
+                    padding=1,
                     act=nn.GELU,
                     bias_attr=None))
         if sub_num == 3:
             self.proj = nn.Sequential(
                 ConvBNLayer(
-                    in_channels,
-                    embed_dim // 4,
-                    3,
-                    2,
-                    1,
+                    in_channels=in_channels,
+                    out_channels=embed_dim // 4,
+                    kernel_size=3,
+                    stride=2,
+                    padding=1,
                     act=nn.GELU,
                     bias_attr=None),
                 ConvBNLayer(
-                    embed_dim // 4,
-                    embed_dim // 2,
-                    3,
-                    2,
-                    1,
+                    in_channels=embed_dim // 4,
+                    out_channels=embed_dim // 2,
+                    kernel_size=3,
+                    stride=2,
+                    padding=1,
                     act=nn.GELU,
                     bias_attr=None),
                 ConvBNLayer(
-                    embed_dim // 2,
-                    embed_dim,
-                    3,
-                    2,
-                    1,
+                    in_channels=embed_dim // 2,
+                    out_channels=embed_dim,
+                    kernel_size=3,
+                    stride=2,
+                    padding=1,
                     act=nn.GELU,
-                    bias_attr=None), )
+                    bias_attr=None))
 
     def forward(self, x):
         B, C, H, W = x.shape
@@ -455,7 +457,7 @@ class SVTRNet(nn.Layer):
                 qkv_bias=qkv_bias,
                 qk_scale=qk_scale,
                 drop=drop_rate,
-                act_layer=nn.Swish,
+                act_layer=eval(act),
                 attn_drop=attn_drop_rate,
                 drop_path=dpr[0:depth[0]][i],
                 norm_layer=norm_layer,
diff --git a/ppocr/modeling/transforms/stn.py b/ppocr/modeling/transforms/stn.py
index 6f2bdda050f217d8253740001901fbff4065782a..1b15d5b8a7b7a1b1ab686d20acea750437463939 100644
--- a/ppocr/modeling/transforms/stn.py
+++ b/ppocr/modeling/transforms/stn.py
@@ -128,6 +128,9 @@ class STN_ON(nn.Layer):
         self.out_channels = in_channels
 
     def forward(self, image):
+        if len(image.shape) == 5:
+            image = image.reshape(
+                [0, image.shape[-3], image.shape[-2], image.shape[-1]])
         stn_input = paddle.nn.functional.interpolate(
             image, self.tps_inputsize, mode="bilinear", align_corners=True)
         stn_img_feat, ctrl_points = self.stn_head(stn_input)
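Note: resize_norm_img_svtr (added above) returns a stack of three views per image, so batches can reach the transform as 5-D tensors; the STN_ON change reshapes them to 4-D before the TPS resampling. A NumPy sketch of the analogous fold, with assumed sizes (NumPy has no Paddle-style 0 copy-dim placeholder, so -1 is used):

    import numpy as np

    # assumed: N=2 images, 3 views each, C=3, H=64, W=256
    batch = np.zeros((2, 3, 3, 64, 256), dtype='float32')
    folded = batch.reshape((-1,) + batch.shape[2:])  # view axis into batch axis
    print(folded.shape)  # (6, 3, 64, 256)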
diff --git a/ppocr/modeling/transforms/tps_spatial_transformer.py b/ppocr/modeling/transforms/tps_spatial_transformer.py
index 043bb56b8a526c12b2e0799bf41e128c6499c1fc..cb1cb10aaa98dffa2f720dc81afdf82d25e071ca 100644
--- a/ppocr/modeling/transforms/tps_spatial_transformer.py
+++ b/ppocr/modeling/transforms/tps_spatial_transformer.py
@@ -138,9 +138,9 @@ class TPSSpatialTransformer(nn.Layer):
         assert source_control_points.shape[2] == 2
         batch_size = paddle.shape(source_control_points)[0]
 
-        self.padding_matrix = paddle.expand(
+        padding_matrix = paddle.expand(
             self.padding_matrix, shape=[batch_size, 3, 2])
-        Y = paddle.concat([source_control_points, self.padding_matrix], 1)
+        Y = paddle.concat([source_control_points, padding_matrix], 1)
         mapping_matrix = paddle.matmul(self.inverse_kernel, Y)
         source_coordinate = paddle.matmul(self.target_coordinate_repr,
                                           mapping_matrix)
diff --git a/ppocr/optimizer/__init__.py b/ppocr/optimizer/__init__.py
index 4110fb47678583cff826a9bc855b3fb378a533f9..a6bd2ebb4a81427245dc10e446cd2da101d53bd4 100644
--- a/ppocr/optimizer/__init__.py
+++ b/ppocr/optimizer/__init__.py
@@ -30,7 +30,7 @@ def build_lr_scheduler(lr_config, epochs, step_each_epoch):
     return lr
 
 
-def build_optimizer(config, epochs, step_each_epoch, parameters):
+def build_optimizer(config, epochs, step_each_epoch, model):
     from . import regularizer, optimizer
     config = copy.deepcopy(config)
     # step1 build lr
@@ -43,6 +43,8 @@ def build_optimizer(config, epochs, step_each_epoch, parameters):
         if not hasattr(regularizer, reg_name):
             reg_name += 'Decay'
         reg = getattr(regularizer, reg_name)(**reg_config)()
+    elif 'weight_decay' in config:
+        reg = config.pop('weight_decay')
     else:
         reg = None
 
@@ -57,4 +59,4 @@ def build_optimizer(config, epochs, step_each_epoch, parameters):
         weight_decay=reg,
         grad_clip=grad_clip,
         **config)
-    return optim(parameters), lr
+    return optim(model), lr
diff --git a/ppocr/optimizer/optimizer.py b/ppocr/optimizer/optimizer.py
index b98081227e180edbf023a8b5b7a0b82bb7c631e5..c450a3a3684eb44cdc758a2b27783b5a81945c38 100644
--- a/ppocr/optimizer/optimizer.py
+++ b/ppocr/optimizer/optimizer.py
@@ -42,13 +42,13 @@ class Momentum(object):
         self.weight_decay = weight_decay
         self.grad_clip = grad_clip
 
-    def __call__(self, parameters):
+    def __call__(self, model):
         opt = optim.Momentum(
             learning_rate=self.learning_rate,
             momentum=self.momentum,
             weight_decay=self.weight_decay,
             grad_clip=self.grad_clip,
-            parameters=parameters)
+            parameters=model.parameters())
         return opt
 
 
@@ -75,7 +75,7 @@ class Adam(object):
         self.name = name
         self.lazy_mode = lazy_mode
 
-    def __call__(self, parameters):
+    def __call__(self, model):
         opt = optim.Adam(
             learning_rate=self.learning_rate,
             beta1=self.beta1,
@@ -85,7 +85,7 @@ class Adam(object):
             grad_clip=self.grad_clip,
             name=self.name,
             lazy_mode=self.lazy_mode,
-            parameters=parameters)
+            parameters=model.parameters())
         return opt
 
 
@@ -117,7 +117,7 @@ class RMSProp(object):
         self.weight_decay = weight_decay
         self.grad_clip = grad_clip
 
-    def __call__(self, parameters):
+    def __call__(self, model):
         opt = optim.RMSProp(
             learning_rate=self.learning_rate,
             momentum=self.momentum,
@@ -125,7 +125,7 @@ class RMSProp(object):
             epsilon=self.epsilon,
             weight_decay=self.weight_decay,
             grad_clip=self.grad_clip,
-            parameters=parameters)
+            parameters=model.parameters())
         return opt
 
 
@@ -148,7 +148,7 @@ class Adadelta(object):
         self.grad_clip = grad_clip
         self.name = name
 
-    def __call__(self, parameters):
+    def __call__(self, model):
         opt = optim.Adadelta(
             learning_rate=self.learning_rate,
             epsilon=self.epsilon,
@@ -156,7 +156,7 @@
             weight_decay=self.weight_decay,
             grad_clip=self.grad_clip,
             name=self.name,
-            parameters=parameters)
+            parameters=model.parameters())
         return opt
 
 
@@ -165,31 +165,54 @@ class AdamW(object):
                  learning_rate=0.001,
                  beta1=0.9,
                  beta2=0.999,
-                 epsilon=1e-08,
+                 epsilon=1e-8,
                  weight_decay=0.01,
+                 multi_precision=False,
                  grad_clip=None,
+                 no_weight_decay_name=None,
+                 one_dim_param_no_weight_decay=False,
                  name=None,
                  lazy_mode=False,
-                 **kwargs):
+                 **args):
+        super().__init__()
         self.learning_rate = learning_rate
         self.beta1 = beta1
         self.beta2 = beta2
         self.epsilon = epsilon
-        self.learning_rate = learning_rate
         self.weight_decay = 0.01 if weight_decay is None else weight_decay
         self.grad_clip = grad_clip
         self.name = name
         self.lazy_mode = lazy_mode
-
-    def __call__(self, parameters):
+        self.multi_precision = multi_precision
+        self.no_weight_decay_name_list = no_weight_decay_name.split(
+        ) if no_weight_decay_name else []
+        self.one_dim_param_no_weight_decay = one_dim_param_no_weight_decay
+
+    def __call__(self, model):
+        parameters = model.parameters()
+
+        self.no_weight_decay_param_name_list = [
+            p.name for n, p in model.named_parameters()
+            if any(nd in n for nd in self.no_weight_decay_name_list)
+        ]
+
+        if self.one_dim_param_no_weight_decay:
+            self.no_weight_decay_param_name_list += [
+                p.name for n, p in model.named_parameters() if len(p.shape) == 1
+            ]
+
         opt = optim.AdamW(
             learning_rate=self.learning_rate,
             beta1=self.beta1,
             beta2=self.beta2,
             epsilon=self.epsilon,
+            parameters=parameters,
             weight_decay=self.weight_decay,
+            multi_precision=self.multi_precision,
             grad_clip=self.grad_clip,
             name=self.name,
             lazy_mode=self.lazy_mode,
-            parameters=parameters)
+            apply_decay_param_fun=self._apply_decay_param_fun)
         return opt
+
+    def _apply_decay_param_fun(self, name):
+        return name not in self.no_weight_decay_param_name_list
\ No newline at end of file
diff --git a/ppocr/postprocess/__init__.py b/ppocr/postprocess/__init__.py
index f50b5f1c5f8e617066bb47636c8f4d2b171b6ecb..390f6f4560f9814a3af757a4fd16c55fe93d01f9 100644
--- a/ppocr/postprocess/__init__.py
+++ b/ppocr/postprocess/__init__.py
@@ -27,7 +27,7 @@ from .sast_postprocess import SASTPostProcess
 from .fce_postprocess import FCEPostProcess
 from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, \
     DistillationCTCLabelDecode, TableLabelDecode, NRTRLabelDecode, SARLabelDecode, \
-    SEEDLabelDecode, PRENLabelDecode
+    SEEDLabelDecode, PRENLabelDecode, SVTRLabelDecode
 from .cls_postprocess import ClsPostProcess
 from .pg_postprocess import PGPostProcess
 from .vqa_token_ser_layoutlm_postprocess import VQASerTokenLayoutLMPostProcess
@@ -42,7 +42,7 @@ def build_post_process(config, global_config=None):
         'DistillationDBPostProcess', 'NRTRLabelDecode', 'SARLabelDecode',
         'SEEDLabelDecode', 'VQASerTokenLayoutLMPostProcess',
         'VQAReTokenLayoutLMPostProcess', 'PRENLabelDecode',
-        'DistillationSARLabelDecode'
+        'DistillationSARLabelDecode', 'SVTRLabelDecode'
     ]
 
     if config['name'] == 'PSEPostProcess':
diff --git a/ppocr/postprocess/cls_postprocess.py b/ppocr/postprocess/cls_postprocess.py
index 77e7f46d6f774ffb81f8e9cbd6b100c780665dca..9a27ba0831358564d99a6ec698a5019eae1c25f7 100644
--- a/ppocr/postprocess/cls_postprocess.py
+++ b/ppocr/postprocess/cls_postprocess.py
@@ -17,17 +17,26 @@ import paddle
 class ClsPostProcess(object):
     """ Convert between text-label and text-index """
 
-    def __init__(self, label_list, **kwargs):
+    def __init__(self, label_list=None, key=None, **kwargs):
         super(ClsPostProcess, self).__init__()
         self.label_list = label_list
+        self.key = key
 
     def __call__(self, preds, label=None, *args, **kwargs):
+        if self.key is not None:
+            preds = preds[self.key]
+
+        label_list = self.label_list
+        if label_list is None:
+            label_list = {idx: idx for idx in range(preds.shape[-1])}
+
         if isinstance(preds, paddle.Tensor):
             preds = preds.numpy()
+
         pred_idxs = preds.argmax(axis=1)
-        decode_out = [(self.label_list[idx], preds[i, idx])
+        decode_out = [(label_list[idx], preds[i, idx])
                       for i, idx in enumerate(pred_idxs)]
         if label is None:
             return decode_out
-        label = [(self.label_list[idx], 1.0) for idx in label]
+        label = [(label_list[idx], 1.0) for idx in label]
         return decode_out, label
diff --git a/ppocr/postprocess/rec_postprocess.py b/ppocr/postprocess/rec_postprocess.py
index bf0fd890bf25949361665d212bf8e1a657054e5b..50f11f899fb4dd49da75199095772a92cc4a8d7b 100644
--- a/ppocr/postprocess/rec_postprocess.py
+++ b/ppocr/postprocess/rec_postprocess.py
@@ -752,3 +752,40 @@ class PRENLabelDecode(BaseRecLabelDecode):
             return text
         label = self.decode(label)
         return text, label
+
+
+class SVTRLabelDecode(BaseRecLabelDecode):
+    """ Convert between text-label and text-index """
+
+    def __init__(self, character_dict_path=None, use_space_char=False,
+                 **kwargs):
+        super(SVTRLabelDecode, self).__init__(character_dict_path,
+                                              use_space_char)
+
+    def __call__(self, preds, label=None, *args, **kwargs):
+        if isinstance(preds, tuple):
+            preds = preds[-1]
+        if isinstance(preds, paddle.Tensor):
+            preds = preds.numpy()
+        preds_idx = preds.argmax(axis=-1)
+        preds_prob = preds.max(axis=-1)
+
+        text = self.decode(preds_idx, preds_prob, is_remove_duplicate=True)
+        return_text = []
+        for i in range(0, len(text), 3):
+            text0 = text[i]
+            text1 = text[i + 1]
+            text2 = text[i + 2]
+
+            text_pred = [text0[0], text1[0], text2[0]]
+            text_prob = [text0[1], text1[1], text2[1]]
+            id_max = text_prob.index(max(text_prob))
+            return_text.append((text_pred[id_max], text_prob[id_max]))
+        if label is None:
+            return return_text
+        label = self.decode(label)
+        return return_text, label
+
+    def add_special_char(self, dict_character):
+        dict_character = ['blank'] + dict_character
+        return dict_character
\ No newline at end of file
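Note: SVTRLabelDecode above undoes the three-view resize at decode time: candidates arrive in groups of three (the original crop plus the two alternative views built by resize_norm_img_svtr) and the highest-probability decode wins. A toy sketch with made-up candidates:

    # each image contributes 3 (text, prob) candidates; keep the best per group
    candidates = [('svtr', 0.91), ('svtr', 0.88), ('rvts', 0.35),
                  ('ocr', 0.40), ('ocr', 0.95), ('ocn', 0.20)]
    best = [max(candidates[i:i + 3], key=lambda t: t[1])
            for i in range(0, len(candidates), 3)]
    print(best)  # [('svtr', 0.91), ('ocr', 0.95)]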
diff --git a/ppocr/utils/dict/ka_dict.txt b/ppocr/utils/dict/ka_dict.txt
index 33d605c4de106c3c4b2504f5b3c42cdadd076dd8..d506b691bd1a6c55299ad89a72cf3a69a2c879a9 100644
--- a/ppocr/utils/dict/ka_dict.txt
+++ b/ppocr/utils/dict/ka_dict.txt
@@ -21,7 +21,7 @@ l
 8
 .
 j
-p
+p
 ಗ
 ು
 ಣ
diff --git a/ppocr/utils/dict/ta_dict.txt b/ppocr/utils/dict/ta_dict.txt
index d1bae501ad2556bb59b16a6c4b27a27091a6cbcf..19d81892c205627f296adbf8b20ea41aba2de5d0 100644
--- a/ppocr/utils/dict/ta_dict.txt
+++ b/ppocr/utils/dict/ta_dict.txt
@@ -22,7 +22,7 @@ l
 8
 .
 j
-p
+p
 ப
 ூ
 த
diff --git a/test_tipc/configs/ch_PP-OCRv2_det/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_PP-OCRv2_det/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
new file mode 100644
index 0000000000000000000000000000000000000000..033d40a80a3569f8bfd408cdb6df37e7ba5ecd0c
--- /dev/null
+++ b/test_tipc/configs/ch_PP-OCRv2_det/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:ch_PPOCRv2_det
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:amp
+Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=500
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
+null:null
+##
+trainer:norm_train
+norm_train:tools/train.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o
+pact_train:null
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:null
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.checkpoints:
+norm_export:tools/export_model.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o
+quant_export:null
+fpgm_export:
+distill_export:null
+export1:null
+export2:null
+inference_dir:Student
+infer_model:./inference/ch_PP-OCRv2_det_infer/
+infer_export:null
+infer_quant:False
+inference:tools/infer/predict_det.py
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1
+--use_tensorrt:False|True
+--precision:fp32|fp16|int8
+--det_model_dir:
+--image_dir:./inference/ch_det_data_50/all-sum-510/
+null:null
+--benchmark:True
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,640,640]}];[{float32,[3,960,960]}]
diff --git a/test_tipc/configs/ch_PP-OCRv2_det_PACT/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_PP-OCRv2_det_PACT/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d922a4a5dad67da81e3c9cf7bed48a0431a88b84
--- /dev/null
+++ b/test_tipc/configs/ch_PP-OCRv2_det_PACT/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:ch_PPOCRv2_det_PACT
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:amp
+Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=500
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
+null:null
+##
+trainer:pact_train
+norm_train:null
+pact_train:deploy/slim/quantization/quant.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:null
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.checkpoints:
+norm_export:null
+quant_export:deploy/slim/quantization/export_model.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o
+fpgm_export:
+distill_export:null
+export1:null
+export2:null
+inference_dir:Student
+infer_model:./inference/ch_PP-OCRv2_det_infer/
+infer_export:null
+infer_quant:False
+inference:tools/infer/predict_det.py
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1
+--use_tensorrt:False|True
+--precision:fp32|fp16|int8
+--det_model_dir:
+--image_dir:./inference/ch_det_data_50/all-sum-510/
+null:null
+--benchmark:True
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,640,640]}];[{float32,[3,960,960]}]
diff --git a/test_tipc/configs/ch_PP-OCRv2_rec/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_PP-OCRv2_rec/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7c438cb8a3b6907c9ca352e90605d8b4f6fb17fd
--- /dev/null
+++ b/test_tipc/configs/ch_PP-OCRv2_rec/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:PPOCRv2_ocr_rec
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:amp
+Global.epoch_num:lite_train_lite_infer=3|whole_train_whole_infer=300
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=16|whole_train_whole_infer=128
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./inference/rec_inference
+null:null
+##
+trainer:norm_train
+norm_train:tools/train.py -c test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml -o
+pact_train:null
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:null
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.checkpoints:
+norm_export:tools/export_model.py -c test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml -o
+quant_export:
+fpgm_export:
+distill_export:null
+export1:null
+export2:null
+inference_dir:Student
+infer_model:./inference/ch_PP-OCRv2_rec_infer
+infer_export:null
+infer_quant:False
+inference:tools/infer/predict_rec.py
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1|6
+--use_tensorrt:False|True
+--precision:fp32|int8
+--rec_model_dir:
+--image_dir:./inference/rec_inference
+null:null
+--benchmark:True
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,32,320]}]
diff --git a/test_tipc/configs/ch_PP-OCRv2_rec_PACT/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_PP-OCRv2_rec_PACT/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e22d8a564b008206611469048b424b528dd379bd
--- /dev/null
+++ b/test_tipc/configs/ch_PP-OCRv2_rec_PACT/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:ch_PPOCRv2_rec_PACT
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:amp
+Global.epoch_num:lite_train_lite_infer=3|whole_train_whole_infer=300
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=16|whole_train_whole_infer=128
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./inference/rec_inference
+null:null
+##
+trainer:pact_train
+norm_train:null
+pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml -o
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:null
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.checkpoints:
+norm_export:null
+quant_export:deploy/slim/quantization/export_model.py -c test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml -o
+fpgm_export: null
+distill_export:null
+export1:null
+export2:null
+inference_dir:Student
+infer_model:./inference/ch_PP-OCRv2_rec_slim_quant_infer
+infer_export:null
+infer_quant:True
+inference:tools/infer/predict_rec.py
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1|6
+--use_tensorrt:False|True
+--precision:fp32|int8
+--rec_model_dir:
+--image_dir:./inference/rec_inference
+null:null
+--benchmark:True
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,32,320]}]
diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_det/det_mv3_db.yml b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/det_mv3_db.yml
deleted file mode 100644
index 5eada6d53dd3364238bdfc6a3c40515ca0726688..0000000000000000000000000000000000000000
--- a/test_tipc/configs/ch_ppocr_mobile_v2.0_det/det_mv3_db.yml
+++ /dev/null
@@ -1,126 +0,0 @@
-Global:
-  use_gpu: false
-  epoch_num: 5
-  log_smooth_window: 20
-  print_batch_step: 1
-  save_model_dir: ./output/db_mv3/
-  save_epoch_step: 1200
-  # evaluation is run every 2000 iterations
-  eval_batch_step: [0, 400]
-  cal_metric_during_train: False
-  pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
-  checkpoints:
-  save_inference_dir:
-  use_visualdl: False
-  infer_img: doc/imgs_en/img_10.jpg
-  save_res_path: ./output/det_db/predicts_db.txt
-
-Architecture:
-  model_type: det
-  algorithm: DB
-  Transform:
-  Backbone:
-    name: MobileNetV3
-    scale: 0.5
-    model_name: large
-    disable_se: False
-  Neck:
-    name: DBFPN
-    out_channels: 256
-  Head:
-    name: DBHead
-    k: 50
-
-Loss:
-  name: DBLoss
-  balance_loss: true
-  main_loss_type: DiceLoss
-  alpha: 5
-  beta: 10
-  ohem_ratio: 3
-
-Optimizer:
-  name: Adam #Momentum
-  #momentum: 0.9
-  beta1: 0.9
-  beta2: 0.999
-  lr:
-    learning_rate: 0.001
-  regularizer:
-    name: 'L2'
-    factor: 0
-
-PostProcess:
-  name: DBPostProcess
-  thresh: 0.3
-  box_thresh: 0.6
-  max_candidates: 1000
-  unclip_ratio: 1.5
-
-Metric:
-  name: DetMetric
-  main_indicator: hmean
-
-Train:
-  dataset:
-    name: SimpleDataSet
-    data_dir: ./train_data/icdar2015/text_localization/
-    label_file_list:
-      - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
-    ratio_list: [1.0]
-    transforms:
-      - DecodeImage: # load image
-          img_mode: BGR
-          channel_first: False
-      - DetLabelEncode: # Class handling label
-      - Resize:
-          size: [640, 640]
-      - MakeBorderMap:
-          shrink_ratio: 0.4
-          thresh_min: 0.3
-          thresh_max: 0.7
-      - MakeShrinkMap:
-          shrink_ratio: 0.4
-          min_text_size: 8
-      - NormalizeImage:
-          scale: 1./255.
-          mean: [0.485, 0.456, 0.406]
-          std: [0.229, 0.224, 0.225]
-          order: 'hwc'
-      - ToCHWImage:
-      - KeepKeys:
-          keep_keys: ['image', 'threshold_map', 'threshold_mask', 'shrink_map', 'shrink_mask'] # the order of the dataloader list
-  loader:
-    shuffle: False
-    drop_last: False
-    batch_size_per_card: 1
-    num_workers: 0
-    use_shared_memory: False
-
-Eval:
-  dataset:
-    name: SimpleDataSet
-    data_dir: ./train_data/icdar2015/text_localization/
-    label_file_list:
-      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
-    transforms:
-      - DecodeImage: # load image
-          img_mode: BGR
-          channel_first: False
-      - DetLabelEncode: # Class handling label
-      - DetResizeForTest:
-          image_shape: [736, 1280]
-      - NormalizeImage:
-          scale: 1./255.
-          mean: [0.485, 0.456, 0.406]
-          std: [0.229, 0.224, 0.225]
-          order: 'hwc'
-      - ToCHWImage:
-      - KeepKeys:
-          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
-  loader:
-    shuffle: False
-    drop_last: False
-    batch_size_per_card: 1 # must be 1
-    num_workers: 0
-    use_shared_memory: False
diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
index ff1c7432df75f78fd6c45d995f50a9642d44637c..593e7ec7ed42af9b65c520852ff6372f89890170 100644
--- a/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
+++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
@@ -1,10 +1,10 @@
 ===========================train_params===========================
-model_name:ocr_det
+model_name:ch_ppocr_mobile_v2.0_det
 python:python3.7
 gpu_list:0|0,1
 Global.use_gpu:True|True
 Global.auto_cast:amp
-Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300
+Global.epoch_num:lite_train_lite_infer=100|whole_train_whole_infer=300
 Global.save_model_dir:./output/
 Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4
 Global.pretrained_model:null
@@ -12,10 +12,10 @@ train_model_name:latest
 train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
 null:null
 ##
-trainer:norm_train|pact_train|fpgm_train
-norm_train:tools/train.py -c test_tipc/configs/ppocr_det_mobile/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained
-pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/ppocr_det_mobile/det_mv3_db.yml -o
-fpgm_train:deploy/slim/prune/sensitivity_anal.py -c test_tipc/configs/ppocr_det_mobile/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy
+trainer:norm_train
+norm_train:tools/train.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained
+pact_train:null
+fpgm_train:null
 distill_train:null
 null:null
 null:null
@@ -26,10 +26,10 @@ null:null
 ##
 ===========================infer_params===========================
 Global.save_inference_dir:./output/
-Global.pretrained_model:
-norm_export:tools/export_model.py -c test_tipc/configs/ppocr_det_mobile/det_mv3_db.yml -o
-quant_export:deploy/slim/quantization/export_model.py -c test_tipc/configs/ppocr_det_mobile/det_mv3_db.yml -o
-fpgm_export:deploy/slim/prune/export_prune_model.py -c test_tipc/configs/ppocr_det_mobile/det_mv3_db.yml -o
+Global.checkpoints:
+norm_export:tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o
+quant_export:null
+fpgm_export:null
 distill_export:null
 export1:null
 export2:null
@@ -49,3 +49,5 @@ inference:tools/infer/predict_det.py
 null:null
 --benchmark:True
 null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,640,640]}];[{float32,[3,960,960]}]
\ No newline at end of file
diff --git a/test_tipc/configs/ch_ppocr_mobile_V2.0_det_FPGM/train_infer_python.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det_FPGM/train_infer_python.txt
similarity index 97%
rename from test_tipc/configs/ch_ppocr_mobile_V2.0_det_FPGM/train_infer_python.txt
rename to test_tipc/configs/ch_ppocr_mobile_v2.0_det_FPGM/train_infer_python.txt
index 331d6bdb7103294eb1b33b9978e5f99c2212195b..47ccf2e69e75bc8c215be8d1837e5248d1b4b513 100644
--- a/test_tipc/configs/ch_ppocr_mobile_V2.0_det_FPGM/train_infer_python.txt
+++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_det_FPGM/train_infer_python.txt
@@ -1,5 +1,5 @@
 ===========================train_params===========================
-model_name:ocr_det
+model_name:ch_ppocr_mobile_v2.0_det_FPGM
 python:python3.7
 gpu_list:0|0,1
 Global.use_gpu:True|True
diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_det_FPGM/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det_FPGM/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5a95f026850b750bfadb85e0955f7426e5e73cb6
--- /dev/null
+++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_det_FPGM/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:ch_ppocr_mobile_v2.0_det_FPGM
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:amp
+Global.epoch_num:lite_train_lite_infer=5|whole_train_whole_infer=300
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
+null:null
+##
+trainer:fpgm_train
+norm_train:null
+pact_train:null
+fpgm_train:deploy/slim/prune/sensitivity_anal.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:null
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.checkpoints:
+norm_export:null
+quant_export:null
+fpgm_export:deploy/slim/prune/export_prune_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o
+distill_export:null
+export1:null
+export2:null
+inference_dir:null
+train_model:null
+infer_export:null
+infer_quant:False
+inference:tools/infer/predict_det.py
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1
+--use_tensorrt:False|True
+--precision:fp32|fp16|int8
+--det_model_dir:
+--image_dir:./inference/ch_det_data_50/all-sum-510/
+null:null
+--benchmark:True
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,640,640]}];[{float32,[3,960,960]}]
\ No newline at end of file
diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_det_PACT/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det_PACT/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1f9bec12ada6894fcffbe697ae4da2f0df95cc62
--- /dev/null
+++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_det_PACT/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:ch_ppocr_mobile_v2.0_det_PACT
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:amp
+Global.epoch_num:lite_train_lite_infer=20|whole_train_whole_infer=300
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
+null:null
+##
+trainer:pact_train
+norm_train:null
+pact_train:deploy/slim/quantization/quant.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:null
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.checkpoints:
+norm_export:null
+quant_export:deploy/slim/quantization/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o
+fpgm_export:null
+distill_export:null
+export1:null
+export2:null
+inference_dir:null
+train_model:./inference/ch_ppocr_mobile_v2.0_det_prune_infer/
+infer_export:null
+infer_quant:False
+inference:tools/infer/predict_det.py
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1
+--use_tensorrt:False|True
+--precision:fp32|fp16|int8
+--det_model_dir:
+--image_dir:./inference/ch_det_data_50/all-sum-510/
+null:null
+--benchmark:True
+null:null
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,640,640]}];[{float32,[3,960,960]}]
diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_rec/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
new file mode 100644
index 0000000000000000000000000000000000000000..30fb939bff646adf301191f88a9a499acf9c61de
--- /dev/null
+++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:ch_ppocr_mobile_v2.0_rec
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:amp
+Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=300
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./inference/rec_inference
+null:null
+##
+trainer:norm_train
+norm_train:tools/train.py -c configs/rec/rec_icdar15_train.yml -o
+pact_train:null
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:tools/eval.py -c configs/rec/rec_icdar15_train.yml -o
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.checkpoints:
+norm_export:tools/export_model.py -c configs/rec/rec_icdar15_train.yml -o
+quant_export:null
+fpgm_export:null +distill_export:null +export1:null +export2:null +## +train_model:./inference/ch_ppocr_mobile_v2.0_rec_train/best_accuracy +infer_export:tools/export_model.py -c configs/rec/rec_icdar15_train.yml -o +infer_quant:False +inference:tools/infer/predict_rec.py +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1|6 +--use_tensorrt:True|False +--precision:fp32|int8 +--rec_model_dir: +--image_dir:./inference/rec_inference +--save_log_path:./test/output/ +--benchmark:True +null:null +===========================infer_benchmark_params========================== +random_infer_input:[{float32,[3,32,100]}] diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_FPGM/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_FPGM/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt new file mode 100644 index 0000000000000000000000000000000000000000..fda9cf4ddec6d3ab64045a4a7fdbb62183212021 --- /dev/null +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_FPGM/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt @@ -0,0 +1,53 @@ +===========================train_params=========================== +model_name:ch_ppocr_mobile_v2.0_rec_FPGM +python:python3.7 +gpu_list:0 +Global.use_gpu:True|True +Global.auto_cast:amp +Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./train_data/ic15_data/test/word_1.png +null:null +## +trainer:fpgm_train +norm_train:null +pact_train:null +fpgm_train:deploy/slim/prune/sensitivity_anal.py -c test_tipc/configs/ch_ppocr_mobile_v2.0_rec_FPGM/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model=./pretrain_models/ch_ppocr_mobile_v2.0_rec_train/best_accuracy +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.checkpoints: +norm_export:null +quant_export:null +fpgm_export:deploy/slim/prune/export_prune_model.py -c test_tipc/configs/ch_ppocr_mobile_v2.0_rec_FPGM/rec_chinese_lite_train_v2.0.yml -o +distill_export:null +export1:null +export2:null +inference_dir:null +train_model:null +infer_export:null +infer_quant:False +inference:tools/infer/predict_rec.py +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp32|int8 +--rec_model_dir: +--image_dir:./inference/rec_inference +null:null +--benchmark:True +null:null +===========================infer_benchmark_params========================== +random_infer_input:[{float32,[3,32,320]}] \ No newline at end of file diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt new file mode 100644 index 0000000000000000000000000000000000000000..abed3cfba9b3f8c0ed626dbfcbda8621d8787001 --- /dev/null +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt @@ -0,0 +1,53 @@ +===========================train_params=========================== +model_name:ch_ppocr_mobile_v2.0_rec_PACT +python:python3.7 +gpu_list:0 +Global.use_gpu:True|True 
+Global.auto_cast:amp +Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128 +Global.checkpoints:null +train_model_name:latest +train_infer_img_dir:./train_data/ic15_data/test/word_1.png +null:null +## +trainer:pact_train +norm_train:null +pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/rec_chinese_lite_train_v2.0.yml -o +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.checkpoints: +norm_export:null +quant_export:deploy/slim/quantization/export_model.py -c test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/rec_chinese_lite_train_v2.0.yml -o +fpgm_export:null +distill_export:null +export1:null +export2:null +inference_dir:null +infer_model:./inference/ch_ppocr_mobile_v2.0_rec_slim_infer/ +infer_export:null +infer_quant:False +inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ppocr_keys_v1.txt --rec_image_shape="3,32,100" +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1|6 +--use_tensorrt:False|True +--precision:fp32|int8 +--rec_model_dir: +--image_dir:./inference/rec_inference +--save_log_path:./test/output/ +--benchmark:True +null:null +===========================infer_benchmark_params========================== +random_infer_input:[{float32,[3,32,320]}] diff --git a/test_tipc/configs/ch_ppocr_server_v2.0_det/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_server_v2.0_det/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt new file mode 100644 index 0000000000000000000000000000000000000000..3e3764e8c6f62c72ffb8ceb268c8ceee660d02de --- /dev/null +++ b/test_tipc/configs/ch_ppocr_server_v2.0_det/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt @@ -0,0 +1,53 @@ +===========================train_params=========================== +model_name:ch_ppocr_server_v2.0_det +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:amp +Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=300 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/ch_ppocr_server_v2.0_det/det_r50_vd_db.yml -o +quant_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:tools/eval.py -c test_tipc/configs/ch_ppocr_server_v2.0_det/det_r50_vd_db.yml -o +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.checkpoints: +norm_export:tools/export_model.py -c test_tipc/configs/ch_ppocr_server_v2.0_det/det_r50_vd_db.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +train_model:./inference/ch_ppocr_server_v2.0_det_train/best_accuracy +infer_export:tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_res18_db_v2.0.yml -o +infer_quant:False +inference:tools/infer/predict_det.py +--use_gpu:True|False
+--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp32|fp16|int8 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +--save_log_path:null +--benchmark:True +null:null +===========================infer_benchmark_params========================== +random_infer_input:[{float32,[3,640,640]}];[{float32,[3,960,960]}] \ No newline at end of file diff --git a/test_tipc/configs/ch_ppocr_server_v2.0_rec/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_server_v2.0_rec/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt new file mode 100644 index 0000000000000000000000000000000000000000..78c15047fb522127075591cc9687392af77a300a --- /dev/null +++ b/test_tipc/configs/ch_ppocr_server_v2.0_rec/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt @@ -0,0 +1,53 @@ +===========================train_params=========================== +model_name:ch_ppocr_server_v2.0_rec +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:amp +Global.epoch_num:lite_train_lite_infer=5|whole_train_whole_infer=100 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./inference/rec_inference +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/ch_ppocr_server_v2.0_rec/rec_icdar15_train.yml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:tools/eval.py -c test_tipc/configs/ch_ppocr_server_v2.0_rec/rec_icdar15_train.yml -o +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.checkpoints: +norm_export:tools/export_model.py -c test_tipc/configs/ch_ppocr_server_v2.0_rec/rec_icdar15_train.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +train_model:./inference/ch_ppocr_server_v2.0_rec_train/best_accuracy +infer_export:tools/export_model.py -c test_tipc/configs/ch_ppocr_server_v2.0_rec/rec_icdar15_train.yml -o +infer_quant:False +inference:tools/infer/predict_rec.py +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1|6 +--use_tensorrt:True|False +--precision:fp32|int8 +--rec_model_dir: +--image_dir:./inference/rec_inference +--save_log_path:./test/output/ +--benchmark:True +null:null +===========================infer_benchmark_params========================== +random_infer_input:[{float32,[3,32,100]}] diff --git a/tools/eval.py b/tools/eval.py index 1038090ab4e8da139ff180feec2495a5f401fc54..7fd4fa7ada7b1550bcca8766f5acb9b4d4ed2049 100755 --- a/tools/eval.py +++ b/tools/eval.py @@ -74,9 +74,11 @@ def main(): model = build_model(config['Architecture']) extra_input_models = ["SRN", "NRTR", "SAR", "SEED", "SVTR"] + extra_input = False if config['Architecture']['algorithm'] == 'Distillation': - extra_input = config['Architecture']['Models']['Teacher'][ - 'algorithm'] in extra_input_models + for key in config['Architecture']["Models"]: + extra_input = extra_input or config['Architecture']['Models'][key][ + 'algorithm'] in extra_input_models else: extra_input = config['Architecture']['algorithm'] in extra_input_models if "model_type" in config['Architecture'].keys(): diff --git a/tools/export_model.py b/tools/export_model.py index 
96cc05a2449ca005a4ea5767fc64c777a90d6114..1f9f29e396fe4960914ae802769b65d20c103bd3 100755 --- a/tools/export_model.py +++ b/tools/export_model.py @@ -31,7 +31,7 @@ from ppocr.utils.logging import get_logger from tools.program import load_config, merge_config, ArgsParser -def export_single_model(model, arch_config, save_path, logger): +def export_single_model(model, arch_config, save_path, logger, quanter=None): if arch_config["algorithm"] == "SRN": max_text_length = arch_config["Head"]["max_text_length"] other_shape = [ @@ -61,6 +61,11 @@ def export_single_model(model, arch_config, save_path, logger): paddle.static.InputSpec( shape=[None, 3, 48, -1], dtype="float32"), ] + else: + other_shape = [ + paddle.static.InputSpec( + shape=[None, 3, 64, 256], dtype="float32"), + ] model = to_static(model, input_spec=other_shape) elif arch_config["algorithm"] == "PREN": other_shape = [ @@ -90,7 +95,10 @@ def export_single_model(model, arch_config, save_path, logger): shape=[None] + infer_shape, dtype="float32") ]) - paddle.jit.save(model, save_path) + if quanter is None: + paddle.jit.save(model, save_path) + else: + quanter.save_quantized_model(model, save_path) logger.info("inference model is saved to {}".format(save_path)) return @@ -120,7 +128,6 @@ def main(): char_num = char_num - 2 out_channels_list['CTCLabelDecode'] = char_num out_channels_list['SARLabelDecode'] = char_num + 2 - loss_list = config['Loss']['loss_config_list'] config['Architecture']['Models'][key]['Head'][ 'out_channels_list'] = out_channels_list else: diff --git a/tools/infer/predict_rec.py b/tools/infer/predict_rec.py index d4fbc3888ce4b42138fa3eda7774156e7e751fcd..2abc0220937175f95ee4c1e4b0b949d24d5fa3e8 100755 --- a/tools/infer/predict_rec.py +++ b/tools/infer/predict_rec.py @@ -131,6 +131,17 @@ class TextRecognizer(object): padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) padding_im[:, :, 0:resized_w] = resized_image return padding_im + + def resize_norm_img_svtr(self, img, image_shape): + + imgC, imgH, imgW = image_shape + resized_image = cv2.resize( + img, (imgW, imgH), interpolation=cv2.INTER_LINEAR) + resized_image = resized_image.astype('float32') + resized_image = resized_image.transpose((2, 0, 1)) / 255 + resized_image -= 0.5 + resized_image /= 0.5 + return resized_image def resize_norm_img_srn(self, img, image_shape): imgC, imgH, imgW = image_shape @@ -263,12 +274,8 @@ class TextRecognizer(object): wh_ratio = w * 1.0 / h max_wh_ratio = max(max_wh_ratio, wh_ratio) for ino in range(beg_img_no, end_img_no): - if self.rec_algorithm != "SRN" and self.rec_algorithm != "SAR": - norm_img = self.resize_norm_img(img_list[indices[ino]], - max_wh_ratio) - norm_img = norm_img[np.newaxis, :] - norm_img_batch.append(norm_img) - elif self.rec_algorithm == "SAR": + + if self.rec_algorithm == "SAR": norm_img, _, _, valid_ratio = self.resize_norm_img_sar( img_list[indices[ino]], self.rec_image_shape) norm_img = norm_img[np.newaxis, :] @@ -276,7 +283,7 @@ class TextRecognizer(object): valid_ratios = [] valid_ratios.append(valid_ratio) norm_img_batch.append(norm_img) - else: + elif self.rec_algorithm == "SRN": norm_img = self.process_image_srn( img_list[indices[ino]], self.rec_image_shape, 8, 25) encoder_word_pos_list = [] @@ -288,6 +295,16 @@ class TextRecognizer(object): gsrm_slf_attn_bias1_list.append(norm_img[3]) gsrm_slf_attn_bias2_list.append(norm_img[4]) norm_img_batch.append(norm_img[0]) + elif self.rec_algorithm == "SVTR": + norm_img = self.resize_norm_img_svtr( + img_list[indices[ino]], self.rec_image_shape) + 
norm_img = norm_img[np.newaxis, :] + norm_img_batch.append(norm_img) + else: + norm_img = self.resize_norm_img(img_list[indices[ino]], + max_wh_ratio) + norm_img = norm_img[np.newaxis, :] + norm_img_batch.append(norm_img) norm_img_batch = np.concatenate(norm_img_batch) norm_img_batch = norm_img_batch.copy() if self.benchmark: diff --git a/tools/infer/utility.py b/tools/infer/utility.py index b16aecd496ec291fcbe9c66dccf3ec04bb662034..c92e8e152a9ee4d86d269aec7ff5645f23cad443 100644 --- a/tools/infer/utility.py +++ b/tools/infer/utility.py @@ -271,9 +271,10 @@ def create_predictor(args, mode, logger): elif mode == "rec": if args.rec_algorithm != "CRNN": use_dynamic_shape = False - min_input_shape = {"x": [1, 3, 32, 10]} - max_input_shape = {"x": [args.rec_batch_num, 3, 32, 1536]} - opt_input_shape = {"x": [args.rec_batch_num, 3, 32, 320]} + imgH = int(args.rec_image_shape.split(',')[-2]) + min_input_shape = {"x": [1, 3, imgH, 10]} + max_input_shape = {"x": [args.rec_batch_num, 3, imgH, 1536]} + opt_input_shape = {"x": [args.rec_batch_num, 3, imgH, 320]} elif mode == "cls": min_input_shape = {"x": [1, 3, 48, 10]} max_input_shape = {"x": [args.rec_batch_num, 3, 48, 1024]} @@ -300,8 +301,8 @@ def create_predictor(args, mode, logger): # enable memory optim config.enable_memory_optim() config.disable_glog_info() - config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass") + config.delete_pass("matmul_transpose_reshape_fuse_pass") if mode == 'table': config.delete_pass("fc_fuse_pass") # not supported for table config.switch_use_feed_fetch_ops(False) diff --git a/tools/infer_cls.py b/tools/infer_cls.py index 4be30bbb3c2f8bbf6a59179220faa942e6cc27b8..7fd6b536fbe50fb1240d84ca3a5e87236940c0f5 100755 --- a/tools/infer_cls.py +++ b/tools/infer_cls.py @@ -57,6 +57,8 @@ def main(): continue elif op_name == 'KeepKeys': op[op_name]['keep_keys'] = ['image'] + elif op_name == "SSLRotateResize": + op[op_name]["mode"] = "test" transforms.append(op) global_config['infer_mode'] = True ops = create_operators(transforms, global_config) diff --git a/tools/program.py b/tools/program.py index 1742f6c9557929accd52a1748add68f0e569a6b9..90fd309ae9e1ae23723d8e67c62a905e79a073d3 100755 --- a/tools/program.py +++ b/tools/program.py @@ -202,9 +202,11 @@ def train(config, use_srn = config['Architecture']['algorithm'] == "SRN" extra_input_models = ["SRN", "NRTR", "SAR", "SEED", "SVTR"] + extra_input = False if config['Architecture']['algorithm'] == 'Distillation': - extra_input = config['Architecture']['Models']['Teacher'][ - 'algorithm'] in extra_input_models + for key in config['Architecture']["Models"]: + extra_input = extra_input or config['Architecture']['Models'][key][ + 'algorithm'] in extra_input_models else: extra_input = config['Architecture']['algorithm'] in extra_input_models try: diff --git a/tools/train.py b/tools/train.py index 77e600ab6641f0baade072bb853db0d6d44052a4..42aba548d6bf5fc35f033ef2baca0fb54d79e75a 100755 --- a/tools/train.py +++ b/tools/train.py @@ -129,7 +129,7 @@ def main(config, device, logger, vdl_writer): config['Optimizer'], epochs=config['Global']['epoch_num'], step_each_epoch=len(train_dataloader), - parameters=model.parameters()) + model=model) # build metric eval_class = build_metric(config['Metric'])
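A note on the TIPC `train_*_python*.txt` configs added above: every line is a `key:value` pair, `|` separates alternative values that the test_tipc shell scripts sweep over (for example `--use_gpu:True|False` runs inference once on GPU and once on CPU), and the literal string `null` disables a field. A minimal, purely illustrative parser for one such line (the function name is hypothetical, not part of the repo):

```python
def parse_tipc_line(line: str):
    """Split one TIPC config line into its key and alternative values.

    Illustrative only: '|' separates values the TIPC scripts iterate over,
    and the literal 'null' marks a disabled field. Only the first ':' is
    treated as the separator, since values may contain paths or flags.
    """
    key, _, value = line.partition(':')
    values = value.split('|')
    return key, [None if v == 'null' else v for v in values]

# e.g. parse_tipc_line('--precision:fp32|fp16|int8')
#   -> ('--precision', ['fp32', 'fp16', 'int8'])
```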
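The parallel edits to tools/eval.py and tools/program.py replace the Teacher-only check with a loop over every sub-model of a Distillation architecture, so a Student running SVTR (or SRN, NRTR, SAR, SEED) also triggers the extra-input path. The shared logic is equivalent to this sketch (the helper name is hypothetical):

```python
def needs_extra_input(arch_config: dict) -> bool:
    """True if any (sub-)model's algorithm needs extra inputs at run time.

    Equivalent to the updated checks: for 'Distillation' every entry under
    Architecture.Models is inspected, not just 'Teacher'.
    """
    extra_input_models = ["SRN", "NRTR", "SAR", "SEED", "SVTR"]
    if arch_config['algorithm'] == 'Distillation':
        return any(sub['algorithm'] in extra_input_models
                   for sub in arch_config['Models'].values())
    return arch_config['algorithm'] in extra_input_models
```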
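The new `resize_norm_img_svtr` branch in tools/infer/predict_rec.py differs from the CRNN path in that it ignores the aspect ratio and resizes every crop to one fixed shape. A standalone sketch of the same transform, assuming a BGR uint8 crop as produced by cv2 (the function name is illustrative):

```python
import cv2
import numpy as np

def svtr_preprocess(img: np.ndarray, image_shape=(3, 64, 256)) -> np.ndarray:
    """Fixed-shape resize, then normalize to [-1, 1] in CHW layout.

    Mirrors resize_norm_img_svtr: no aspect-ratio-preserving padding,
    unlike the CRNN path, because SVTR consumes a fixed input shape.
    """
    imgC, imgH, imgW = image_shape  # channels unused: cv2.resize keeps them
    resized = cv2.resize(img, (imgW, imgH), interpolation=cv2.INTER_LINEAR)
    resized = resized.astype('float32').transpose((2, 0, 1)) / 255.0  # HWC->CHW, [0,1]
    resized -= 0.5
    resized /= 0.5  # [0,1] -> [-1,1]
    return resized
```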
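The tools/infer/utility.py change stops hardcoding a 32-pixel height in the recognizer's TensorRT dynamic-shape profile and instead parses it from `--rec_image_shape`, keeping the profile consistent for models such as SVTR, whose configs above use a height of 64. The same arithmetic, extracted as a sketch (function name hypothetical):

```python
def rec_trt_shapes(rec_image_shape: str, rec_batch_num: int):
    """Build min/max/opt TensorRT shape dicts for recognizer input 'x'.

    Same arithmetic as the updated create_predictor branch: the height is
    parsed from --rec_image_shape (e.g. "3,64,256") rather than fixed at 32.
    """
    imgH = int(rec_image_shape.split(',')[-2])
    return (
        {"x": [1, 3, imgH, 10]},                # min
        {"x": [rec_batch_num, 3, imgH, 1536]},  # max
        {"x": [rec_batch_num, 3, imgH, 320]},   # opt
    )
```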
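Finally, tools/train.py now passes the whole model to build_optimizer instead of `model.parameters()`, which gives the optimizer factory access to parameter names and shapes so it can honor the `no_weight_decay_name` and `one_dim_param_no_weight_decay` fields in the SVTR AdamW config. A sketch of the kind of selection this enables, under stated assumptions (names and logic are illustrative, not PaddleOCR's actual implementation):

```python
import paddle

def split_no_weight_decay(model: paddle.nn.Layer,
                          no_weight_decay_name: str = "norm pos_embed",
                          one_dim_param_no_weight_decay: bool = True):
    """Collect parameter names to exclude from L2 weight decay.

    Illustrates why the factory needs the model rather than a flat
    parameter list: exclusion is decided per parameter name and shape
    (1-D params such as biases and norm scales are typically skipped).
    """
    skip_tokens = no_weight_decay_name.split()
    excluded = []
    for name, param in model.named_parameters():
        if any(tok in name for tok in skip_tokens) or (
                one_dim_param_no_weight_decay and len(param.shape) == 1):
            excluded.append(name)
    return excluded
```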