diff --git a/doc/doc_ch/detection.md b/doc/doc_ch/detection.md index cfc9d52bf280400982a9fcd9941ddc4cce3f5e5c..f76ae7f842fb6b7002e084be59dc7ccb31f39771 100644 --- a/doc/doc_ch/detection.md +++ b/doc/doc_ch/detection.md @@ -247,3 +247,7 @@ Q1: 训练模型转inference 模型之后预测效果不一致? **A**:此类问题出现较多,问题多是trained model预测时候的预处理、后处理参数和inference model预测的时候的预处理、后处理参数不一致导致的。以det_mv3_db.yml配置文件训练的模型为例,训练模型、inference模型预测结果不一致问题解决方式如下: - 检查[trained model预处理](https://github.com/PaddlePaddle/PaddleOCR/blob/c1ed243fb68d5d466258243092e56cbae32e2c14/configs/det/det_mv3_db.yml#L116),和[inference model的预测预处理](https://github.com/PaddlePaddle/PaddleOCR/blob/c1ed243fb68d5d466258243092e56cbae32e2c14/tools/infer/predict_det.py#L42)函数是否一致。算法在评估的时候,输入图像大小会影响精度,为了和论文保持一致,训练icdar15配置文件中将图像resize到[736, 1280],但是在inference model预测的时候只有一套默认参数,会考虑到预测速度问题,默认限制图像最长边为960做resize的。训练模型预处理和inference模型的预处理函数位于[ppocr/data/imaug/operators.py](https://github.com/PaddlePaddle/PaddleOCR/blob/c1ed243fb68d5d466258243092e56cbae32e2c14/ppocr/data/imaug/operators.py#L147) - 检查[trained model后处理](https://github.com/PaddlePaddle/PaddleOCR/blob/c1ed243fb68d5d466258243092e56cbae32e2c14/configs/det/det_mv3_db.yml#L51),和[inference 后处理参数](https://github.com/PaddlePaddle/PaddleOCR/blob/c1ed243fb68d5d466258243092e56cbae32e2c14/tools/infer/utility.py#L50)是否一致。 + +Q2: 训练EAST模型提示找不到lanms库? 
+ +**A**:执行pip3 install lanms-nova 即可。 diff --git a/ppocr/postprocess/east_postprocess.py b/ppocr/postprocess/east_postprocess.py index ec6bf663854d3391bf8c584aa749dc6d1805d344..c194c81c6911aac0f9210109c37b76b44532e9c4 100755 --- a/ppocr/postprocess/east_postprocess.py +++ b/ppocr/postprocess/east_postprocess.py @@ -20,7 +20,6 @@ import numpy as np from .locality_aware_nms import nms_locality import cv2 import paddle -import lanms import os import sys @@ -61,6 +60,7 @@ class EASTPostProcess(object): """ restore text boxes from score map and geo map """ + score_map = score_map[0] geo_map = np.swapaxes(geo_map, 1, 0) geo_map = np.swapaxes(geo_map, 1, 2) @@ -76,8 +76,15 @@ class EASTPostProcess(object): boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32) boxes[:, :8] = text_box_restored.reshape((-1, 8)) boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]] - boxes = lanms.merge_quadrangle_n9(boxes, nms_thresh) - # boxes = nms_locality(boxes.astype(np.float64), nms_thresh) + + try: + import lanms + boxes = lanms.merge_quadrangle_n9(boxes, nms_thresh) + except: + print( + 'you should install lanms by pip3 install lanms-nova to speed up nms_locality' + ) + boxes = nms_locality(boxes.astype(np.float64), nms_thresh) if boxes.shape[0] == 0: return [] # Here we filter some low score boxes by the average score map, diff --git a/ppocr/utils/save_load.py b/ppocr/utils/save_load.py index 4b890f6fa352772e6ebe1614b798e1ce69cdd17c..f6013a406634ed110ea5af613a5f31e56ce90ead 100644 --- a/ppocr/utils/save_load.py +++ b/ppocr/utils/save_load.py @@ -67,6 +67,7 @@ def load_model(config, model, optimizer=None): if key not in params: logger.warning("{} not in loaded params {} !".format( key, params.keys())) + continue pre_value = params[key] if list(value.shape) == list(pre_value.shape): new_state_dict[key] = pre_value @@ -76,9 +77,14 @@ def load_model(config, model, optimizer=None): format(key, value.shape, pre_value.shape)) model.set_state_dict(new_state_dict) - optim_dict 
= paddle.load(checkpoints + '.pdopt') if optimizer is not None: - optimizer.set_state_dict(optim_dict) + if os.path.exists(checkpoints + '.pdopt'): + optim_dict = paddle.load(checkpoints + '.pdopt') + optimizer.set_state_dict(optim_dict) + else: + logger.warning( + "{}.pdopt is not exists, params of optimizer is not loaded". + format(checkpoints)) if os.path.exists(checkpoints + '.states'): with open(checkpoints + '.states', 'rb') as f: diff --git a/requirements.txt b/requirements.txt index 903b8eda055573621f5d5479e85b17986b702ead..0c87c5c95069a2699f5a3a50320c883c6118ffe7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,5 +12,4 @@ cython lxml premailer openpyxl -fasttext==0.9.1 -lanms-nova \ No newline at end of file +fasttext==0.9.1 \ No newline at end of file diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt new file mode 100644 index 0000000000000000000000000000000000000000..92f33c58c9e97347e53b778bde5a21472b769f36 --- /dev/null +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt @@ -0,0 +1,21 @@ +===========================kl_quant_params=========================== +model_name:ch_ppocr_mobile_v2.0_rec_KL +python:python3.7 +Global.pretrained_model:null +Global.save_inference_dir:null +infer_model:./inference/ch_ppocr_mobile_v2.0_rec_infer/ +infer_export:deploy/slim/quantization/quant_kl.py -c test_tipc/configs/ch_ppocr_mobile_v2.0_rec_KL/rec_chinese_lite_train_v2.0.yml -o +infer_quant:True +inference:tools/infer/predict_rec.py +--use_gpu:False|True +--enable_mkldnn:True +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:int8 +--det_model_dir: +--image_dir:./inference/rec_inference +null:null +--benchmark:True +null:null +null:null diff --git 
a/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_KL/rec_chinese_lite_train_v2.0.yml b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_KL/rec_chinese_lite_train_v2.0.yml new file mode 100644 index 0000000000000000000000000000000000000000..b06dafe7fdc01eadeee51e70dfa4e8c675bda531 --- /dev/null +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_KL/rec_chinese_lite_train_v2.0.yml @@ -0,0 +1,101 @@ +Global: + use_gpu: true + epoch_num: 500 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec_chinese_lite_v2.0 + save_epoch_step: 3 + # evaluation is run every 2000 iterations after the 0th iteration + eval_batch_step: [0, 2000] + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_words/ch/word_1.jpg + # for data or label process + character_dict_path: ppocr/utils/ppocr_keys_v1.txt + max_text_length: 25 + infer_mode: False + use_space_char: True + save_res_path: ./output/rec/predicts_chinese_lite_v2.0.txt + + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + name: Cosine + learning_rate: 0.001 + regularizer: + name: 'L2' + factor: 0.00001 + +Architecture: + model_type: rec + algorithm: CRNN + Transform: + Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: small + small_stride: [1, 2, 2, 2] + Neck: + name: SequenceEncoder + encoder_type: rnn + hidden_size: 48 + Head: + name: CTCHead + fc_decay: 0.00001 + +Loss: + name: CTCLoss + +PostProcess: + name: CTCLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + +Train: + dataset: + name: SimpleDataSet + data_dir: train_data/ic15_data + label_file_list: ["train_data/ic15_data/rec_gt_train.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - RecAug: + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 320] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: True + 
batch_size_per_card: 256 + drop_last: True + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: train_data/ic15_data + label_file_list: ["train_data/ic15_data/rec_gt_test.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 320] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 256 + num_workers: 8 diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/rec_chinese_lite_train_v2.0.yml b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/rec_chinese_lite_train_v2.0.yml new file mode 100644 index 0000000000000000000000000000000000000000..b06dafe7fdc01eadeee51e70dfa4e8c675bda531 --- /dev/null +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/rec_chinese_lite_train_v2.0.yml @@ -0,0 +1,101 @@ +Global: + use_gpu: true + epoch_num: 500 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec_chinese_lite_v2.0 + save_epoch_step: 3 + # evaluation is run every 2000 iterations after the 0th iteration + eval_batch_step: [0, 2000] + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_words/ch/word_1.jpg + # for data or label process + character_dict_path: ppocr/utils/ppocr_keys_v1.txt + max_text_length: 25 + infer_mode: False + use_space_char: True + save_res_path: ./output/rec/predicts_chinese_lite_v2.0.txt + + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + name: Cosine + learning_rate: 0.001 + regularizer: + name: 'L2' + factor: 0.00001 + +Architecture: + model_type: rec + algorithm: CRNN + Transform: + Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: small + small_stride: [1, 2, 2, 2] + Neck: + name: SequenceEncoder + encoder_type: rnn + hidden_size: 48 + Head: + name: CTCHead + fc_decay: 
0.00001 + +Loss: + name: CTCLoss + +PostProcess: + name: CTCLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + +Train: + dataset: + name: SimpleDataSet + data_dir: train_data/ic15_data + label_file_list: ["train_data/ic15_data/rec_gt_train.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - RecAug: + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 320] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: True + batch_size_per_card: 256 + drop_last: True + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: train_data/ic15_data + label_file_list: ["train_data/ic15_data/rec_gt_test.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 320] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 256 + num_workers: 8 diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/train_infer_python.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..7bbdd58ae13eca00623123cf2ca39d3b76daa72a --- /dev/null +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:ch_ppocr_mobile_v2.0_rec_PACT +python:python3.7 +gpu_list:0 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128 +Global.checkpoints:null +train_model_name:latest +train_infer_img_dir:./train_data/ic15_data/test/word_1.png 
+null:null +## +trainer:pact_train +norm_train:null +pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/rec_chinese_lite_train_v2.0.yml -o +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.checkpoints: +norm_export:null +quant_export:deploy/slim/quantization/export_model.py -c test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/rec_chinese_lite_train_v2.0.yml -o +fpgm_export:null +distill_export:null +export1:null +export2:null +inference_dir:null +train_model:null +infer_export:null +infer_quant:False +inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ppocr_keys_v1.txt --rec_image_shape="3,32,100" +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1|6 +--use_tensorrt:False|True +--precision:fp32|fp16|int8 +--rec_model_dir: +--image_dir:./inference/rec_inference +--save_log_path:./test/output/ +--benchmark:True +null:null \ No newline at end of file diff --git a/test_tipc/configs/det_mv3_pse_v2.0/det_mv3_pse.yml b/test_tipc/configs/det_mv3_pse_v2.0/det_mv3_pse.yml new file mode 100644 index 0000000000000000000000000000000000000000..d37fdcfbb5b27404403674d99c1b8abe8cd65e85 --- /dev/null +++ b/test_tipc/configs/det_mv3_pse_v2.0/det_mv3_pse.yml @@ -0,0 +1,135 @@ +Global: + use_gpu: true + epoch_num: 600 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/det_mv3_pse/ + save_epoch_step: 600 + # evaluation is run every 1000 iterations + eval_batch_step: [ 0,1000 ] + cal_metric_during_train: False + pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained + checkpoints: #./output/det_r50_vd_pse_batch8_ColorJitter/best_accuracy + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_en/img_10.jpg + save_res_path: 
./output/det_pse/predicts_pse.txt + +Architecture: + model_type: det + algorithm: PSE + Transform: null + Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: large + Neck: + name: FPN + out_channels: 96 + Head: + name: PSEHead + hidden_dim: 96 + out_channels: 7 + +Loss: + name: PSELoss + alpha: 0.7 + ohem_ratio: 3 + kernel_sample_mask: pred + reduction: none + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + name: Step + learning_rate: 0.001 + step_size: 200 + gamma: 0.1 + regularizer: + name: 'L2' + factor: 0.0005 + +PostProcess: + name: PSEPostProcess + thresh: 0 + box_thresh: 0.85 + min_area: 16 + box_type: box # 'box' or 'poly' + scale: 1 + +Metric: + name: DetMetric + main_indicator: hmean + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/icdar2015/text_localization/ + label_file_list: + - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt + ratio_list: [ 1.0 ] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - ColorJitter: + brightness: 0.12549019607843137 + saturation: 0.5 + - IaaAugment: + augmenter_args: + - { 'type': Resize, 'args': { 'size': [ 0.5, 3 ] } } + - { 'type': Fliplr, 'args': { 'p': 0.5 } } + - { 'type': Affine, 'args': { 'rotate': [ -10, 10 ] } } + - MakePseGt: + kernel_num: 7 + min_shrink_ratio: 0.4 + size: 640 + - RandomCropImgMask: + size: [ 640,640 ] + main_key: gt_text + crop_keys: [ 'image', 'gt_text', 'gt_kernels', 'mask' ] + - NormalizeImage: + scale: 1./255. 
+ mean: [ 0.485, 0.456, 0.406 ] + std: [ 0.229, 0.224, 0.225 ] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: [ 'image', 'gt_text', 'gt_kernels', 'mask' ] # the order of the dataloader list + loader: + shuffle: True + drop_last: False + batch_size_per_card: 16 + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/icdar2015/text_localization/ + label_file_list: + - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt + ratio_list: [ 1.0 ] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - DetResizeForTest: + limit_side_len: 736 + limit_type: min + - NormalizeImage: + scale: 1./255. + mean: [ 0.485, 0.456, 0.406 ] + std: [ 0.229, 0.224, 0.225 ] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: [ 'image', 'shape', 'polys', 'ignore_tags' ] + loader: + shuffle: False + drop_last: False + batch_size_per_card: 1 # must be 1 + num_workers: 8 \ No newline at end of file diff --git a/test_tipc/configs/det_mv3_pse_v2.0/train_infer_python.txt b/test_tipc/configs/det_mv3_pse_v2.0/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..f9909027f10d9e9f96d65f9f5a1c5f3fd5c9e1c6 --- /dev/null +++ b/test_tipc/configs/det_mv3_pse_v2.0/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:det_mv3_pse_v2.0 +python:python3.7 +gpu_list:0 +Global.use_gpu:True|True +Global.auto_cast:fp32 +Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=500 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/det_mv3_pse_v2.0/det_mv3_pse.yml -o +pact_train:null 
+fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/det_mv3_pse_v2.0/det_mv3_pse.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +train_model:./inference/det_mv3_pse/best_accuracy +infer_export:tools/export_model.py -c test_tipc/configs/det_mv3_pse_v2.0/det_mv3_pse.yml -o +infer_quant:False +inference:tools/infer/predict_det.py +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp32|fp16|int8 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +--save_log_path:null +--benchmark:True +--det_algorithm:PSE diff --git a/test_tipc/configs/det_r50_vd_east_v2.0/train_infer_python.txt b/test_tipc/configs/det_r50_vd_east_v2.0/train_infer_python.txt index e9eaa779520f78622509153482fd6a84322c9cc5..dfb376237ee35c277fcd86a88328c562d5c0429a 100644 --- a/test_tipc/configs/det_r50_vd_east_v2.0/train_infer_python.txt +++ b/test_tipc/configs/det_r50_vd_east_v2.0/train_infer_python.txt @@ -34,7 +34,7 @@ distill_export:null export1:null export2:null ## -train_model:./inference/det_mv3_east/best_accuracy +train_model:./inference/det_r50_vd_east/best_accuracy infer_export:tools/export_model.py -c test_tipc/cconfigs/det_r50_vd_east_v2.0/det_r50_vd_east.yml -o infer_quant:False inference:tools/infer/predict_det.py diff --git a/test_tipc/configs/det_r50_vd_pse_v2.0/det_r50_vd_pse.yml b/test_tipc/configs/det_r50_vd_pse_v2.0/det_r50_vd_pse.yml new file mode 100644 index 0000000000000000000000000000000000000000..5ebc4252718d5572837eac58061bf6f9eb35bf73 --- /dev/null +++ b/test_tipc/configs/det_r50_vd_pse_v2.0/det_r50_vd_pse.yml @@ -0,0 +1,134 @@ +Global: + 
use_gpu: true + epoch_num: 600 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/det_r50_vd_pse/ + save_epoch_step: 600 + # evaluation is run every 1000 iterations + eval_batch_step: [ 0,1000 ] + cal_metric_during_train: False + pretrained_model: + checkpoints: #./output/det_r50_vd_pse_batch8_ColorJitter/best_accuracy + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_en/img_10.jpg + save_res_path: ./output/det_pse/predicts_pse.txt + +Architecture: + model_type: det + algorithm: PSE + Transform: + Backbone: + name: ResNet + layers: 50 + Neck: + name: FPN + out_channels: 256 + Head: + name: PSEHead + hidden_dim: 256 + out_channels: 7 + +Loss: + name: PSELoss + alpha: 0.7 + ohem_ratio: 3 + kernel_sample_mask: pred + reduction: none + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + name: Step + learning_rate: 0.0001 + step_size: 200 + gamma: 0.1 + regularizer: + name: 'L2' + factor: 0.0005 + +PostProcess: + name: PSEPostProcess + thresh: 0 + box_thresh: 0.85 + min_area: 16 + box_type: box # 'box' or 'poly' + scale: 1 + +Metric: + name: DetMetric + main_indicator: hmean + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/icdar2015/text_localization/ + label_file_list: + - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt + ratio_list: [ 1.0 ] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - ColorJitter: + brightness: 0.12549019607843137 + saturation: 0.5 + - IaaAugment: + augmenter_args: + - { 'type': Resize, 'args': { 'size': [ 0.5, 3 ] } } + - { 'type': Fliplr, 'args': { 'p': 0.5 } } + - { 'type': Affine, 'args': { 'rotate': [ -10, 10 ] } } + - MakePseGt: + kernel_num: 7 + min_shrink_ratio: 0.4 + size: 640 + - RandomCropImgMask: + size: [ 640,640 ] + main_key: gt_text + crop_keys: [ 'image', 'gt_text', 'gt_kernels', 'mask' ] + - NormalizeImage: + scale: 1./255. 
+ mean: [ 0.485, 0.456, 0.406 ] + std: [ 0.229, 0.224, 0.225 ] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: [ 'image', 'gt_text', 'gt_kernels', 'mask' ] # the order of the dataloader list + loader: + shuffle: True + drop_last: False + batch_size_per_card: 8 + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/icdar2015/text_localization/ + label_file_list: + - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt + ratio_list: [ 1.0 ] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - DetResizeForTest: + limit_side_len: 736 + limit_type: min + - NormalizeImage: + scale: 1./255. + mean: [ 0.485, 0.456, 0.406 ] + std: [ 0.229, 0.224, 0.225 ] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: [ 'image', 'shape', 'polys', 'ignore_tags' ] + loader: + shuffle: False + drop_last: False + batch_size_per_card: 1 # must be 1 + num_workers: 8 \ No newline at end of file diff --git a/test_tipc/configs/det_r50_vd_pse_v2.0/train_infer_python.txt b/test_tipc/configs/det_r50_vd_pse_v2.0/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ab6d45d7c1eb5e3c17fd53a8c8c504812c1012c --- /dev/null +++ b/test_tipc/configs/det_r50_vd_pse_v2.0/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:det_r50_vd_pse_v2.0 +python:python3.7 +gpu_list:0 +Global.use_gpu:True|True +Global.auto_cast:fp32 +Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=500 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/det_r50_vd_pse_v2.0/det_r50_vd_pse.yml -o 
+pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/det_r50_vd_pse_v2.0/det_r50_vd_pse.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +train_model:./inference/det_r50_vd_pse/best_accuracy +infer_export:tools/export_model.py -c test_tipc/configs/det_r50_vd_pse_v2.0/det_r50_vd_pse.yml -o +infer_quant:False +inference:tools/infer/predict_det.py +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp32|fp16|int8 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +--save_log_path:null +--benchmark:True +--det_algorithm:PSE diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh index bc9b29032bec2b36f9acd6beddc4226e27a9847c..d3676415e83df1754b2eab0e8b24de07133169b6 100644 --- a/test_tipc/prepare.sh +++ b/test_tipc/prepare.sh @@ -174,6 +174,13 @@ if [ ${MODE} = "klquant_whole_infer" ]; then wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar --no-check-certificate cd ./inference && tar xf ${eval_model_name}.tar && tar xf ch_det_data_50.tar && cd ../ fi + if [ ${model_name} = "ch_ppocr_mobile_v2.0_rec_KL" ]; then + wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar --no-check-certificate + wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/rec_inference.tar --no-check-certificate + wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ic15_data.tar --no-check-certificate + cd ./train_data/ && tar xf ic15_data.tar && cd ../ + cd ./inference && tar xf 
ch_ppocr_mobile_v2.0_rec_infer.tar && tar xf rec_inference.tar && cd ../ + fi fi if [ ${MODE} = "cpp_infer" ];then