diff --git a/README_ch.md b/README_ch.md index 7e8a8e241be1e22ddcc74bcd99d78225b32a91fa..7c6dcf5ea7cf1a0964c0a5f2dc5df28ea5466f14 100755 --- a/README_ch.md +++ b/README_ch.md @@ -81,7 +81,7 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库,助力 | 模型简介 | 模型名称 |推荐场景 | 检测模型 | 方向分类器 | 识别模型 | | ------------ | --------------- | ----------------|---- | ---------- | -------- | -| 中英文超轻量PP-OCRv2模型(13.0M) | ch_PP-OCRv2_xx |移动端&服务器端|[推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/chinese/ch_PP-OCRv2_det_distill_train.tar)| [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_train.tar)| +| 中英文超轻量PP-OCRv2模型(13.0M) | ch_PP-OCRv2_xx |移动端&服务器端|[推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar)| [推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_train.tar)| | 中英文超轻量PP-OCR mobile模型(9.4M) | ch_ppocr_mobile_v2.0_xx |移动端&服务器端|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_pre.tar) | | 中英文通用PP-OCR server模型(143.4M) |ch_ppocr_server_v2.0_xx|服务器端 |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar) |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [预训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_pre.tar) | diff --git a/configs/det/det_mv3_pse.yml b/configs/det/det_mv3_pse.yml new file mode 100644 index 0000000000000000000000000000000000000000..61ac24727acbd4f0b1eea15af08c0f9e71ce95a3 --- /dev/null +++ b/configs/det/det_mv3_pse.yml @@ -0,0 +1,135 @@ +Global: + use_gpu: true + epoch_num: 600 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/det_mv3_pse/ + save_epoch_step: 600 + # evaluation is run every 63 iterations + eval_batch_step: [ 0,63 ] + cal_metric_during_train: False + pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained + checkpoints: #./output/det_r50_vd_pse_batch8_ColorJitter/best_accuracy + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_en/img_10.jpg + save_res_path: ./output/det_pse/predicts_pse.txt 
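+
+# With the 1,000 ICDAR2015 training images and batch_size_per_card: 16 in the
+# Train loader below, one epoch is ~63 iterations, hence eval_batch_step: [ 0,63 ].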
+ +Architecture: + model_type: det + algorithm: PSE + Transform: null + Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: large + Neck: + name: FPN + out_channels: 96 + Head: + name: PSEHead + hidden_dim: 96 + out_channels: 7 + +Loss: + name: PSELoss + alpha: 0.7 + ohem_ratio: 3 + kernel_sample_mask: pred + reduction: none + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + name: Step + learning_rate: 0.001 + step_size: 200 + gamma: 0.1 + regularizer: + name: 'L2' + factor: 0.0005 + +PostProcess: + name: PSEPostProcess + thresh: 0 + box_thresh: 0.85 + min_area: 16 + box_type: box # 'box' or 'poly' + scale: 1 + +Metric: + name: DetMetric + main_indicator: hmean + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/icdar2015/text_localization/ + label_file_list: + - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt + ratio_list: [ 1.0 ] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - ColorJitter: + brightness: 0.12549019607843137 + saturation: 0.5 + - IaaAugment: + augmenter_args: + - { 'type': Resize, 'args': { 'size': [ 0.5, 3 ] } } + - { 'type': Fliplr, 'args': { 'p': 0.5 } } + - { 'type': Affine, 'args': { 'rotate': [ -10, 10 ] } } + - MakePseGt: + kernel_num: 7 + min_shrink_ratio: 0.4 + size: 640 + - RandomCropImgMask: + size: [ 640,640 ] + main_key: gt_text + crop_keys: [ 'image', 'gt_text', 'gt_kernels', 'mask' ] + - NormalizeImage: + scale: 1./255. + mean: [ 0.485, 0.456, 0.406 ] + std: [ 0.229, 0.224, 0.225 ] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: [ 'image', 'gt_text', 'gt_kernels', 'mask' ] # the order of the dataloader list + loader: + shuffle: True + drop_last: False + batch_size_per_card: 16 + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/icdar2015/text_localization/ + label_file_list: + - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt + ratio_list: [ 1.0 ] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - DetResizeForTest: + limit_side_len: 736 + limit_type: min + - NormalizeImage: + scale: 1./255. 
+ mean: [ 0.485, 0.456, 0.406 ] + std: [ 0.229, 0.224, 0.225 ] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: [ 'image', 'shape', 'polys', 'ignore_tags' ] + loader: + shuffle: False + drop_last: False + batch_size_per_card: 1 # must be 1 + num_workers: 8 \ No newline at end of file diff --git a/configs/det/det_r50_vd_pse.yml b/configs/det/det_r50_vd_pse.yml new file mode 100644 index 0000000000000000000000000000000000000000..4629210747d3b61344cc47b11dcff01e6b738586 --- /dev/null +++ b/configs/det/det_r50_vd_pse.yml @@ -0,0 +1,134 @@ +Global: + use_gpu: true + epoch_num: 600 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/det_r50_vd_pse/ + save_epoch_step: 600 + # evaluation is run every 125 iterations + eval_batch_step: [ 0,125 ] + cal_metric_during_train: False + pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained + checkpoints: #./output/det_r50_vd_pse_batch8_ColorJitter/best_accuracy + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_en/img_10.jpg + save_res_path: ./output/det_pse/predicts_pse.txt + +Architecture: + model_type: det + algorithm: PSE + Transform: + Backbone: + name: ResNet + layers: 50 + Neck: + name: FPN + out_channels: 256 + Head: + name: PSEHead + hidden_dim: 256 + out_channels: 7 + +Loss: + name: PSELoss + alpha: 0.7 + ohem_ratio: 3 + kernel_sample_mask: pred + reduction: none + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + name: Step + learning_rate: 0.0001 + step_size: 200 + gamma: 0.1 + regularizer: + name: 'L2' + factor: 0.0005 + +PostProcess: + name: PSEPostProcess + thresh: 0 + box_thresh: 0.85 + min_area: 16 + box_type: box # 'box' or 'poly' + scale: 1 + +Metric: + name: DetMetric + main_indicator: hmean + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/icdar2015/text_localization/ + label_file_list: + - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt + ratio_list: [ 1.0 ] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - ColorJitter: + brightness: 0.12549019607843137 + saturation: 0.5 + - IaaAugment: + augmenter_args: + - { 'type': Resize, 'args': { 'size': [ 0.5, 3 ] } } + - { 'type': Fliplr, 'args': { 'p': 0.5 } } + - { 'type': Affine, 'args': { 'rotate': [ -10, 10 ] } } + - MakePseGt: + kernel_num: 7 + min_shrink_ratio: 0.4 + size: 640 + - RandomCropImgMask: + size: [ 640,640 ] + main_key: gt_text + crop_keys: [ 'image', 'gt_text', 'gt_kernels', 'mask' ] + - NormalizeImage: + scale: 1./255. + mean: [ 0.485, 0.456, 0.406 ] + std: [ 0.229, 0.224, 0.225 ] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: [ 'image', 'gt_text', 'gt_kernels', 'mask' ] # the order of the dataloader list + loader: + shuffle: True + drop_last: False + batch_size_per_card: 8 + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/icdar2015/text_localization/ + label_file_list: + - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt + ratio_list: [ 1.0 ] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - DetResizeForTest: + limit_side_len: 736 + limit_type: min + - NormalizeImage: + scale: 1./255. 
+ mean: [ 0.485, 0.456, 0.406 ] + std: [ 0.229, 0.224, 0.225 ] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: [ 'image', 'shape', 'polys', 'ignore_tags' ] + loader: + shuffle: False + drop_last: False + batch_size_per_card: 1 # must be 1 + num_workers: 8 \ No newline at end of file diff --git a/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml b/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml index 27ba4fd70b9a7ee7d4d905b3948f6cbf2b7e9469..38f77f7372c4e422b5601deb5119c24fd1e3f787 100644 --- a/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml +++ b/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml @@ -4,7 +4,7 @@ Global: epoch_num: 800 log_smooth_window: 20 print_batch_step: 10 - save_model_dir: ./output/rec_chinese_lite_distillation_v2.1 + save_model_dir: ./output/rec_mobile_pp-OCRv2 save_epoch_step: 3 eval_batch_step: [0, 2000] cal_metric_during_train: true @@ -19,7 +19,7 @@ Global: infer_mode: false use_space_char: true distributed: true - save_res_path: ./output/rec/predicts_chinese_lite_distillation_v2.1.txt + save_res_path: ./output/rec/predicts_mobile_pp-OCRv2.txt Optimizer: @@ -35,79 +35,32 @@ Optimizer: name: L2 factor: 2.0e-05 + Architecture: - model_type: &model_type "rec" - name: DistillationModel - algorithm: Distillation - Models: - Teacher: - pretrained: - freeze_params: false - return_all_feats: true - model_type: *model_type - algorithm: CRNN - Transform: - Backbone: - name: MobileNetV1Enhance - scale: 0.5 - Neck: - name: SequenceEncoder - encoder_type: rnn - hidden_size: 64 - Head: - name: CTCHead - mid_channels: 96 - fc_decay: 0.00002 - Student: - pretrained: - freeze_params: false - return_all_feats: true - model_type: *model_type - algorithm: CRNN - Transform: - Backbone: - name: MobileNetV1Enhance - scale: 0.5 - Neck: - name: SequenceEncoder - encoder_type: rnn - hidden_size: 64 - Head: - name: CTCHead - mid_channels: 96 - fc_decay: 0.00002 - + model_type: rec + algorithm: CRNN + Transform: + Backbone: + name: MobileNetV1Enhance + scale: 0.5 + Neck: + name: SequenceEncoder + encoder_type: rnn + hidden_size: 64 + Head: + name: CTCHead + mid_channels: 96 + fc_decay: 0.00002 Loss: - name: CombinedLoss - loss_config_list: - - DistillationCTCLoss: - weight: 1.0 - model_name_list: ["Student", "Teacher"] - key: head_out - - DistillationDMLLoss: - weight: 1.0 - act: "softmax" - model_name_pairs: - - ["Student", "Teacher"] - key: head_out - - DistillationDistanceLoss: - weight: 1.0 - mode: "l2" - model_name_pairs: - - ["Student", "Teacher"] - key: backbone_out + name: CTCLoss PostProcess: - name: DistillationCTCLabelDecode - model_name: ["Student", "Teacher"] - key: head_out + name: CTCLabelDecode Metric: - name: DistillationMetric - base_metric_name: RecMetric + name: RecMetric main_indicator: acc - key: "Student" Train: dataset: @@ -132,7 +85,6 @@ Train: shuffle: true batch_size_per_card: 128 drop_last: true - num_sections: 1 num_workers: 8 Eval: dataset: diff --git a/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml b/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml new file mode 100644 index 0000000000000000000000000000000000000000..d2308fd5747f3fadf3bb1c98c5602c67d5e63eca --- /dev/null +++ b/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml @@ -0,0 +1,160 @@ +Global: + debug: false + use_gpu: true + epoch_num: 800 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec_pp-OCRv2_distillation + save_epoch_step: 3 + eval_batch_step: [0, 2000] + cal_metric_during_train: true + pretrained_model: + checkpoints: + save_inference_dir: + 
use_visualdl: false + infer_img: doc/imgs_words/ch/word_1.jpg + character_dict_path: ppocr/utils/ppocr_keys_v1.txt + character_type: ch + max_text_length: 25 + infer_mode: false + use_space_char: true + distributed: true + save_res_path: ./output/rec/predicts_pp-OCRv2_distillation.txt + + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + name: Piecewise + decay_epochs : [700, 800] + values : [0.001, 0.0001] + warmup_epoch: 5 + regularizer: + name: L2 + factor: 2.0e-05 + +Architecture: + model_type: &model_type "rec" + name: DistillationModel + algorithm: Distillation + Models: + Teacher: + pretrained: + freeze_params: false + return_all_feats: true + model_type: *model_type + algorithm: CRNN + Transform: + Backbone: + name: MobileNetV1Enhance + scale: 0.5 + Neck: + name: SequenceEncoder + encoder_type: rnn + hidden_size: 64 + Head: + name: CTCHead + mid_channels: 96 + fc_decay: 0.00002 + Student: + pretrained: + freeze_params: false + return_all_feats: true + model_type: *model_type + algorithm: CRNN + Transform: + Backbone: + name: MobileNetV1Enhance + scale: 0.5 + Neck: + name: SequenceEncoder + encoder_type: rnn + hidden_size: 64 + Head: + name: CTCHead + mid_channels: 96 + fc_decay: 0.00002 + + +Loss: + name: CombinedLoss + loss_config_list: + - DistillationCTCLoss: + weight: 1.0 + model_name_list: ["Student", "Teacher"] + key: head_out + - DistillationDMLLoss: + weight: 1.0 + act: "softmax" + use_log: true + model_name_pairs: + - ["Student", "Teacher"] + key: head_out + - DistillationDistanceLoss: + weight: 1.0 + mode: "l2" + model_name_pairs: + - ["Student", "Teacher"] + key: backbone_out + +PostProcess: + name: DistillationCTCLabelDecode + model_name: ["Student", "Teacher"] + key: head_out + +Metric: + name: DistillationMetric + base_metric_name: RecMetric + main_indicator: acc + key: "Student" + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ + label_file_list: + - ./train_data/train_list.txt + transforms: + - DecodeImage: + img_mode: BGR + channel_first: false + - RecAug: + - CTCLabelEncode: + - RecResizeImg: + image_shape: [3, 32, 320] + - KeepKeys: + keep_keys: + - image + - label + - length + loader: + shuffle: true + batch_size_per_card: 128 + drop_last: true + num_sections: 1 + num_workers: 8 +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data + label_file_list: + - ./train_data/val_list.txt + transforms: + - DecodeImage: + img_mode: BGR + channel_first: false + - CTCLabelEncode: + - RecResizeImg: + image_shape: [3, 32, 320] + - KeepKeys: + keep_keys: + - image + - label + - length + loader: + shuffle: false + drop_last: false + batch_size_per_card: 128 + num_workers: 8 diff --git a/configs/rec/rec_mtb_nrtr.yml b/configs/rec/rec_mtb_nrtr.yml index 635c392d705acd1fcfbf9f744a8d7167c448d74c..8639a28a931247ee34f2e3842407fd1d2e065950 100644 --- a/configs/rec/rec_mtb_nrtr.yml +++ b/configs/rec/rec_mtb_nrtr.yml @@ -46,7 +46,7 @@ Architecture: name: Transformer d_model: 512 num_encoder_layers: 6 - beam_size: 10 # When Beam size is greater than 0, it means to use beam search when evaluation. + beam_size: -1 # When Beam size is greater than 0, it means to use beam search when evaluation. 
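+    # With beam_size <= 0, beam search is skipped and greedy (argmax) decoding
+    # is used instead, which is considerably faster during evaluation.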
Loss: @@ -65,7 +65,7 @@ Train: name: LMDBDataSet data_dir: ./train_data/data_lmdb_release/training/ transforms: - - NRTRDecodeImage: # load image + - DecodeImage: # load image img_mode: BGR channel_first: False - NRTRLabelEncode: # Class handling label @@ -85,7 +85,7 @@ Eval: name: LMDBDataSet data_dir: ./train_data/data_lmdb_release/evaluation/ transforms: - - NRTRDecodeImage: # load image + - DecodeImage: # load image img_mode: BGR channel_first: False - NRTRLabelEncode: # Class handling label diff --git a/configs/rec/rec_r31_sar.yml b/configs/rec/rec_r31_sar.yml index 053b1ae8352473cea91b03814da310b65264fdb2..41609fdf28e78f5340ab08878c2b8b23f46020d2 100644 --- a/configs/rec/rec_r31_sar.yml +++ b/configs/rec/rec_r31_sar.yml @@ -79,7 +79,7 @@ Train: Eval: dataset: name: LMDBDataSet - data_dir: ./eval_data/evaluation/ + data_dir: ./train_data/data_lmdb_release/evaluation/ transforms: - DecodeImage: # load image img_mode: BGR diff --git a/deploy/cpp_infer/src/main.cpp b/deploy/cpp_infer/src/main.cpp index ef400cfc89cb178d5e05464f0b73be2aaed53654..82a248416f086dd2b90e891a23774c294ed50ae3 100644 --- a/deploy/cpp_infer/src/main.cpp +++ b/deploy/cpp_infer/src/main.cpp @@ -91,7 +91,7 @@ int main_det(std::vector cv_all_img_names) { FLAGS_use_tensorrt, FLAGS_precision); for (int i = 0; i < cv_all_img_names.size(); ++i) { - LOG(INFO) << "The predict img: " << cv_all_img_names[i]; +// LOG(INFO) << "The predict img: " << cv_all_img_names[i]; cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR); if (!srcimg.data) { @@ -106,6 +106,16 @@ int main_det(std::vector cv_all_img_names) { time_info[0] += det_times[0]; time_info[1] += det_times[1]; time_info[2] += det_times[2]; + + if (FLAGS_benchmark) { + cout << cv_all_img_names[i] << '\t'; + for (int n = 0; n < boxes.size(); n++) { + for (int m = 0; m < boxes[n].size(); m++) { + cout << boxes[n][m][0] << ' ' << boxes[n][m][1] << ' '; + } + } + cout << endl; + } } if (FLAGS_benchmark) { diff --git a/deploy/hubserving/ocr_det/params.py b/deploy/hubserving/ocr_det/params.py index bc75cc404e43e0a6e9242c2684d615b4575e5d8f..2587a297662cb34d22dbdfe191439e61066cda78 100755 --- a/deploy/hubserving/ocr_det/params.py +++ b/deploy/hubserving/ocr_det/params.py @@ -13,7 +13,7 @@ def read_params(): #params for text detector cfg.det_algorithm = "DB" - cfg.det_model_dir = "./inference/ch_ppocr_mobile_v2.0_det_infer/" + cfg.det_model_dir = "./inference/ch_PP-OCRv2_det_infer/" cfg.det_limit_side_len = 960 cfg.det_limit_type = 'max' diff --git a/deploy/hubserving/ocr_rec/params.py b/deploy/hubserving/ocr_rec/params.py index f8d29114357946c9b6264079fca2eb4b19dbefba..5e11c3cfee0c9387fce7f465f15f9424b7b04e9d 100644 --- a/deploy/hubserving/ocr_rec/params.py +++ b/deploy/hubserving/ocr_rec/params.py @@ -13,7 +13,7 @@ def read_params(): #params for text recognizer cfg.rec_algorithm = "CRNN" - cfg.rec_model_dir = "./inference/ch_ppocr_mobile_v2.0_rec_infer/" + cfg.rec_model_dir = "./inference/ch_PP-OCRv2_rec_infer/" cfg.rec_image_shape = "3, 32, 320" cfg.rec_char_type = 'ch' diff --git a/deploy/hubserving/ocr_system/params.py b/deploy/hubserving/ocr_system/params.py index bee53bfd346e6d4d91738a2e06a0b4dab8e2b0de..4698e8ce5d8f8c826fe04a85906189e729104ddb 100755 --- a/deploy/hubserving/ocr_system/params.py +++ b/deploy/hubserving/ocr_system/params.py @@ -13,7 +13,7 @@ def read_params(): #params for text detector cfg.det_algorithm = "DB" - cfg.det_model_dir = "./inference/ch_ppocr_mobile_v2.0_det_infer/" + cfg.det_model_dir = "./inference/ch_PP-OCRv2_det_infer/" 
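+    # assumes the PP-OCRv2 inference models have been downloaded and unpacked
+    # under ./inference/ first (see deploy/hubserving/readme.md)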
cfg.det_limit_side_len = 960 cfg.det_limit_type = 'max' @@ -31,7 +31,7 @@ def read_params(): #params for text recognizer cfg.rec_algorithm = "CRNN" - cfg.rec_model_dir = "./inference/ch_ppocr_mobile_v2.0_rec_infer/" + cfg.rec_model_dir = "./inference/ch_PP-OCRv2_rec_infer/" cfg.rec_image_shape = "3, 32, 320" cfg.rec_char_type = 'ch' diff --git a/deploy/hubserving/readme.md b/deploy/hubserving/readme.md index 11b843fec1052c3ad401ca0b7d1cb602401af8f8..b52e3584c36173e4c607dbbd9679605c98de8a67 100755 --- a/deploy/hubserving/readme.md +++ b/deploy/hubserving/readme.md @@ -34,10 +34,10 @@ pip3 install paddlehub==2.1.0 --upgrade -i https://pypi.tuna.tsinghua.edu.cn/sim ``` ### 2. 下载推理模型 -安装服务模块前,需要准备推理模型并放到正确路径。默认使用的是v2.0版的超轻量模型,默认模型路径为: +安装服务模块前,需要准备推理模型并放到正确路径。默认使用的是PP-OCRv2模型,默认模型路径为: ``` -检测模型:./inference/ch_ppocr_mobile_v2.0_det_infer/ -识别模型:./inference/ch_ppocr_mobile_v2.0_rec_infer/ +检测模型:./inference/ch_PP-OCRv2_det_infer/ +识别模型:./inference/ch_PP-OCRv2_rec_infer/ 方向分类器:./inference/ch_ppocr_mobile_v2.0_cls_infer/ ``` diff --git a/deploy/hubserving/readme_en.md b/deploy/hubserving/readme_en.md index 539ad722cae78b8315b87d35f9af6ab81140c5b3..3bbcf98cd8b78407613e6bdfb5d5ab8b0a25a084 100755 --- a/deploy/hubserving/readme_en.md +++ b/deploy/hubserving/readme_en.md @@ -35,10 +35,10 @@ pip3 install paddlehub==2.1.0 --upgrade -i https://pypi.tuna.tsinghua.edu.cn/sim ``` ### 2. Download inference model -Before installing the service module, you need to prepare the inference model and put it in the correct path. By default, the ultra lightweight model of v2.0 is used, and the default model path is: +Before installing the service module, you need to prepare the inference model and put it in the correct path. By default, the PP-OCRv2 models are used, and the default model path is: ``` -detection model: ./inference/ch_ppocr_mobile_v2.0_det_infer/ -recognition model: ./inference/ch_ppocr_mobile_v2.0_rec_infer/ +detection model: ./inference/ch_PP-OCRv2_det_infer/ +recognition model: ./inference/ch_PP-OCRv2_rec_infer/ text direction classifier: ./inference/ch_ppocr_mobile_v2.0_cls_infer/ ``` diff --git a/deploy/slim/quantization/quant_kl.py b/deploy/slim/quantization/quant_kl.py new file mode 100755 index 0000000000000000000000000000000000000000..d866784ae6a3c087215320ec95bd39fdd1e89418 --- /dev/null +++ b/deploy/slim/quantization/quant_kl.py @@ -0,0 +1,146 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
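+
+# This script runs PaddleSlim post-training quantization (KL calibration, per
+# the file name): it loads an already-exported inference model
+# (inference.pdmodel / inference.pdiparams), draws calibration batches from the
+# Train dataloader via sample_generator(), and writes the quantized model to
+# Global.save_inference_dir with quant_post_static(). The PACT layer and
+# quant_config defined below are not referenced by that call.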
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys + +__dir__ = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(__dir__) +sys.path.append(os.path.abspath(os.path.join(__dir__, '..', '..', '..'))) +sys.path.append( + os.path.abspath(os.path.join(__dir__, '..', '..', '..', 'tools'))) + +import yaml +import paddle +import paddle.distributed as dist + +paddle.seed(2) + +from ppocr.data import build_dataloader +from ppocr.modeling.architectures import build_model +from ppocr.losses import build_loss +from ppocr.optimizer import build_optimizer +from ppocr.postprocess import build_post_process +from ppocr.metrics import build_metric +from ppocr.utils.save_load import init_model +import tools.program as program +import paddleslim +from paddleslim.dygraph.quant import QAT +import numpy as np + +dist.get_world_size() + + +class PACT(paddle.nn.Layer): + def __init__(self): + super(PACT, self).__init__() + alpha_attr = paddle.ParamAttr( + name=self.full_name() + ".pact", + initializer=paddle.nn.initializer.Constant(value=20), + learning_rate=1.0, + regularizer=paddle.regularizer.L2Decay(2e-5)) + + self.alpha = self.create_parameter( + shape=[1], attr=alpha_attr, dtype='float32') + + def forward(self, x): + out_left = paddle.nn.functional.relu(x - self.alpha) + out_right = paddle.nn.functional.relu(-self.alpha - x) + x = x - out_left + out_right + return x + + +quant_config = { + # weight preprocess type, default is None and no preprocessing is performed. + 'weight_preprocess_type': None, + # activation preprocess type, default is None and no preprocessing is performed. + 'activation_preprocess_type': None, + # weight quantize type, default is 'channel_wise_abs_max' + 'weight_quantize_type': 'channel_wise_abs_max', + # activation quantize type, default is 'moving_average_abs_max' + 'activation_quantize_type': 'moving_average_abs_max', + # weight quantize bit num, default is 8 + 'weight_bits': 8, + # activation quantize bit num, default is 8 + 'activation_bits': 8, + # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8' + 'dtype': 'int8', + # window size for 'range_abs_max' quantization. 
default is 10000
+    'window_size': 10000,
+    # The decay coefficient of moving average, default is 0.9
+    'moving_rate': 0.9,
+    # for dygraph quantization, layers of type in quantizable_layer_type will be quantized
+    'quantizable_layer_type': ['Conv2D', 'Linear'],
+}
+
+
+def sample_generator(loader):
+    def __reader__():
+        for indx, data in enumerate(loader):
+            images = np.array(data[0])
+            yield images
+
+    return __reader__
+
+
+def main(config, device, logger, vdl_writer):
+    # init dist environment
+    if config['Global']['distributed']:
+        dist.init_parallel_env()
+
+    global_config = config['Global']
+
+    # build dataloader
+    config['Train']['loader']['num_workers'] = 0
+    train_dataloader = build_dataloader(config, 'Train', device, logger)
+    if config['Eval']:
+        config['Eval']['loader']['num_workers'] = 0
+        valid_dataloader = build_dataloader(config, 'Eval', device, logger)
+    else:
+        valid_dataloader = None
+
+    paddle.enable_static()
+    place = paddle.CPUPlace()
+    exe = paddle.static.Executor(place)
+
+    if 'inference_model' in global_config.keys():
+        inference_model_dir = global_config['inference_model']
+    else:
+        inference_model_dir = os.path.dirname(global_config['pretrained_model'])
+    if not (os.path.exists(os.path.join(inference_model_dir, "inference.pdmodel")) and \
+            os.path.exists(os.path.join(inference_model_dir, "inference.pdiparams"))):
+        raise ValueError(
+            "Please set inference model dir in Global.inference_model or Global.pretrained_model for post-quantization"
+        )
+
+    paddleslim.quant.quant_post_static(
+        executor=exe,
+        model_dir=inference_model_dir,
+        model_filename='inference.pdmodel',
+        params_filename='inference.pdiparams',
+        quantize_model_path=global_config['save_inference_dir'],
+        sample_generator=sample_generator(train_dataloader),
+        save_model_filename='inference.pdmodel',
+        save_params_filename='inference.pdiparams',
+        batch_size=1,
+        batch_nums=None)
+
+
+if __name__ == '__main__':
+    config, device, logger, vdl_writer = program.preprocess(is_train=True)
+    main(config, device, logger, vdl_writer)
diff --git a/doc/doc_ch/algorithm_overview.md b/doc/doc_ch/algorithm_overview.md
index d465539166130bb5db9bc587ccbfc3e97b15a236..6daacaf7f08e5bd0c5e5aba8b435c5bd26eaca7b 100755
--- a/doc/doc_ch/algorithm_overview.md
+++ b/doc/doc_ch/algorithm_overview.md
@@ -9,11 +9,13 @@
 ### 1.文本检测算法
 PaddleOCR开源的文本检测算法列表:
-- [x] DB([paper]( https://arxiv.org/abs/1911.08947)) [2](ppocr推荐)
-- [x] EAST([paper](https://arxiv.org/abs/1704.03155))[1]
-- [x] SAST([paper](https://arxiv.org/abs/1908.05498))[4]
+- [x] DB([paper]( https://arxiv.org/abs/1911.08947))(ppocr推荐)
+- [x] EAST([paper](https://arxiv.org/abs/1704.03155))
+- [x] SAST([paper](https://arxiv.org/abs/1908.05498))
+- [x] PSENet([paper](https://arxiv.org/abs/1903.12473v2))
 在ICDAR2015文本检测公开数据集上,算法效果如下:
+
 |模型|骨干网络|precision|recall|Hmean|下载链接|
 | --- | --- | --- | --- | --- | --- |
 |EAST|ResNet50_vd|85.80%|86.71%|86.25%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_east_v2.0_train.tar)|
@@ -21,6 +23,8 @@ PaddleOCR开源的文本检测算法列表:
 |DB|ResNet50_vd|86.41%|78.72%|82.38%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)|
 |DB|MobileNetV3|77.29%|73.08%|75.12%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)|
 |SAST|ResNet50_vd|91.39%|83.77%|87.42%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_icdar15_v2.0_train.tar)|
+|PSE|ResNet50_vd|85.81%|79.53%|82.55%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.1/en_det/det_r50_vd_pse_v2.0_train.tar)| +|PSE|MobileNetV3|82.20%|70.48%|75.89%|[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.1/en_det/det_mv3_pse_v2.0_train.tar)| 在Total-text文本检测公开数据集上,算法效果如下: @@ -39,15 +43,15 @@ PaddleOCR文本检测算法的训练和使用请参考文档教程中[模型训 ### 2.文本识别算法 PaddleOCR基于动态图开源的文本识别算法列表: -- [x] CRNN([paper](https://arxiv.org/abs/1507.05717))[7](ppocr推荐) -- [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))[10] -- [x] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11] -- [x] RARE([paper](https://arxiv.org/abs/1603.03915v1))[12] -- [x] SRN([paper](https://arxiv.org/abs/2003.12294))[5] +- [x] CRNN([paper](https://arxiv.org/abs/1507.05717))(ppocr推荐) +- [x] Rosetta([paper](https://arxiv.org/abs/1910.05085)) +- [x] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html)) +- [x] RARE([paper](https://arxiv.org/abs/1603.03915v1)) +- [x] SRN([paper](https://arxiv.org/abs/2003.12294)) - [x] NRTR([paper](https://arxiv.org/abs/1806.00926v2)) - [x] SAR([paper](https://arxiv.org/abs/1811.00751v2)) -参考[DTRB][3](https://arxiv.org/abs/1904.01906)文字识别训练和评估流程,使用MJSynth和SynthText两个文字识别数据集训练,在IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE数据集上进行评估,算法效果如下: +参考[DTRB](https://arxiv.org/abs/1904.01906) 文字识别训练和评估流程,使用MJSynth和SynthText两个文字识别数据集训练,在IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE数据集上进行评估,算法效果如下: |模型|骨干网络|Avg Accuracy|模型存储命名|下载链接| |---|---|---|---|---| diff --git a/doc/doc_ch/benchmark.md b/doc/doc_ch/benchmark.md index 520a2fcea35ef4bc19ae448517fbfcba61ed60b0..7ab829576e78aaf9296a67871e84f38aecb8bf80 100644 --- a/doc/doc_ch/benchmark.md +++ b/doc/doc_ch/benchmark.md @@ -12,40 +12,27 @@ ## 评估指标 说明: -- v1.0是未添加优化策略的DB+CRNN模型,v1.1是添加多种优化策略和方向分类器的PP-OCR模型。slim_v1.1是使用裁剪或量化的模型。 + - 检测输入图像的的长边尺寸是960。 -- 评估耗时阶段为图像输入到结果输出的完整阶段,包括了图像的预处理和后处理。 +- 评估耗时阶段为图像预测耗时,不包括图像的预处理和后处理。 - `Intel至强6148`为服务器端CPU型号,测试中使用Intel MKL-DNN 加速。 - `骁龙855`为移动端处理平台型号。 -不同预测模型大小和整体识别精度对比 +预测模型大小和整体识别精度对比 | 模型名称 | 整体模型
大小\(M\) | 检测模型<br>大小\(M\) | 方向分类器<br>模型大小\(M\) | 识别模型<br>大小\(M\) | 整体识别<br>F\-score |
|:-:|:-:|:-:|:-:|:-:|:-:|
-| ch\_ppocr\_mobile\_v1\.1 | 8\.1 | 2\.6 | 0\.9 | 4\.6 | 0\.5193 |
-| ch\_ppocr\_server\_v1\.1 | 155\.1 | 47\.2 | 0\.9 | 107 | 0\.5414 |
-| ch\_ppocr\_mobile\_v1\.0 | 8\.6 | 4\.1 | \- | 4\.5 | 0\.393 |
-| ch\_ppocr\_server\_v1\.0 | 203\.8 | 98\.5 | \- | 105\.3 | 0\.4436 |
-
-不同预测模型在T4 GPU上预测速度对比,单位ms
-
-| 模型名称 | 整体 | 检测 | 方向分类器 | 识别 |
-|:-:|:-:|:-:|:-:|:-:|
-| ch\_ppocr\_mobile\_v1\.1 | 137 | 35 | 24 | 78 |
-| ch\_ppocr\_server\_v1\.1 | 204 | 39 | 25 | 140 |
-| ch\_ppocr\_mobile\_v1\.0 | 117 | 41 | \- | 76 |
-| ch\_ppocr\_server\_v1\.0 | 199 | 52 | \- | 147 |
+| PP-OCRv2 | 11\.6 | 3\.0 | 0\.9 | 8\.6 | 0\.5224 |
+| PP-OCR mobile | 8\.1 | 2\.6 | 0\.9 | 4\.6 | 0\.503 |
+| PP-OCR server | 155\.1 | 47\.2 | 0\.9 | 107 | 0\.570 |
-不同预测模型在CPU上预测速度对比,单位ms
-| 模型名称 | 整体 | 检测 | 方向分类器 | 识别 |
-|:-:|:-:|:-:|:-:|:-:|
-| ch\_ppocr\_mobile\_v1\.1 | 421 | 164 | 51 | 206 |
-| ch\_ppocr\_mobile\_v1\.0 | 398 | 219 | \- | 179 |
+预测模型在CPU和GPU上的速度对比,单位ms
+
+| 模型名称 | CPU | T4 GPU |
+|:-:|:-:|:-:|
+| PP-OCRv2 | 330 | 111 |
+| PP-OCR mobile | 356 | 116 |
+| PP-OCR server | 1056 | 200 |
-| 模型名称 | 整体模型<br>大小\(M\) | 检测模型<br>大小\(M\) | 方向分类器<br>模型大小\(M\) | 识别模型<br>大小\(M\) | 整体识别<br>F\-score | SD 855<br>\(ms\) |
-|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
-| ch\_ppocr\_mobile\_v1\.1 | 8\.1 | 2\.6 | 0\.9 | 4\.6 | 0\.5193 | 306 |
-| ch\_ppocr\_mobile\_slim\_v1\.1 | 3\.5 | 1\.4 | 0\.5 | 1\.6 | 0\.521 | 268 |
+更多 PP-OCR 系列模型的预测指标可以参考[PP-OCR Benchmark](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.2/doc/doc_ch/benchmark.md)
diff --git a/doc/doc_ch/knowledge_distillation.md b/doc/doc_ch/knowledge_distillation.md
index 5827f48c81d51a674011e2df40c798e0548fb0a1..b2772454d90ba40e5d65e035d083f8fcd79f69af 100644
--- a/doc/doc_ch/knowledge_distillation.md
+++ b/doc/doc_ch/knowledge_distillation.md
@@ -39,7 +39,7 @@ PaddleOCR中集成了知识蒸馏的算法,具体地,有以下几个主要
 ### 2.1 识别配置文件解析
-配置文件在[ch_PP-OCRv2_rec.yml](../../configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml)。
+配置文件在[ch_PP-OCRv2_rec_distillation.yml](../../configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml)。
 #### 2.1.1 模型结构
@@ -246,6 +246,39 @@ Metric:
 关于`DistillationMetric`更加具体的实现可以参考: [distillation_metric.py](../../ppocr/metrics/distillation_metric.py#L24)。
+#### 2.1.5 蒸馏模型微调
+
+对蒸馏得到的识别模型进行微调有2种方式。
+
+(1)基于知识蒸馏的微调:这种情况比较简单,下载预训练模型,在[ch_PP-OCRv2_rec_distillation.yml](../../configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml)中配置好预训练模型路径以及自己的数据路径,即可进行模型微调训练。
+
+(2)微调时不使用知识蒸馏:这种情况,需要首先将预训练模型中的学生模型参数提取出来,具体步骤如下。
+
+* 首先下载预训练模型并解压。
+```shell
+# 下载预训练模型并解压
+wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_train.tar
+tar -xf ch_PP-OCRv2_rec_train.tar
+```
+
+* 然后使用python,对其中的学生模型参数进行提取
+
+```python
+import paddle
+# 加载预训练模型
+all_params = paddle.load("ch_PP-OCRv2_rec_train/best_accuracy.pdparams")
+# 查看权重参数的keys
+print(all_params.keys())
+# 学生模型的权重提取
+s_params = {key[len("Student."):]: all_params[key] for key in all_params if "Student." in key}
+# 查看学生模型权重参数的keys
+print(s_params.keys())
+# 保存
+paddle.save(s_params, "ch_PP-OCRv2_rec_train/student.pdparams")
+```
+
+转化完成之后,使用[ch_PP-OCRv2_rec.yml](../../configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml),修改预训练模型的路径(为导出的`student.pdparams`模型路径)以及自己的数据路径,即可进行模型微调。
+
 ### 2.2 检测配置文件解析
 * coming soon!
diff --git a/doc/doc_en/algorithm_overview_en.md b/doc/doc_en/algorithm_overview_en.md
index 05728153239ba1a52ae08c7e59b45340648612d5..df8a4ce3ef5fbcadb7ebdfd8ddf2bdf59637783e 100755
--- a/doc/doc_en/algorithm_overview_en.md
+++ b/doc/doc_en/algorithm_overview_en.md
@@ -11,9 +11,10 @@ This tutorial lists the text detection algorithms and text recognition algorithm
 ### 1.
Text Detection Algorithm PaddleOCR open source text detection algorithms list: -- [x] EAST([paper](https://arxiv.org/abs/1704.03155))[2] -- [x] DB([paper](https://arxiv.org/abs/1911.08947))[1] -- [x] SAST([paper](https://arxiv.org/abs/1908.05498))[4] +- [x] EAST([paper](https://arxiv.org/abs/1704.03155)) +- [x] DB([paper](https://arxiv.org/abs/1911.08947)) +- [x] SAST([paper](https://arxiv.org/abs/1908.05498)) +- [x] PSE([paper](https://arxiv.org/abs/1903.12473v2)) On the ICDAR2015 dataset, the text detection result is as follows: @@ -24,6 +25,8 @@ On the ICDAR2015 dataset, the text detection result is as follows: |DB|ResNet50_vd|86.41%|78.72%|82.38%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)| |DB|MobileNetV3|77.29%|73.08%|75.12%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)| |SAST|ResNet50_vd|91.39%|83.77%|87.42%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_icdar15_v2.0_train.tar)| +|PSE|ResNet50_vd|85.81%|79.53%|82.55%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.1/en_det/det_r50_vd_pse_v2.0_train.tar)| +|PSE|MobileNetV3|82.20%|70.48%|75.89%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.1/en_det/det_mv3_pse_v2.0_train.tar)| On Total-Text dataset, the text detection result is as follows: @@ -41,11 +44,11 @@ For the training guide and use of PaddleOCR text detection algorithms, please re ### 2. Text Recognition Algorithm PaddleOCR open-source text recognition algorithms list: -- [x] CRNN([paper](https://arxiv.org/abs/1507.05717))[7] -- [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))[10] -- [x] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11] -- [x] RARE([paper](https://arxiv.org/abs/1603.03915v1))[12] -- [x] SRN([paper](https://arxiv.org/abs/2003.12294))[5] +- [x] CRNN([paper](https://arxiv.org/abs/1507.05717)) +- [x] Rosetta([paper](https://arxiv.org/abs/1910.05085)) +- [x] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html)) +- [x] RARE([paper](https://arxiv.org/abs/1603.03915v1)) +- [x] SRN([paper](https://arxiv.org/abs/2003.12294)) - [x] NRTR([paper](https://arxiv.org/abs/1806.00926v2)) - [x] SAR([paper](https://arxiv.org/abs/1811.00751v2)) diff --git a/doc/doc_en/benchmark_en.md b/doc/doc_en/benchmark_en.md index 91b015941924add81f8b4f0d9d9ca13274348131..0d3ffaecc5bdffc4adeffdecf98b2978759cb4a5 100755 --- a/doc/doc_en/benchmark_en.md +++ b/doc/doc_en/benchmark_en.md @@ -13,7 +13,6 @@ We collected 300 images for different real application scenarios to evaluate the ## MEASUREMENT Explanation: -- v1.0 indicates DB+CRNN models without the strategies. v1.1 indicates the PP-OCR models with the strategies and the direction classify. slim_v1.1 indicates the PP-OCR models with prunner or quantization. - The long size of the input for the text detector is 960. @@ -27,30 +26,16 @@ Compares the model size and F-score: | Model Name | Model Size
of the<br>Whole System\(M\) | Model Size<br>of the Text<br>Detector\(M\) | Model Size<br>of the Direction<br>Classifier\(M\) | Model Size<br>of the Text<br>Recognizer \(M\) | F\-score |
|:-:|:-:|:-:|:-:|:-:|:-:|
-| ch\_ppocr\_mobile\_v1\.1 | 8\.1 | 2\.6 | 0\.9 | 4\.6 | 0\.5193 |
-| ch\_ppocr\_server\_v1\.1 | 155\.1 | 47\.2 | 0\.9 | 107 | 0\.5414 |
-| ch\_ppocr\_mobile\_v1\.0 | 8\.6 | 4\.1 | \- | 4\.5 | 0\.393 |
-| ch\_ppocr\_server\_v1\.0 | 203\.8 | 98\.5 | \- | 105\.3 | 0\.4436 |
+| PP-OCRv2 | 11\.6 | 3\.0 | 0\.9 | 8\.6 | 0\.5224 |
+| PP-OCR mobile | 8\.1 | 2\.6 | 0\.9 | 4\.6 | 0\.503 |
+| PP-OCR server | 155\.1 | 47\.2 | 0\.9 | 107 | 0\.570 |
-Compares the time-consuming on T4 GPU (ms):
+Compares the time-consuming on CPU and T4 GPU (ms):
-| Model Name | Overall | Text Detector | Direction Classifier | Text Recognizer |
-|:-:|:-:|:-:|:-:|:-:|
-| ch\_ppocr\_mobile\_v1\.1 | 137 | 35 | 24 | 78 |
-| ch\_ppocr\_server\_v1\.1 | 204 | 39 | 25 | 140 |
-| ch\_ppocr\_mobile\_v1\.0 | 117 | 41 | \- | 76 |
-| ch\_ppocr\_server\_v1\.0 | 199 | 52 | \- | 147 |
+| Model Name | CPU | T4 GPU |
+|:-:|:-:|:-:|
+| PP-OCRv2 | 330 | 111 |
+| PP-OCR mobile | 356 | 116 |
+| PP-OCR server | 1056 | 200 |
-Compares the time-consuming on CPU (ms):
-
-| Model Name | Overall | Text Detector | Direction Classifier | Text Recognizer |
-|:-:|:-:|:-:|:-:|:-:|
-| ch\_ppocr\_mobile\_v1\.1 | 421 | 164 | 51 | 206 |
-| ch\_ppocr\_mobile\_v1\.0 | 398 | 219 | \- | 179 |
-
-Compares the model size, F-score, the time-consuming on SD 855 of between the slim models and the original models:
-
-| Model Name | Model Size<br>of the<br>Whole System\(M\) | Model Size<br>of the Text<br>Detector\(M\) | Model Size<br>of the Direction<br>Classifier\(M\) | Model Size<br>of the Text<br>Recognizer \(M\) | F\-score | SD 855<br>\(ms\) |
-|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
-| ch\_ppocr\_mobile\_v1\.1 | 8\.1 | 2\.6 | 0\.9 | 4\.6 | 0\.5193 | 306 |
-| ch\_ppocr\_mobile\_slim\_v1\.1 | 3\.5 | 1\.4 | 0\.5 | 1\.6 | 0\.521 | 268 |
+More metrics of the PP-OCR series models are available in [PP-OCR Benchmark](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.2/doc/doc_en/benchmark_en.md)
diff --git a/ppocr/data/imaug/ColorJitter.py b/ppocr/data/imaug/ColorJitter.py
new file mode 100644
index 0000000000000000000000000000000000000000..4b542abc8f9dc5af76529f9feb4bcb8b47b5f7d0
--- /dev/null
+++ b/ppocr/data/imaug/ColorJitter.py
@@ -0,0 +1,26 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from paddle.vision.transforms import ColorJitter as pp_ColorJitter
+
+__all__ = ['ColorJitter']
+
+class ColorJitter(object):
+    def __init__(self, brightness=0, contrast=0, saturation=0, hue=0, **kwargs):
+        # wrap paddle.vision's ColorJitter so it plugs into the dict-based pipeline
+        self.aug = pp_ColorJitter(brightness, contrast, saturation, hue)
+
+    def __call__(self, data):
+        image = data['image']
+        image = self.aug(image)
+        data['image'] = image
+        return data
diff --git a/ppocr/data/imaug/__init__.py b/ppocr/data/imaug/__init__.py
index 8bfc17508398224b86d7e9d9a03a250c266a54a8..5aaa1cd71eb791efa94e6bd812f3ab76632c96c6 100644
--- a/ppocr/data/imaug/__init__.py
+++ b/ppocr/data/imaug/__init__.py
@@ -19,11 +19,13 @@
 from .iaa_augment import IaaAugment
 from .make_border_map import MakeBorderMap
 from .make_shrink_map import MakeShrinkMap
-from .random_crop_data import EastRandomCropData, PSERandomCrop
+from .random_crop_data import EastRandomCropData, RandomCropImgMask
+from .make_pse_gt import MakePseGt
 from .rec_img_aug import RecAug, RecResizeImg, ClsResizeImg, SRNRecResizeImg, NRTRRecResizeImg, SARRecResizeImg
 from .randaugment import RandAugment
 from .copy_paste import CopyPaste
+from .ColorJitter import ColorJitter
 from .operators import *
 from .label_ops import *
diff --git a/ppocr/data/imaug/label_ops.py b/ppocr/data/imaug/label_ops.py
index 643ec70503fb0015e9f7a448d3a6cf9f99171493..d7b47a8ac8beac684192cd8245e519fd1f600e6b 100644
--- a/ppocr/data/imaug/label_ops.py
+++ b/ppocr/data/imaug/label_ops.py
@@ -174,21 +174,26 @@ class NRTRLabelEncode(BaseRecLabelEncode):
         super(NRTRLabelEncode, self).__init__(max_text_length,
                                               character_dict_path,
                                               character_type, use_space_char)
+
     def __call__(self, data):
         text = data['label']
         text = self.encode(text)
         if text is None:
             return None
+        if len(text) >= self.max_text_len - 1:
+            return None
         data['length'] = np.array(len(text))
         text.insert(0, 2)
         text.append(3)
         text = text + [0] * (self.max_text_len - len(text))
         data['label'] = np.array(text)
         return data
+
     def add_special_char(self, dict_character):
-        dict_character = ['blank','<unk>','<s>','</s>'] + dict_character
+        dict_character = ['blank', '<unk>', '<s>', '</s>'] + dict_character
         return dict_character
+
+
 class CTCLabelEncode(BaseRecLabelEncode):
     """ Convert between text-label and text-index """
@@ -588,7 +593,7 @@ class
SARLabelEncode(BaseRecLabelEncode): data['length'] = np.array(len(text)) target = [self.start_idx] + text + [self.end_idx] padded_text = [self.padding_idx for _ in range(self.max_text_len)] - + padded_text[:len(target)] = target data['label'] = np.array(padded_text) return data diff --git a/ppocr/data/imaug/make_pse_gt.py b/ppocr/data/imaug/make_pse_gt.py new file mode 100644 index 0000000000000000000000000000000000000000..55abc8970784fd00843d2e91f259c58b65ae8579 --- /dev/null +++ b/ppocr/data/imaug/make_pse_gt.py @@ -0,0 +1,85 @@ +# -*- coding:utf-8 -*- + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import cv2 +import numpy as np +import pyclipper +from shapely.geometry import Polygon + +__all__ = ['MakePseGt'] + +class MakePseGt(object): + r''' + Making binary mask from detection data with ICDAR format. + Typically following the process of class `MakeICDARData`. + ''' + + def __init__(self, kernel_num=7, size=640, min_shrink_ratio=0.4, **kwargs): + self.kernel_num = kernel_num + self.min_shrink_ratio = min_shrink_ratio + self.size = size + + def __call__(self, data): + + image = data['image'] + text_polys = data['polys'] + ignore_tags = data['ignore_tags'] + + h, w, _ = image.shape + short_edge = min(h, w) + if short_edge < self.size: + # keep short_size >= self.size + scale = self.size / short_edge + image = cv2.resize(image, dsize=None, fx=scale, fy=scale) + text_polys *= scale + + gt_kernels = [] + for i in range(1,self.kernel_num+1): + # s1->sn, from big to small + rate = 1.0 - (1.0 - self.min_shrink_ratio) / (self.kernel_num - 1) * i + text_kernel, ignore_tags = self.generate_kernel(image.shape[0:2], rate, text_polys, ignore_tags) + gt_kernels.append(text_kernel) + + training_mask = np.ones(image.shape[0:2], dtype='uint8') + for i in range(text_polys.shape[0]): + if ignore_tags[i]: + cv2.fillPoly(training_mask, text_polys[i].astype(np.int32)[np.newaxis, :, :], 0) + + gt_kernels = np.array(gt_kernels) + gt_kernels[gt_kernels > 0] = 1 + + data['image'] = image + data['polys'] = text_polys + data['gt_kernels'] = gt_kernels[0:] + data['gt_text'] = gt_kernels[0] + data['mask'] = training_mask.astype('float32') + return data + + def generate_kernel(self, img_size, shrink_ratio, text_polys, ignore_tags=None): + h, w = img_size + text_kernel = np.zeros((h, w), dtype=np.float32) + for i, poly in enumerate(text_polys): + polygon = Polygon(poly) + distance = polygon.area * (1 - shrink_ratio * shrink_ratio) / (polygon.length + 1e-6) + subject = [tuple(l) for l in poly] + pco = pyclipper.PyclipperOffset() + pco.AddPath(subject, pyclipper.JT_ROUND, + pyclipper.ET_CLOSEDPOLYGON) + shrinked = np.array(pco.Execute(-distance)) + + if len(shrinked) == 0 or shrinked.size == 0: + if ignore_tags is not None: + ignore_tags[i] = True + continue + try: + shrinked = np.array(shrinked[0]).reshape(-1, 2) + except: + if ignore_tags is not None: + ignore_tags[i] = True + continue + cv2.fillPoly(text_kernel, [shrinked.astype(np.int32)], i + 1) + return text_kernel, ignore_tags diff --git a/ppocr/data/imaug/random_crop_data.py b/ppocr/data/imaug/random_crop_data.py index 4d67cff61d6f340be6d80d8243c68909a94c4e88..7c1c25abb56a0cf7d4d59b8523962bd5d81c873a 100644 --- a/ppocr/data/imaug/random_crop_data.py +++ b/ppocr/data/imaug/random_crop_data.py @@ -164,47 +164,55 @@ class EastRandomCropData(object): return data -class PSERandomCrop(object): - def __init__(self, size, **kwargs): +class 
RandomCropImgMask(object):
+    def __init__(self, size, main_key, crop_keys, p=3 / 8, **kwargs):
         self.size = size
+        self.main_key = main_key
+        self.crop_keys = crop_keys
+        self.p = p
 
     def __call__(self, data):
-        imgs = data['imgs']
+        image = data['image']
 
-        h, w = imgs[0].shape[0:2]
+        h, w = image.shape[0:2]
         th, tw = self.size
         if w == tw and h == th:
-            return imgs
+            return data
 
-        # label中存在文本实例,并且按照概率进行裁剪,使用threshold_label_map控制
-        if np.max(imgs[2]) > 0 and random.random() > 3 / 8:
-            # 文本实例的左上角点
-            tl = np.min(np.where(imgs[2] > 0), axis=1) - self.size
+        mask = data[self.main_key]
+        if np.max(mask) > 0 and random.random() > self.p:
+            # make sure to crop the text region
+            tl = np.min(np.where(mask > 0), axis=1) - (th, tw)
             tl[tl < 0] = 0
-            # 文本实例的右下角点
-            br = np.max(np.where(imgs[2] > 0), axis=1) - self.size
+            br = np.max(np.where(mask > 0), axis=1) - (th, tw)
             br[br < 0] = 0
-            # 保证选到右下角点时,有足够的距离进行crop
+
             br[0] = min(br[0], h - th)
             br[1] = min(br[1], w - tw)
-            for _ in range(50000):
-                i = random.randint(tl[0], br[0])
-                j = random.randint(tl[1], br[1])
-                # 保证shrink_label_map有文本
-                if imgs[1][i:i + th, j:j + tw].sum() <= 0:
-                    continue
-                else:
-                    break
+            i = random.randint(tl[0], br[0]) if tl[0] < br[0] else 0
+            j = random.randint(tl[1], br[1]) if tl[1] < br[1] else 0
         else:
-            i = random.randint(0, h - th)
-            j = random.randint(0, w - tw)
+            i = random.randint(0, h - th) if h - th > 0 else 0
+            j = random.randint(0, w - tw) if w - tw > 0 else 0
 
-        for idx in range(len(imgs)):
-            if len(imgs[idx].shape) == 3:
-                imgs[idx] = imgs[idx][i:i + th, j:j + tw, :]
-            else:
-                imgs[idx] = imgs[idx][i:i + th, j:j + tw]
-        data['imgs'] = imgs
+        # crop every requested key, handling CHW, HWC and single-channel maps
+        for k in data:
+            if k in self.crop_keys:
+                if len(data[k].shape) == 3:
+                    if np.argmin(data[k].shape) == 0:
+                        img = data[k][:, i:i + th, j:j + tw]
+                    elif np.argmin(data[k].shape) == 2:
+                        img = data[k][i:i + th, j:j + tw, :]
+                    else:
+                        img = data[k]
+                else:
+                    img = data[k][i:i + th, j:j + tw]
+                data[k] = img
         return data
diff --git a/ppocr/data/imaug/rec_img_aug.py b/ppocr/data/imaug/rec_img_aug.py
index 51f5855ac36c40f1808fdec4dc00c540792b15e7..2c6302386579df7024f6f8570870967b2483f283 100644
--- a/ppocr/data/imaug/rec_img_aug.py
+++ b/ppocr/data/imaug/rec_img_aug.py
@@ -44,12 +44,33 @@ class ClsResizeImg(object):
 
 class NRTRRecResizeImg(object):
-    def __init__(self, image_shape, resize_type, **kwargs):
+    def __init__(self, image_shape, resize_type, padding=False, **kwargs):
         self.image_shape = image_shape
         self.resize_type = resize_type
+        self.padding = padding
 
     def __call__(self, data):
         img = data['image']
+        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        image_shape = self.image_shape
+        if self.padding:
+            imgC, imgH, imgW = image_shape
+            # TODO: change to 0 and modify the image shape
+            h = img.shape[0]
+            w = img.shape[1]
+            ratio = w / float(h)
+            if math.ceil(imgH * ratio) > imgW:
+                resized_w = imgW
+            else:
+                resized_w = int(math.ceil(imgH * ratio))
+            resized_image = cv2.resize(img, (resized_w, imgH))
+            norm_img = np.expand_dims(resized_image, -1)
+            norm_img = norm_img.transpose((2, 0, 1))
+            resized_image = norm_img.astype(np.float32) / 128. - 1.
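+            # pixel values are now in [-1, 1); below, the width is zero-padded up to imgW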
+ padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) + padding_im[:, :, 0:resized_w] = resized_image + data['image'] = padding_im + return data if self.resize_type == 'PIL': image_pil = Image.fromarray(np.uint8(img)) img = image_pil.resize(self.image_shape, Image.ANTIALIAS) @@ -109,7 +130,8 @@ class SARRecResizeImg(object): def __call__(self, data): img = data['image'] - norm_img, resize_shape, pad_shape, valid_ratio = resize_norm_img_sar(img, self.image_shape, self.width_downsample_ratio) + norm_img, resize_shape, pad_shape, valid_ratio = resize_norm_img_sar( + img, self.image_shape, self.width_downsample_ratio) data['image'] = norm_img data['resized_shape'] = resize_shape data['pad_shape'] = pad_shape diff --git a/ppocr/data/simple_dataset.py b/ppocr/data/simple_dataset.py index e9c3394cbe930d5169ae005e7582a2902e697b7e..6a33e1342506f26ccaa4a146f3f02fadfbd741a2 100644 --- a/ppocr/data/simple_dataset.py +++ b/ppocr/data/simple_dataset.py @@ -15,7 +15,6 @@ import numpy as np import os import random from paddle.io import Dataset - from .imaug import transform, create_operators diff --git a/ppocr/losses/__init__.py b/ppocr/losses/__init__.py index 0484542f0b8c3cc46ce643d9f4577fe87c6346b7..467d7b6579cef5eca2532cd30df43169184967fc 100755 --- a/ppocr/losses/__init__.py +++ b/ppocr/losses/__init__.py @@ -20,6 +20,7 @@ import paddle.nn as nn from .det_db_loss import DBLoss from .det_east_loss import EASTLoss from .det_sast_loss import SASTLoss +from .det_pse_loss import PSELoss # rec loss from .rec_ctc_loss import CTCLoss @@ -42,10 +43,12 @@ from .combined_loss import CombinedLoss # table loss from .table_att_loss import TableAttentionLoss + def build_loss(config): support_dict = [ - 'DBLoss', 'EASTLoss', 'SASTLoss', 'CTCLoss', 'ClsLoss', 'AttentionLoss', - 'SRNLoss', 'PGLoss', 'CombinedLoss', 'NRTRLoss', 'TableAttentionLoss', 'SARLoss' + 'DBLoss', 'PSELoss', 'EASTLoss', 'SASTLoss', 'CTCLoss', 'ClsLoss', + 'AttentionLoss', 'SRNLoss', 'PGLoss', 'CombinedLoss', 'NRTRLoss', + 'TableAttentionLoss', 'SARLoss' ] config = copy.deepcopy(config) diff --git a/ppocr/losses/basic_loss.py b/ppocr/losses/basic_loss.py index 8306523ac1a933f0c664fc0b4cf077659cccdee3..d2ef5e5ac9692eec5bc30774c4451eab7706705d 100644 --- a/ppocr/losses/basic_loss.py +++ b/ppocr/losses/basic_loss.py @@ -56,31 +56,34 @@ class CELoss(nn.Layer): class KLJSLoss(object): def __init__(self, mode='kl'): - assert mode in ['kl', 'js', 'KL', 'JS'], "mode can only be one of ['kl', 'js', 'KL', 'JS']" + assert mode in ['kl', 'js', 'KL', 'JS' + ], "mode can only be one of ['kl', 'js', 'KL', 'JS']" self.mode = mode def __call__(self, p1, p2, reduction="mean"): - loss = paddle.multiply(p2, paddle.log( (p2+1e-5)/(p1+1e-5) + 1e-5)) + loss = paddle.multiply(p2, paddle.log((p2 + 1e-5) / (p1 + 1e-5) + 1e-5)) if self.mode.lower() == "js": - loss += paddle.multiply(p1, paddle.log((p1+1e-5)/(p2+1e-5) + 1e-5)) + loss += paddle.multiply( + p1, paddle.log((p1 + 1e-5) / (p2 + 1e-5) + 1e-5)) loss *= 0.5 if reduction == "mean": - loss = paddle.mean(loss, axis=[1,2]) - elif reduction=="none" or reduction is None: - return loss + loss = paddle.mean(loss, axis=[1, 2]) + elif reduction == "none" or reduction is None: + return loss else: - loss = paddle.sum(loss, axis=[1,2]) + loss = paddle.sum(loss, axis=[1, 2]) + + return loss - return loss class DMLLoss(nn.Layer): """ DMLLoss """ - def __init__(self, act=None): + def __init__(self, act=None, use_log=False): super().__init__() if act is not None: assert act in ["softmax", "sigmoid"] @@ -90,20 +93,24 @@ 
class DMLLoss(nn.Layer): self.act = nn.Sigmoid() else: self.act = None - + + self.use_log = use_log + self.jskl_loss = KLJSLoss(mode="js") def forward(self, out1, out2): if self.act is not None: out1 = self.act(out1) out2 = self.act(out2) - if len(out1.shape) < 2: + if self.use_log: + # for recognition distillation, log is needed for feature map log_out1 = paddle.log(out1) log_out2 = paddle.log(out2) loss = (F.kl_div( log_out1, out2, reduction='batchmean') + F.kl_div( log_out2, out1, reduction='batchmean')) / 2.0 else: + # for detection distillation log is not needed loss = self.jskl_loss(out1, out2) return loss diff --git a/ppocr/losses/combined_loss.py b/ppocr/losses/combined_loss.py index 0d6fe968d0d7733200a4cfd21d779196cccaba03..f3bb36cf5ac751e6c27e4aa29a46fc5f913f7d05 100644 --- a/ppocr/losses/combined_loss.py +++ b/ppocr/losses/combined_loss.py @@ -49,11 +49,15 @@ class CombinedLoss(nn.Layer): loss = loss_func(input, batch, **kargs) if isinstance(loss, paddle.Tensor): loss = {"loss_{}_{}".format(str(loss), idx): loss} + weight = self.loss_weight[idx] - for key in loss.keys(): - if key == "loss": - loss_all += loss[key] * weight - else: - loss_dict["{}_{}".format(key, idx)] = loss[key] + + loss = {key: loss[key] * weight for key in loss} + + if "loss" in loss: + loss_all += loss["loss"] + else: + loss_all += paddle.add_n(list(loss.values())) + loss_dict.update(loss) loss_dict["loss"] = loss_all return loss_dict diff --git a/ppocr/losses/det_basic_loss.py b/ppocr/losses/det_basic_loss.py index eba5526dd2bd1c0328130b50817172df437cc360..7017236c284e55710f242275a413d56d32158d34 100644 --- a/ppocr/losses/det_basic_loss.py +++ b/ppocr/losses/det_basic_loss.py @@ -75,12 +75,6 @@ class BalanceLoss(nn.Layer): mask (variable): masked maps. return: (variable) balanced loss """ - # if self.main_loss_type in ['DiceLoss']: - # # For the loss that returns to scalar value, perform ohem on the mask - # mask = ohem_batch(pred, gt, mask, self.negative_ratio) - # loss = self.loss(pred, gt, mask) - # return loss - positive = gt * mask negative = (1 - gt) * mask @@ -153,53 +147,4 @@ class BCELoss(nn.Layer): def forward(self, input, label, mask=None, weight=None, name=None): loss = F.binary_cross_entropy(input, label, reduction=self.reduction) - return loss - - -def ohem_single(score, gt_text, training_mask, ohem_ratio): - pos_num = (int)(np.sum(gt_text > 0.5)) - ( - int)(np.sum((gt_text > 0.5) & (training_mask <= 0.5))) - - if pos_num == 0: - # selected_mask = gt_text.copy() * 0 # may be not good - selected_mask = training_mask - selected_mask = selected_mask.reshape( - 1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32') - return selected_mask - - neg_num = (int)(np.sum(gt_text <= 0.5)) - neg_num = (int)(min(pos_num * ohem_ratio, neg_num)) - - if neg_num == 0: - selected_mask = training_mask - selected_mask = selected_mask.reshape( - 1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32') - return selected_mask - - neg_score = score[gt_text <= 0.5] - # 将负样本得分从高到低排序 - neg_score_sorted = np.sort(-neg_score) - threshold = -neg_score_sorted[neg_num - 1] - # 选出 得分高的 负样本 和正样本 的 mask - selected_mask = ((score >= threshold) | - (gt_text > 0.5)) & (training_mask > 0.5) - selected_mask = selected_mask.reshape( - 1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32') - return selected_mask - - -def ohem_batch(scores, gt_texts, training_masks, ohem_ratio): - scores = scores.numpy() - gt_texts = gt_texts.numpy() - training_masks = training_masks.numpy() - - selected_masks = 
[] - for i in range(scores.shape[0]): - selected_masks.append( - ohem_single(scores[i, :, :], gt_texts[i, :, :], training_masks[ - i, :, :], ohem_ratio)) - - selected_masks = np.concatenate(selected_masks, 0) - selected_masks = paddle.to_tensor(selected_masks) - - return selected_masks + return loss \ No newline at end of file diff --git a/ppocr/losses/det_pse_loss.py b/ppocr/losses/det_pse_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..78423091f841f29b1217f73f79beb26fe1575844 --- /dev/null +++ b/ppocr/losses/det_pse_loss.py @@ -0,0 +1,145 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +from paddle import nn +from paddle.nn import functional as F +import numpy as np +from ppocr.utils.iou import iou + + +class PSELoss(nn.Layer): + def __init__(self, + alpha, + ohem_ratio=3, + kernel_sample_mask='pred', + reduction='sum', + eps=1e-6, + **kwargs): + """Implement PSE Loss. + """ + super(PSELoss, self).__init__() + assert reduction in ['sum', 'mean', 'none'] + self.alpha = alpha + self.ohem_ratio = ohem_ratio + self.kernel_sample_mask = kernel_sample_mask + self.reduction = reduction + self.eps = eps + + def forward(self, outputs, labels): + predicts = outputs['maps'] + predicts = F.interpolate(predicts, scale_factor=4) + + texts = predicts[:, 0, :, :] + kernels = predicts[:, 1:, :, :] + gt_texts, gt_kernels, training_masks = labels[1:] + + # text loss + selected_masks = self.ohem_batch(texts, gt_texts, training_masks) + + loss_text = self.dice_loss(texts, gt_texts, selected_masks) + iou_text = iou((texts > 0).astype('int64'), + gt_texts, + training_masks, + reduce=False) + losses = dict(loss_text=loss_text, iou_text=iou_text) + + # kernel loss + loss_kernels = [] + if self.kernel_sample_mask == 'gt': + selected_masks = gt_texts * training_masks + elif self.kernel_sample_mask == 'pred': + selected_masks = ( + F.sigmoid(texts) > 0.5).astype('float32') * training_masks + + for i in range(kernels.shape[1]): + kernel_i = kernels[:, i, :, :] + gt_kernel_i = gt_kernels[:, i, :, :] + loss_kernel_i = self.dice_loss(kernel_i, gt_kernel_i, + selected_masks) + loss_kernels.append(loss_kernel_i) + loss_kernels = paddle.mean(paddle.stack(loss_kernels, axis=1), axis=1) + iou_kernel = iou((kernels[:, -1, :, :] > 0).astype('int64'), + gt_kernels[:, -1, :, :], + training_masks * gt_texts, + reduce=False) + losses.update(dict(loss_kernels=loss_kernels, iou_kernel=iou_kernel)) + loss = self.alpha * loss_text + (1 - self.alpha) * loss_kernels + losses['loss'] = loss + if self.reduction == 'sum': + losses = {x: paddle.sum(v) for x, v in losses.items()} + elif self.reduction == 'mean': + losses = {x: paddle.mean(v) for x, v in losses.items()} + return losses + + def dice_loss(self, input, target, mask): + input = F.sigmoid(input) + + input = input.reshape([input.shape[0], -1]) + target = target.reshape([target.shape[0], -1]) + mask = mask.reshape([mask.shape[0], -1]) + + input = input * mask + target = 
target * mask
+
+        a = paddle.sum(input * target, 1)
+        b = paddle.sum(input * input, 1) + self.eps
+        c = paddle.sum(target * target, 1) + self.eps
+        d = (2 * a) / (b + c)
+        return 1 - d
+
+    def ohem_single(self, score, gt_text, training_mask, ohem_ratio=3):
+        pos_num = int(paddle.sum((gt_text > 0.5).astype('float32'))) - int(
+            paddle.sum(
+                paddle.logical_and((gt_text > 0.5), (training_mask <= 0.5))
+                .astype('float32')))
+
+        if pos_num == 0:
+            selected_mask = training_mask
+            selected_mask = selected_mask.reshape(
+                [1, selected_mask.shape[0], selected_mask.shape[1]]).astype(
+                    'float32')
+            return selected_mask
+
+        neg_num = int(paddle.sum((gt_text <= 0.5).astype('float32')))
+        neg_num = int(min(pos_num * ohem_ratio, neg_num))
+
+        if neg_num == 0:
+            selected_mask = training_mask
+            # paddle.Tensor has no `view`; use reshape with a list, as above
+            selected_mask = selected_mask.reshape(
+                [1, selected_mask.shape[0],
+                 selected_mask.shape[1]]).astype('float32')
+            return selected_mask
+
+        # sort negative-sample scores from high to low and keep the top neg_num
+        neg_score = paddle.masked_select(score, gt_text <= 0.5)
+        neg_score_sorted = paddle.sort(-neg_score)
+        threshold = -neg_score_sorted[neg_num - 1]
+
+        # keep positives plus the hardest (highest-score) negatives
+        selected_mask = paddle.logical_and(
+            paddle.logical_or((score >= threshold), (gt_text > 0.5)),
+            (training_mask > 0.5))
+        selected_mask = selected_mask.reshape(
+            [1, selected_mask.shape[0], selected_mask.shape[1]]).astype(
+                'float32')
+        return selected_mask
+
+    def ohem_batch(self, scores, gt_texts, training_masks, ohem_ratio=3):
+        selected_masks = []
+        for i in range(scores.shape[0]):
+            selected_masks.append(
+                self.ohem_single(scores[i, :, :], gt_texts[i, :, :],
+                                 training_masks[i, :, :], ohem_ratio))
+
+        selected_masks = paddle.concat(selected_masks, 0).astype('float32')
+        return selected_masks
diff --git a/ppocr/losses/distillation_loss.py b/ppocr/losses/distillation_loss.py
index 75f0a773152e52c98ada5c1907f1c8cc2f72d8f3..73d3ae2ad2499607f897a102f6ea25e4cb7f297f 100644
--- a/ppocr/losses/distillation_loss.py
+++ b/ppocr/losses/distillation_loss.py
@@ -44,20 +44,22 @@ class DistillationDMLLoss(DMLLoss):
     def __init__(self,
                  model_name_pairs=[],
                  act=None,
+                 use_log=False,
                  key=None,
                  maps_name=None,
                  name="dml"):
-        super().__init__(act=act)
+        super().__init__(act=act, use_log=use_log)
         assert isinstance(model_name_pairs, list)
         self.key = key
         self.model_name_pairs = self._check_model_name_pairs(model_name_pairs)
         self.name = name
         self.maps_name = self._check_maps_name(maps_name)
-
+
     def _check_model_name_pairs(self, model_name_pairs):
         if not isinstance(model_name_pairs, list):
            return []
-        elif isinstance(model_name_pairs[0], list) and isinstance(model_name_pairs[0][0], str):
+        elif isinstance(model_name_pairs[0], list) and isinstance(
+                model_name_pairs[0][0], str):
            return model_name_pairs
        else:
            return [model_name_pairs]
@@ -112,9 +114,9 @@ class DistillationDMLLoss(DMLLoss):
                         loss_dict["{}_{}_{}_{}_{}".format(key, pair[
                             0], pair[1], map_name, idx)] = loss[key]
                 else:
-                    loss_dict["{}_{}_{}".format(self.name, self.maps_name[_c],
-                                                idx)] = loss
-
+                    loss_dict["{}_{}_{}".format(self.name, self.maps_name[
+                        _c], idx)] = loss
+
         loss_dict = _sum_loss(loss_dict)
         return loss_dict
diff --git a/ppocr/metrics/eval_det_iou.py b/ppocr/metrics/eval_det_iou.py
index 0e32b2d19281de9a18a1fe0343bd7e8237825b7b..bc05e7df7d1d21abfb9d9fbd224ecd7254d9f393 100644
--- a/ppocr/metrics/eval_det_iou.py
+++ b/ppocr/metrics/eval_det_iou.py
@@ -169,21 +169,10 @@ class DetectionIoUEvaluator(object):
             numGlobalCareDet += numDetCare

             perSampleMetrics = {
-                'precision': precision,
-                'recall': recall,
-                'hmean': hmean,
-                'pairs': pairs,
-                'iouMat': [] if len(detPols) > 100
else iouMat.tolist(), - 'gtPolPoints': gtPolPoints, - 'detPolPoints': detPolPoints, 'gtCare': numGtCare, 'detCare': numDetCare, - 'gtDontCare': gtDontCarePolsNum, - 'detDontCare': detDontCarePolsNum, 'detMatched': detMatched, - 'evaluationLog': evaluationLog } - return perSampleMetrics def combine_results(self, results): diff --git a/ppocr/modeling/backbones/rec_nrtr_mtb.py b/ppocr/modeling/backbones/rec_nrtr_mtb.py index 04b5c9bb5fdff448fbf7ad366bc39bf0e3ebfe6b..22e02a6371c3ff8b28fd88b5cfa1087309d551f8 100644 --- a/ppocr/modeling/backbones/rec_nrtr_mtb.py +++ b/ppocr/modeling/backbones/rec_nrtr_mtb.py @@ -13,6 +13,7 @@ # limitations under the License. from paddle import nn +import paddle class MTB(nn.Layer): @@ -40,7 +41,8 @@ class MTB(nn.Layer): x = self.block(images) if self.cnn_num == 2: # (b, w, h, c) - x = x.transpose([0, 3, 2, 1]) - x_shape = x.shape - x = x.reshape([x_shape[0], x_shape[1], x_shape[2] * x_shape[3]]) + x = paddle.transpose(x, [0, 3, 2, 1]) + x_shape = paddle.shape(x) + x = paddle.reshape( + x, [x_shape[0], x_shape[1], x_shape[2] * x_shape[3]]) return x diff --git a/ppocr/modeling/heads/__init__.py b/ppocr/modeling/heads/__init__.py index 80311f92a6c0c7a67673b90ee19ff5d8778ac0e8..20eb150fc3f91c3abd9a3fd8ce655c24ccab8179 100755 --- a/ppocr/modeling/heads/__init__.py +++ b/ppocr/modeling/heads/__init__.py @@ -20,6 +20,7 @@ def build_head(config): from .det_db_head import DBHead from .det_east_head import EASTHead from .det_sast_head import SASTHead + from .det_pse_head import PSEHead from .e2e_pg_head import PGHead # rec head @@ -32,8 +33,9 @@ def build_head(config): # cls head from .cls_head import ClsHead support_dict = [ - 'DBHead', 'EASTHead', 'SASTHead', 'CTCHead', 'ClsHead', 'AttentionHead', - 'SRNHead', 'PGHead', 'Transformer', 'TableAttentionHead', 'SARHead' + 'DBHead', 'PSEHead', 'EASTHead', 'SASTHead', 'CTCHead', 'ClsHead', + 'AttentionHead', 'SRNHead', 'PGHead', 'Transformer', + 'TableAttentionHead', 'SARHead' ] #table head diff --git a/ppocr/modeling/heads/det_pse_head.py b/ppocr/modeling/heads/det_pse_head.py new file mode 100644 index 0000000000000000000000000000000000000000..db800f57a216ab437b724988ce692a9ac0c545d9 --- /dev/null +++ b/ppocr/modeling/heads/det_pse_head.py @@ -0,0 +1,35 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
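+
+# PSEHead maps the fused FPN feature to `out_channels` score maps: channel 0 is
+# the full text-region map and channels 1: are the progressively shrunk kernel
+# maps consumed by PSELoss during training and by PSEPostProcess at inference.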
+from paddle import nn + + +class PSEHead(nn.Layer): + def __init__(self, + in_channels, + hidden_dim=256, + out_channels=7, + **kwargs): + super(PSEHead, self).__init__() + self.conv1 = nn.Conv2D(in_channels, hidden_dim, kernel_size=3, stride=1, padding=1) + self.bn1 = nn.BatchNorm2D(hidden_dim) + self.relu1 = nn.ReLU() + + self.conv2 = nn.Conv2D(hidden_dim, out_channels, kernel_size=1, stride=1, padding=0) + + + def forward(self, x, **kwargs): + out = self.conv1(x) + out = self.relu1(self.bn1(out)) + out = self.conv2(out) + return {'maps': out} diff --git a/ppocr/modeling/heads/multiheadAttention.py b/ppocr/modeling/heads/multiheadAttention.py index 651d4f577d2f5d1c11e36f90d1c7fea5fc3ab86e..900865ba1a8d80a108b3247ce1aff91c242860f2 100755 --- a/ppocr/modeling/heads/multiheadAttention.py +++ b/ppocr/modeling/heads/multiheadAttention.py @@ -71,8 +71,6 @@ class MultiheadAttention(nn.Layer): value, key_padding_mask=None, incremental_state=None, - need_weights=True, - static_kv=False, attn_mask=None): """ Inputs of forward function @@ -88,46 +86,42 @@ class MultiheadAttention(nn.Layer): attn_output: [target length, batch size, embed dim] attn_output_weights: [batch size, target length, sequence length] """ - tgt_len, bsz, embed_dim = query.shape - assert embed_dim == self.embed_dim - assert list(query.shape) == [tgt_len, bsz, embed_dim] - assert key.shape == value.shape - + q_shape = paddle.shape(query) + src_shape = paddle.shape(key) q = self._in_proj_q(query) k = self._in_proj_k(key) v = self._in_proj_v(value) q *= self.scaling - - q = q.reshape([tgt_len, bsz * self.num_heads, self.head_dim]).transpose( - [1, 0, 2]) - k = k.reshape([-1, bsz * self.num_heads, self.head_dim]).transpose( - [1, 0, 2]) - v = v.reshape([-1, bsz * self.num_heads, self.head_dim]).transpose( - [1, 0, 2]) - - src_len = k.shape[1] - + q = paddle.transpose( + paddle.reshape( + q, [q_shape[0], q_shape[1], self.num_heads, self.head_dim]), + [1, 2, 0, 3]) + k = paddle.transpose( + paddle.reshape( + k, [src_shape[0], q_shape[1], self.num_heads, self.head_dim]), + [1, 2, 0, 3]) + v = paddle.transpose( + paddle.reshape( + v, [src_shape[0], q_shape[1], self.num_heads, self.head_dim]), + [1, 2, 0, 3]) if key_padding_mask is not None: - assert key_padding_mask.shape[0] == bsz - assert key_padding_mask.shape[1] == src_len - - attn_output_weights = paddle.bmm(q, k.transpose([0, 2, 1])) - assert list(attn_output_weights. 
- shape) == [bsz * self.num_heads, tgt_len, src_len] - + assert key_padding_mask.shape[0] == q_shape[1] + assert key_padding_mask.shape[1] == src_shape[0] + attn_output_weights = paddle.matmul(q, + paddle.transpose(k, [0, 1, 3, 2])) if attn_mask is not None: - attn_mask = attn_mask.unsqueeze(0) + attn_mask = paddle.unsqueeze(paddle.unsqueeze(attn_mask, 0), 0) attn_output_weights += attn_mask if key_padding_mask is not None: - attn_output_weights = attn_output_weights.reshape( - [bsz, self.num_heads, tgt_len, src_len]) - key = key_padding_mask.unsqueeze(1).unsqueeze(2).astype('float32') - y = paddle.full(shape=key.shape, dtype='float32', fill_value='-inf') + attn_output_weights = paddle.reshape( + attn_output_weights, + [q_shape[1], self.num_heads, q_shape[0], src_shape[0]]) + key = paddle.unsqueeze(paddle.unsqueeze(key_padding_mask, 1), 2) + key = paddle.cast(key, 'float32') + y = paddle.full( + shape=paddle.shape(key), dtype='float32', fill_value='-inf') y = paddle.where(key == 0., key, y) attn_output_weights += y - attn_output_weights = attn_output_weights.reshape( - [bsz * self.num_heads, tgt_len, src_len]) - attn_output_weights = F.softmax( attn_output_weights.astype('float32'), axis=-1, @@ -136,43 +130,34 @@ class MultiheadAttention(nn.Layer): attn_output_weights = F.dropout( attn_output_weights, p=self.dropout, training=self.training) - attn_output = paddle.bmm(attn_output_weights, v) - assert list(attn_output. - shape) == [bsz * self.num_heads, tgt_len, self.head_dim] - attn_output = attn_output.transpose([1, 0, 2]).reshape( - [tgt_len, bsz, embed_dim]) + attn_output = paddle.matmul(attn_output_weights, v) + attn_output = paddle.reshape( + paddle.transpose(attn_output, [2, 0, 1, 3]), + [q_shape[0], q_shape[1], self.embed_dim]) attn_output = self.out_proj(attn_output) - if need_weights: - # average attention weights over heads - attn_output_weights = attn_output_weights.reshape( - [bsz, self.num_heads, tgt_len, src_len]) - attn_output_weights = attn_output_weights.sum( - axis=1) / self.num_heads - else: - attn_output_weights = None - return attn_output, attn_output_weights + return attn_output def _in_proj_q(self, query): - query = query.transpose([1, 2, 0]) + query = paddle.transpose(query, [1, 2, 0]) query = paddle.unsqueeze(query, axis=2) res = self.conv1(query) res = paddle.squeeze(res, axis=2) - res = res.transpose([2, 0, 1]) + res = paddle.transpose(res, [2, 0, 1]) return res def _in_proj_k(self, key): - key = key.transpose([1, 2, 0]) + key = paddle.transpose(key, [1, 2, 0]) key = paddle.unsqueeze(key, axis=2) res = self.conv2(key) res = paddle.squeeze(res, axis=2) - res = res.transpose([2, 0, 1]) + res = paddle.transpose(res, [2, 0, 1]) return res def _in_proj_v(self, value): - value = value.transpose([1, 2, 0]) #(1, 2, 0) + value = paddle.transpose(value, [1, 2, 0]) #(1, 2, 0) value = paddle.unsqueeze(value, axis=2) res = self.conv3(value) res = paddle.squeeze(res, axis=2) - res = res.transpose([2, 0, 1]) + res = paddle.transpose(res, [2, 0, 1]) return res diff --git a/ppocr/modeling/heads/rec_nrtr_head.py b/ppocr/modeling/heads/rec_nrtr_head.py index 05dba677b4109897b6a20888151e680e652d6741..38ba0c917840ea7d1e2a3c2bf0da32c2c35f2b40 100644 --- a/ppocr/modeling/heads/rec_nrtr_head.py +++ b/ppocr/modeling/heads/rec_nrtr_head.py @@ -61,12 +61,12 @@ class Transformer(nn.Layer): custom_decoder=None, in_channels=0, out_channels=0, - dst_vocab_size=99, scale_embedding=True): super(Transformer, self).__init__() + self.out_channels = out_channels + 1 self.embedding = Embeddings( 
d_model=d_model, - vocab=dst_vocab_size, + vocab=self.out_channels, padding_idx=0, scale_embedding=scale_embedding) self.positional_encoding = PositionalEncoding( @@ -96,9 +96,10 @@ class Transformer(nn.Layer): self.beam_size = beam_size self.d_model = d_model self.nhead = nhead - self.tgt_word_prj = nn.Linear(d_model, dst_vocab_size, bias_attr=False) + self.tgt_word_prj = nn.Linear( + d_model, self.out_channels, bias_attr=False) w0 = np.random.normal(0.0, d_model**-0.5, - (d_model, dst_vocab_size)).astype(np.float32) + (d_model, self.out_channels)).astype(np.float32) self.tgt_word_prj.weight.set_value(w0) self.apply(self._init_weights) @@ -156,46 +157,41 @@ class Transformer(nn.Layer): return self.forward_test(src) def forward_test(self, src): - bs = src.shape[0] + bs = paddle.shape(src)[0] if self.encoder is not None: - src = self.positional_encoding(src.transpose([1, 0, 2])) + src = self.positional_encoding(paddle.transpose(src, [1, 0, 2])) memory = self.encoder(src) else: - memory = src.squeeze(2).transpose([2, 0, 1]) + memory = paddle.transpose(paddle.squeeze(src, 2), [2, 0, 1]) dec_seq = paddle.full((bs, 1), 2, dtype=paddle.int64) + dec_prob = paddle.full((bs, 1), 1., dtype=paddle.float32) for len_dec_seq in range(1, 25): - src_enc = memory.clone() - tgt_key_padding_mask = self.generate_padding_mask(dec_seq) - dec_seq_embed = self.embedding(dec_seq).transpose([1, 0, 2]) + dec_seq_embed = paddle.transpose(self.embedding(dec_seq), [1, 0, 2]) dec_seq_embed = self.positional_encoding(dec_seq_embed) - tgt_mask = self.generate_square_subsequent_mask(dec_seq_embed.shape[ - 0]) + tgt_mask = self.generate_square_subsequent_mask( + paddle.shape(dec_seq_embed)[0]) output = self.decoder( dec_seq_embed, - src_enc, + memory, tgt_mask=tgt_mask, memory_mask=None, - tgt_key_padding_mask=tgt_key_padding_mask, + tgt_key_padding_mask=None, memory_key_padding_mask=None) - dec_output = output.transpose([1, 0, 2]) - - dec_output = dec_output[:, - -1, :] # Pick the last step: (bh * bm) * d_h - word_prob = F.log_softmax(self.tgt_word_prj(dec_output), axis=1) - word_prob = word_prob.reshape([1, bs, -1]) - preds_idx = word_prob.argmax(axis=2) - + dec_output = paddle.transpose(output, [1, 0, 2]) + dec_output = dec_output[:, -1, :] + word_prob = F.softmax(self.tgt_word_prj(dec_output), axis=1) + preds_idx = paddle.argmax(word_prob, axis=1) if paddle.equal_all( - preds_idx[-1], + preds_idx, paddle.full( - preds_idx[-1].shape, 3, dtype='int64')): + paddle.shape(preds_idx), 3, dtype='int64')): break - - preds_prob = word_prob.max(axis=2) + preds_prob = paddle.max(word_prob, axis=1) dec_seq = paddle.concat( - [dec_seq, preds_idx.reshape([-1, 1])], axis=1) - - return dec_seq + [dec_seq, paddle.reshape(preds_idx, [-1, 1])], axis=1) + dec_prob = paddle.concat( + [dec_prob, paddle.reshape(preds_prob, [-1, 1])], axis=1) + return [dec_seq, dec_prob] def forward_beam(self, images): ''' Translation work in one batch ''' @@ -211,14 +207,15 @@ class Transformer(nn.Layer): n_prev_active_inst, n_bm): ''' Collect tensor parts associated to active instances. 
''' - _, *d_hs = beamed_tensor.shape + beamed_tensor_shape = paddle.shape(beamed_tensor) n_curr_active_inst = len(curr_active_inst_idx) - new_shape = (n_curr_active_inst * n_bm, *d_hs) + new_shape = (n_curr_active_inst * n_bm, beamed_tensor_shape[1], + beamed_tensor_shape[2]) beamed_tensor = beamed_tensor.reshape([n_prev_active_inst, -1]) beamed_tensor = beamed_tensor.index_select( - paddle.to_tensor(curr_active_inst_idx), axis=0) - beamed_tensor = beamed_tensor.reshape([*new_shape]) + curr_active_inst_idx, axis=0) + beamed_tensor = beamed_tensor.reshape(new_shape) return beamed_tensor @@ -249,44 +246,26 @@ class Transformer(nn.Layer): b.get_current_state() for b in inst_dec_beams if not b.done ] dec_partial_seq = paddle.stack(dec_partial_seq) - dec_partial_seq = dec_partial_seq.reshape([-1, len_dec_seq]) return dec_partial_seq - def prepare_beam_memory_key_padding_mask( - inst_dec_beams, memory_key_padding_mask, n_bm): - keep = [] - for idx in (memory_key_padding_mask): - if not inst_dec_beams[idx].done: - keep.append(idx) - memory_key_padding_mask = memory_key_padding_mask[ - paddle.to_tensor(keep)] - len_s = memory_key_padding_mask.shape[-1] - n_inst = memory_key_padding_mask.shape[0] - memory_key_padding_mask = paddle.concat( - [memory_key_padding_mask for i in range(n_bm)], axis=1) - memory_key_padding_mask = memory_key_padding_mask.reshape( - [n_inst * n_bm, len_s]) #repeat(1, n_bm) - return memory_key_padding_mask - def predict_word(dec_seq, enc_output, n_active_inst, n_bm, memory_key_padding_mask): - tgt_key_padding_mask = self.generate_padding_mask(dec_seq) - dec_seq = self.embedding(dec_seq).transpose([1, 0, 2]) + dec_seq = paddle.transpose(self.embedding(dec_seq), [1, 0, 2]) dec_seq = self.positional_encoding(dec_seq) - tgt_mask = self.generate_square_subsequent_mask(dec_seq.shape[ - 0]) + tgt_mask = self.generate_square_subsequent_mask( + paddle.shape(dec_seq)[0]) dec_output = self.decoder( dec_seq, enc_output, tgt_mask=tgt_mask, - tgt_key_padding_mask=tgt_key_padding_mask, - memory_key_padding_mask=memory_key_padding_mask, - ).transpose([1, 0, 2]) + tgt_key_padding_mask=None, + memory_key_padding_mask=memory_key_padding_mask, ) + dec_output = paddle.transpose(dec_output, [1, 0, 2]) dec_output = dec_output[:, -1, :] # Pick the last step: (bh * bm) * d_h - word_prob = F.log_softmax(self.tgt_word_prj(dec_output), axis=1) - word_prob = word_prob.reshape([n_active_inst, n_bm, -1]) + word_prob = F.softmax(self.tgt_word_prj(dec_output), axis=1) + word_prob = paddle.reshape(word_prob, [n_active_inst, n_bm, -1]) return word_prob def collect_active_inst_idx_list(inst_beams, word_prob, @@ -302,9 +281,8 @@ class Transformer(nn.Layer): n_active_inst = len(inst_idx_to_position_map) dec_seq = prepare_beam_dec_seq(inst_dec_beams, len_dec_seq) - memory_key_padding_mask = None word_prob = predict_word(dec_seq, enc_output, n_active_inst, n_bm, - memory_key_padding_mask) + None) # Update the beam with predicted word prob information and collect incomplete instances active_inst_idx_list = collect_active_inst_idx_list( inst_dec_beams, word_prob, inst_idx_to_position_map) @@ -324,27 +302,21 @@ class Transformer(nn.Layer): with paddle.no_grad(): #-- Encode - if self.encoder is not None: src = self.positional_encoding(images.transpose([1, 0, 2])) - src_enc = self.encoder(src).transpose([1, 0, 2]) + src_enc = self.encoder(src) else: src_enc = images.squeeze(2).transpose([0, 2, 1]) - #-- Repeat data for beam search n_bm = self.beam_size - n_inst, len_s, d_h = src_enc.shape - src_enc = 
paddle.concat([src_enc for i in range(n_bm)], axis=1) - src_enc = src_enc.reshape([n_inst * n_bm, len_s, d_h]).transpose( - [1, 0, 2]) - #-- Prepare beams - inst_dec_beams = [Beam(n_bm) for _ in range(n_inst)] - - #-- Bookkeeping for active or not - active_inst_idx_list = list(range(n_inst)) + src_shape = paddle.shape(src_enc) + inst_dec_beams = [Beam(n_bm) for _ in range(1)] + active_inst_idx_list = list(range(1)) + # Repeat data for beam search + src_enc = paddle.tile(src_enc, [1, n_bm, 1]) inst_idx_to_position_map = get_inst_idx_to_tensor_position_map( active_inst_idx_list) - #-- Decode + # Decode for len_dec_seq in range(1, 25): src_enc_copy = src_enc.clone() active_inst_idx_list = beam_decode_step( @@ -358,10 +330,19 @@ class Transformer(nn.Layer): batch_hyp, batch_scores = collect_hypothesis_and_scores(inst_dec_beams, 1) result_hyp = [] - for bs_hyp in batch_hyp: - bs_hyp_pad = bs_hyp[0] + [3] * (25 - len(bs_hyp[0])) + hyp_scores = [] + for bs_hyp, score in zip(batch_hyp, batch_scores): + l = len(bs_hyp[0]) + bs_hyp_pad = bs_hyp[0] + [3] * (25 - l) result_hyp.append(bs_hyp_pad) - return paddle.to_tensor(np.array(result_hyp), dtype=paddle.int64) + score = float(score) / l + hyp_score = [score for _ in range(25)] + hyp_scores.append(hyp_score) + return [ + paddle.to_tensor( + np.array(result_hyp), dtype=paddle.int64), + paddle.to_tensor(hyp_scores) + ] def generate_square_subsequent_mask(self, sz): """Generate a square mask for the sequence. The masked positions are filled with float('-inf'). @@ -376,7 +357,7 @@ class Transformer(nn.Layer): return mask def generate_padding_mask(self, x): - padding_mask = x.equal(paddle.to_tensor(0, dtype=x.dtype)) + padding_mask = paddle.equal(x, paddle.to_tensor(0, dtype=x.dtype)) return padding_mask def _reset_parameters(self): @@ -514,17 +495,17 @@ class TransformerEncoderLayer(nn.Layer): src, src, attn_mask=src_mask, - key_padding_mask=src_key_padding_mask)[0] + key_padding_mask=src_key_padding_mask) src = src + self.dropout1(src2) src = self.norm1(src) - src = src.transpose([1, 2, 0]) + src = paddle.transpose(src, [1, 2, 0]) src = paddle.unsqueeze(src, 2) src2 = self.conv2(F.relu(self.conv1(src))) src2 = paddle.squeeze(src2, 2) - src2 = src2.transpose([2, 0, 1]) + src2 = paddle.transpose(src2, [2, 0, 1]) src = paddle.squeeze(src, 2) - src = src.transpose([2, 0, 1]) + src = paddle.transpose(src, [2, 0, 1]) src = src + self.dropout2(src2) src = self.norm2(src) @@ -598,7 +579,7 @@ class TransformerDecoderLayer(nn.Layer): tgt, tgt, attn_mask=tgt_mask, - key_padding_mask=tgt_key_padding_mask)[0] + key_padding_mask=tgt_key_padding_mask) tgt = tgt + self.dropout1(tgt2) tgt = self.norm1(tgt) tgt2 = self.multihead_attn( @@ -606,18 +587,18 @@ class TransformerDecoderLayer(nn.Layer): memory, memory, attn_mask=memory_mask, - key_padding_mask=memory_key_padding_mask)[0] + key_padding_mask=memory_key_padding_mask) tgt = tgt + self.dropout2(tgt2) tgt = self.norm2(tgt) # default - tgt = tgt.transpose([1, 2, 0]) + tgt = paddle.transpose(tgt, [1, 2, 0]) tgt = paddle.unsqueeze(tgt, 2) tgt2 = self.conv2(F.relu(self.conv1(tgt))) tgt2 = paddle.squeeze(tgt2, 2) - tgt2 = tgt2.transpose([2, 0, 1]) + tgt2 = paddle.transpose(tgt2, [2, 0, 1]) tgt = paddle.squeeze(tgt, 2) - tgt = tgt.transpose([2, 0, 1]) + tgt = paddle.transpose(tgt, [2, 0, 1]) tgt = tgt + self.dropout3(tgt2) tgt = self.norm3(tgt) @@ -656,8 +637,8 @@ class PositionalEncoding(nn.Layer): (-math.log(10000.0) / dim)) pe[:, 0::2] = paddle.sin(position * div_term) pe[:, 1::2] = paddle.cos(position * div_term) - pe 
= pe.unsqueeze(0)
-        pe = pe.transpose([1, 0, 2])
+        pe = paddle.unsqueeze(pe, 0)
+        pe = paddle.transpose(pe, [1, 0, 2])
         self.register_buffer('pe', pe)

     def forward(self, x):
@@ -670,7 +651,7 @@
         Examples:
             >>> output = pos_encoder(x)
         """
-        x = x + self.pe[:x.shape[0], :]
+        x = x + self.pe[:paddle.shape(x)[0], :]
         return self.dropout(x)

@@ -702,7 +683,7 @@ class PositionalEncoding_2d(nn.Layer):
                              (-math.log(10000.0) / dim))
         pe[:, 0::2] = paddle.sin(position * div_term)
         pe[:, 1::2] = paddle.cos(position * div_term)
-        pe = pe.unsqueeze(0).transpose([1, 0, 2])
+        pe = paddle.transpose(paddle.unsqueeze(pe, 0), [1, 0, 2])
         self.register_buffer('pe', pe)

         self.avg_pool_1 = nn.AdaptiveAvgPool2D((1, 1))
@@ -722,22 +703,23 @@
         Examples:
             >>> output = pos_encoder(x)
         """
-        w_pe = self.pe[:x.shape[-1], :]
+        w_pe = self.pe[:paddle.shape(x)[-1], :]
         w1 = self.linear1(self.avg_pool_1(x).squeeze()).unsqueeze(0)
         w_pe = w_pe * w1
-        w_pe = w_pe.transpose([1, 2, 0])
-        w_pe = w_pe.unsqueeze(2)
+        w_pe = paddle.transpose(w_pe, [1, 2, 0])
+        w_pe = paddle.unsqueeze(w_pe, 2)

-        h_pe = self.pe[:x.shape[-2], :]
+        h_pe = self.pe[:paddle.shape(x)[-2], :]
         w2 = self.linear2(self.avg_pool_2(x).squeeze()).unsqueeze(0)
         h_pe = h_pe * w2
-        h_pe = h_pe.transpose([1, 2, 0])
-        h_pe = h_pe.unsqueeze(3)
+        h_pe = paddle.transpose(h_pe, [1, 2, 0])
+        h_pe = paddle.unsqueeze(h_pe, 3)

         x = x + w_pe + h_pe
-        x = x.reshape(
-            [x.shape[0], x.shape[1], x.shape[2] * x.shape[3]]).transpose(
-                [2, 0, 1])
+        x = paddle.transpose(
+            paddle.reshape(x,
+                           [x.shape[0], x.shape[1], x.shape[2] * x.shape[3]]),
+            [2, 0, 1])

         return self.dropout(x)

@@ -817,7 +799,7 @@ class Beam():
     def sort_scores(self):
         "Sort the scores."
         return self.scores, paddle.to_tensor(
-            [i for i in range(self.scores.shape[0])], dtype='int32')
+            [i for i in range(int(self.scores.shape[0]))], dtype='int32')

     def get_the_best_score_and_idx(self):
         "Get the score of the best in the beam."
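Reviewer note: with this change `forward_test` returns `[dec_seq, dec_prob]` (token ids plus per-step confidences) instead of ids alone, and `NRTRLabelDecode` in ppocr/postprocess/rec_postprocess.py (further down in this patch) unpacks the pair. A minimal sketch of how a caller might consume that pair; `char_list` and the helper name are hypothetical, while the start id 2 and end id 3 follow the conventions visible in this head:

```python
import numpy as np

def decode_nrtr_outputs(preds, char_list):
    """Turn the [dec_seq, dec_prob] pair from forward_test into (text, score)."""
    dec_seq, dec_prob = preds
    dec_seq, dec_prob = np.asarray(dec_seq), np.asarray(dec_prob)
    results = []
    for ids, probs in zip(dec_seq, dec_prob):
        chars, confs = [], []
        for idx, p in zip(ids, probs):
            if idx == 3:   # end-of-sequence token: stop decoding this sample
                break
            if idx == 2:   # start token prepended to dec_seq: skip it
                continue
            chars.append(char_list[int(idx)])
            confs.append(float(p))
        # average per-character confidence as the sequence score
        results.append(("".join(chars), sum(confs) / max(len(confs), 1)))
    return results
```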
diff --git a/ppocr/modeling/heads/rec_sar_head.py b/ppocr/modeling/heads/rec_sar_head.py index 647f58200f83e3f2c23030872c606f299c58f7b7..7107788d9ef3b49ac6d4dcd4a8133a9603ada19b 100644 --- a/ppocr/modeling/heads/rec_sar_head.py +++ b/ppocr/modeling/heads/rec_sar_head.py @@ -235,7 +235,8 @@ class ParallelSARDecoder(BaseDecoder): # cal mask of attention weight for i, valid_ratio in enumerate(valid_ratios): valid_width = min(w, math.ceil(w * valid_ratio)) - attn_weight[i, :, :, valid_width:, :] = float('-inf') + if valid_width < w: + attn_weight[i, :, :, valid_width:, :] = float('-inf') attn_weight = paddle.reshape(attn_weight, [bsz, T, -1]) attn_weight = F.softmax(attn_weight, axis=-1) diff --git a/ppocr/modeling/necks/__init__.py b/ppocr/modeling/necks/__init__.py index e97c4f64bdc9acd6729d67a9c6ff7a7563f6c95e..5606a4c35f68021e7f151a7eae4a0da4d5b6b95e 100644 --- a/ppocr/modeling/necks/__init__.py +++ b/ppocr/modeling/necks/__init__.py @@ -22,7 +22,8 @@ def build_neck(config): from .rnn import SequenceEncoder from .pg_fpn import PGFPN from .table_fpn import TableFPN - support_dict = ['DBFPN', 'EASTFPN', 'SASTFPN', 'SequenceEncoder', 'PGFPN', 'TableFPN'] + from .fpn import FPN + support_dict = ['FPN','DBFPN', 'EASTFPN', 'SASTFPN', 'SequenceEncoder', 'PGFPN', 'TableFPN'] module_name = config.pop('name') assert module_name in support_dict, Exception('neck only support {}'.format( diff --git a/ppocr/modeling/necks/fpn.py b/ppocr/modeling/necks/fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..8728a5c9ded5b9c174fd34f088d8012961f65ec0 --- /dev/null +++ b/ppocr/modeling/necks/fpn.py @@ -0,0 +1,100 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.nn as nn +import paddle +import math +import paddle.nn.functional as F + +class Conv_BN_ReLU(nn.Layer): + def __init__(self, in_planes, out_planes, kernel_size=1, stride=1, padding=0): + super(Conv_BN_ReLU, self).__init__() + self.conv = nn.Conv2D(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, + bias_attr=False) + self.bn = nn.BatchNorm2D(out_planes, momentum=0.1) + self.relu = nn.ReLU() + + for m in self.sublayers(): + if isinstance(m, nn.Conv2D): + n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels + m.weight = paddle.create_parameter(shape=m.weight.shape, dtype='float32', default_initializer=paddle.nn.initializer.Normal(0, math.sqrt(2. 
/ n))) + elif isinstance(m, nn.BatchNorm2D): + m.weight = paddle.create_parameter(shape=m.weight.shape, dtype='float32', default_initializer=paddle.nn.initializer.Constant(1.0)) + m.bias = paddle.create_parameter(shape=m.bias.shape, dtype='float32', default_initializer=paddle.nn.initializer.Constant(0.0)) + + def forward(self, x): + return self.relu(self.bn(self.conv(x))) + +class FPN(nn.Layer): + def __init__(self, in_channels, out_channels): + super(FPN, self).__init__() + + # Top layer + self.toplayer_ = Conv_BN_ReLU(in_channels[3], out_channels, kernel_size=1, stride=1, padding=0) + # Lateral layers + self.latlayer1_ = Conv_BN_ReLU(in_channels[2], out_channels, kernel_size=1, stride=1, padding=0) + + self.latlayer2_ = Conv_BN_ReLU(in_channels[1], out_channels, kernel_size=1, stride=1, padding=0) + + self.latlayer3_ = Conv_BN_ReLU(in_channels[0], out_channels, kernel_size=1, stride=1, padding=0) + + # Smooth layers + self.smooth1_ = Conv_BN_ReLU(out_channels, out_channels, kernel_size=3, stride=1, padding=1) + + self.smooth2_ = Conv_BN_ReLU(out_channels, out_channels, kernel_size=3, stride=1, padding=1) + + self.smooth3_ = Conv_BN_ReLU(out_channels, out_channels, kernel_size=3, stride=1, padding=1) + + + self.out_channels = out_channels * 4 + for m in self.sublayers(): + if isinstance(m, nn.Conv2D): + n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels + m.weight = paddle.create_parameter(shape=m.weight.shape, dtype='float32', + default_initializer=paddle.nn.initializer.Normal(0, + math.sqrt(2. / n))) + elif isinstance(m, nn.BatchNorm2D): + m.weight = paddle.create_parameter(shape=m.weight.shape, dtype='float32', + default_initializer=paddle.nn.initializer.Constant(1.0)) + m.bias = paddle.create_parameter(shape=m.bias.shape, dtype='float32', + default_initializer=paddle.nn.initializer.Constant(0.0)) + + def _upsample(self, x, scale=1): + return F.upsample(x, scale_factor=scale, mode='bilinear') + + def _upsample_add(self, x, y, scale=1): + return F.upsample(x, scale_factor=scale, mode='bilinear') + y + + def forward(self, x): + f2, f3, f4, f5 = x + p5 = self.toplayer_(f5) + + f4 = self.latlayer1_(f4) + p4 = self._upsample_add(p5, f4,2) + p4 = self.smooth1_(p4) + + f3 = self.latlayer2_(f3) + p3 = self._upsample_add(p4, f3,2) + p3 = self.smooth2_(p3) + + f2 = self.latlayer3_(f2) + p2 = self._upsample_add(p3, f2,2) + p2 = self.smooth3_(p2) + + p3 = self._upsample(p3, 2) + p4 = self._upsample(p4, 4) + p5 = self._upsample(p5, 8) + + fuse = paddle.concat([p2, p3, p4, p5], axis=1) + return fuse \ No newline at end of file diff --git a/ppocr/postprocess/__init__.py b/ppocr/postprocess/__init__.py index 77081abeb691f01d0d3fcc8285d78cf5878411a7..3eb5e28da34bf4d9ed478c52ab38b1de21c0d1e3 100644 --- a/ppocr/postprocess/__init__.py +++ b/ppocr/postprocess/__init__.py @@ -28,12 +28,14 @@ from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, Di TableLabelDecode, SARLabelDecode from .cls_postprocess import ClsPostProcess from .pg_postprocess import PGPostProcess +from .pse_postprocess import PSEPostProcess + def build_post_process(config, global_config=None): support_dict = [ - 'DBPostProcess', 'EASTPostProcess', 'SASTPostProcess', 'CTCLabelDecode', - 'AttnLabelDecode', 'ClsPostProcess', 'SRNLabelDecode', 'PGPostProcess', - 'DistillationCTCLabelDecode', 'TableLabelDecode', + 'DBPostProcess', 'PSEPostProcess', 'EASTPostProcess', 'SASTPostProcess', + 'CTCLabelDecode', 'AttnLabelDecode', 'ClsPostProcess', 'SRNLabelDecode', + 'PGPostProcess', 'DistillationCTCLabelDecode', 
'TableLabelDecode',
         'DistillationDBPostProcess', 'NRTRLabelDecode', 'SARLabelDecode'
     ]
diff --git a/ppocr/postprocess/pse_postprocess/__init__.py b/ppocr/postprocess/pse_postprocess/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..680473bf4b1863ac695dc8173778e59bd4fdacf9
--- /dev/null
+++ b/ppocr/postprocess/pse_postprocess/__init__.py
@@ -0,0 +1,15 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .pse_postprocess import PSEPostProcess
\ No newline at end of file
diff --git a/ppocr/postprocess/pse_postprocess/pse/README.md b/ppocr/postprocess/pse_postprocess/pse/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9c2d9eaeaa5f93550358ebdd4d9161330b78a86f
--- /dev/null
+++ b/ppocr/postprocess/pse_postprocess/pse/README.md
@@ -0,0 +1,5 @@
+## Build
+Code from https://github.com/whai362/pan_pp.pytorch
+```shell
+python3 setup.py build_ext --inplace
+```
diff --git a/ppocr/postprocess/pse_postprocess/pse/__init__.py b/ppocr/postprocess/pse_postprocess/pse/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..97b8d8aff0cf229a4e3ec1961638273bd201822a
--- /dev/null
+++ b/ppocr/postprocess/pse_postprocess/pse/__init__.py
@@ -0,0 +1,23 @@
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
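+
+# NOTE: importing this package compiles the `pse` Cython extension in place
+# (the subprocess below runs `setup.py build_ext --inplace`), so the first
+# import needs Cython and a C++ toolchain; a RuntimeError is raised on failure.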
+import sys +import os +import subprocess + +python_path = sys.executable + +if subprocess.call('cd ppocr/postprocess/pse_postprocess/pse;{} setup.py build_ext --inplace;cd -'.format(python_path), shell=True) != 0: + raise RuntimeError('Cannot compile pse: {}'.format(os.path.dirname(os.path.realpath(__file__)))) + +from .pse import pse \ No newline at end of file diff --git a/ppocr/postprocess/pse_postprocess/pse/pse.pyx b/ppocr/postprocess/pse_postprocess/pse/pse.pyx new file mode 100644 index 0000000000000000000000000000000000000000..b2be49e9471865c11b840207f922258e67a554b6 --- /dev/null +++ b/ppocr/postprocess/pse_postprocess/pse/pse.pyx @@ -0,0 +1,70 @@ + +import numpy as np +import cv2 +cimport numpy as np +cimport cython +cimport libcpp +cimport libcpp.pair +cimport libcpp.queue +from libcpp.pair cimport * +from libcpp.queue cimport * + +@cython.boundscheck(False) +@cython.wraparound(False) +cdef np.ndarray[np.int32_t, ndim=2] _pse(np.ndarray[np.uint8_t, ndim=3] kernels, + np.ndarray[np.int32_t, ndim=2] label, + int kernel_num, + int label_num, + float min_area=0): + cdef np.ndarray[np.int32_t, ndim=2] pred + pred = np.zeros((label.shape[0], label.shape[1]), dtype=np.int32) + + for label_idx in range(1, label_num): + if np.sum(label == label_idx) < min_area: + label[label == label_idx] = 0 + + cdef libcpp.queue.queue[libcpp.pair.pair[np.int16_t,np.int16_t]] que = \ + queue[libcpp.pair.pair[np.int16_t,np.int16_t]]() + cdef libcpp.queue.queue[libcpp.pair.pair[np.int16_t,np.int16_t]] nxt_que = \ + queue[libcpp.pair.pair[np.int16_t,np.int16_t]]() + cdef np.int16_t* dx = [-1, 1, 0, 0] + cdef np.int16_t* dy = [0, 0, -1, 1] + cdef np.int16_t tmpx, tmpy + + points = np.array(np.where(label > 0)).transpose((1, 0)) + for point_idx in range(points.shape[0]): + tmpx, tmpy = points[point_idx, 0], points[point_idx, 1] + que.push(pair[np.int16_t,np.int16_t](tmpx, tmpy)) + pred[tmpx, tmpy] = label[tmpx, tmpy] + + cdef libcpp.pair.pair[np.int16_t,np.int16_t] cur + cdef int cur_label + for kernel_idx in range(kernel_num - 1, -1, -1): + while not que.empty(): + cur = que.front() + que.pop() + cur_label = pred[cur.first, cur.second] + + is_edge = True + for j in range(4): + tmpx = cur.first + dx[j] + tmpy = cur.second + dy[j] + if tmpx < 0 or tmpx >= label.shape[0] or tmpy < 0 or tmpy >= label.shape[1]: + continue + if kernels[kernel_idx, tmpx, tmpy] == 0 or pred[tmpx, tmpy] > 0: + continue + + que.push(pair[np.int16_t,np.int16_t](tmpx, tmpy)) + pred[tmpx, tmpy] = cur_label + is_edge = False + if is_edge: + nxt_que.push(cur) + + que, nxt_que = nxt_que, que + + return pred + +def pse(kernels, min_area): + kernel_num = kernels.shape[0] + label_num, label = cv2.connectedComponents(kernels[-1], connectivity=4) + return _pse(kernels[:-1], label, kernel_num, label_num, min_area) \ No newline at end of file diff --git a/ppocr/postprocess/pse_postprocess/pse/setup.py b/ppocr/postprocess/pse_postprocess/pse/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..03746782af791938bff31c24e4a760f566c73b49 --- /dev/null +++ b/ppocr/postprocess/pse_postprocess/pse/setup.py @@ -0,0 +1,14 @@ +from distutils.core import setup, Extension +from Cython.Build import cythonize +import numpy + +setup(ext_modules=cythonize(Extension( + 'pse', + sources=['pse.pyx'], + language='c++', + include_dirs=[numpy.get_include()], + library_dirs=[], + libraries=[], + extra_compile_args=['-O3'], + extra_link_args=[] +))) diff --git a/ppocr/postprocess/pse_postprocess/pse_postprocess.py 
b/ppocr/postprocess/pse_postprocess/pse_postprocess.py new file mode 100755 index 0000000000000000000000000000000000000000..4b89d221d284602933ab3d4f21468fcae79ef310 --- /dev/null +++ b/ppocr/postprocess/pse_postprocess/pse_postprocess.py @@ -0,0 +1,112 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import cv2 +import paddle +from paddle.nn import functional as F + +from ppocr.postprocess.pse_postprocess.pse import pse + + +class PSEPostProcess(object): + """ + The post process for PSE. + """ + + def __init__(self, + thresh=0.5, + box_thresh=0.85, + min_area=16, + box_type='box', + scale=4, + **kwargs): + assert box_type in ['box', 'poly'], 'Only box and poly is supported' + self.thresh = thresh + self.box_thresh = box_thresh + self.min_area = min_area + self.box_type = box_type + self.scale = scale + + def __call__(self, outs_dict, shape_list): + pred = outs_dict['maps'] + if not isinstance(pred, paddle.Tensor): + pred = paddle.to_tensor(pred) + pred = F.interpolate(pred, scale_factor=4 // self.scale, mode='bilinear') + + score = F.sigmoid(pred[:, 0, :, :]) + + kernels = (pred > self.thresh).astype('float32') + text_mask = kernels[:, 0, :, :] + kernels[:, 0:, :, :] = kernels[:, 0:, :, :] * text_mask + + score = score.numpy() + kernels = kernels.numpy().astype(np.uint8) + + boxes_batch = [] + for batch_index in range(pred.shape[0]): + boxes, scores = self.boxes_from_bitmap(score[batch_index], kernels[batch_index], shape_list[batch_index]) + + boxes_batch.append({'points': boxes, 'scores': scores}) + return boxes_batch + + def boxes_from_bitmap(self, score, kernels, shape): + label = pse(kernels, self.min_area) + return self.generate_box(score, label, shape) + + def generate_box(self, score, label, shape): + src_h, src_w, ratio_h, ratio_w = shape + label_num = np.max(label) + 1 + + boxes = [] + scores = [] + for i in range(1, label_num): + ind = label == i + points = np.array(np.where(ind)).transpose((1, 0))[:, ::-1] + + if points.shape[0] < self.min_area: + label[ind] = 0 + continue + + score_i = np.mean(score[ind]) + if score_i < self.box_thresh: + label[ind] = 0 + continue + + if self.box_type == 'box': + rect = cv2.minAreaRect(points) + bbox = cv2.boxPoints(rect) + elif self.box_type == 'poly': + box_height = np.max(points[:, 1]) + 10 + box_width = np.max(points[:, 0]) + 10 + + mask = np.zeros((box_height, box_width), np.uint8) + mask[points[:, 1], points[:, 0]] = 255 + + contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + bbox = np.squeeze(contours[0], 1) + else: + raise NotImplementedError + + bbox[:, 0] = np.clip( + np.round(bbox[:, 0] / ratio_w), 0, src_w) + bbox[:, 1] = np.clip( + np.round(bbox[:, 1] / ratio_h), 0, src_h) + boxes.append(bbox) + scores.append(score_i) + return boxes, scores diff --git a/ppocr/postprocess/rec_postprocess.py 
b/ppocr/postprocess/rec_postprocess.py index 6ff375eb43e773f68f89f66663835ffe45da09b5..96b2169d28004d408bd567db2b3130b681fcb582 100644 --- a/ppocr/postprocess/rec_postprocess.py +++ b/ppocr/postprocess/rec_postprocess.py @@ -169,15 +169,20 @@ class NRTRLabelDecode(BaseRecLabelDecode): character_type, use_space_char) def __call__(self, preds, label=None, *args, **kwargs): - if preds.dtype == paddle.int64: - if isinstance(preds, paddle.Tensor): - preds = preds.numpy() - if preds[0][0] == 2: - preds_idx = preds[:, 1:] - else: - preds_idx = preds - text = self.decode(preds_idx) + if len(preds) == 2: + preds_id = preds[0] + preds_prob = preds[1] + if isinstance(preds_id, paddle.Tensor): + preds_id = preds_id.numpy() + if isinstance(preds_prob, paddle.Tensor): + preds_prob = preds_prob.numpy() + if preds_id[0][0] == 2: + preds_idx = preds_id[:, 1:] + preds_prob = preds_prob[:, 1:] + else: + preds_idx = preds_id + text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False) if label is None: return text label = self.decode(label[:, 1:]) diff --git a/ppocr/utils/iou.py b/ppocr/utils/iou.py new file mode 100644 index 0000000000000000000000000000000000000000..20529dee2d14083f3de4ac034668d004136c56e2 --- /dev/null +++ b/ppocr/utils/iou.py @@ -0,0 +1,48 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle + +EPS = 1e-6 + +def iou_single(a, b, mask, n_class): + valid = mask == 1 + a = a.masked_select(valid) + b = b.masked_select(valid) + miou = [] + for i in range(n_class): + if a.shape == [0] and a.shape==b.shape: + inter = paddle.to_tensor(0.0) + union = paddle.to_tensor(0.0) + else: + inter = ((a == i).logical_and(b == i)).astype('float32') + union = ((a == i).logical_or(b == i)).astype('float32') + miou.append(paddle.sum(inter) / (paddle.sum(union) + EPS)) + miou = sum(miou) / len(miou) + return miou + +def iou(a, b, mask, n_class=2, reduce=True): + batch_size = a.shape[0] + + a = a.reshape([batch_size, -1]) + b = b.reshape([batch_size, -1]) + mask = mask.reshape([batch_size, -1]) + + iou = paddle.zeros((batch_size,), dtype='float32') + for i in range(batch_size): + iou[i] = iou_single(a[i], b[i], mask[i], n_class) + + if reduce: + iou = paddle.mean(iou) + return iou \ No newline at end of file diff --git a/ppocr/utils/save_load.py b/ppocr/utils/save_load.py index 3bb022ed98b140995b79ceea93d7f494d3f5930d..a7d24dd71a6e35ca619c2a3f90df3a202b8ad94b 100644 --- a/ppocr/utils/save_load.py +++ b/ppocr/utils/save_load.py @@ -108,14 +108,15 @@ def load_dygraph_params(config, model, logger, optimizer): for k1, k2 in zip(state_dict.keys(), params.keys()): if list(state_dict[k1].shape) == list(params[k2].shape): new_state_dict[k1] = params[k2] - else: - logger.info( - f"The shape of model params {k1} {state_dict[k1].shape} not matched with loaded params {k2} {params[k2].shape} !" - ) + else: + logger.info( + f"The shape of model params {k1} {state_dict[k1].shape} not matched with loaded params {k2} {params[k2].shape} !" 
+ ) model.set_state_dict(new_state_dict) logger.info(f"loaded pretrained_model successful from {pm}") return {} + def load_pretrained_params(model, path): if path is None: return False @@ -138,6 +139,7 @@ def load_pretrained_params(model, path): print(f"load pretrain successful from {path}") return model + def save_model(model, optimizer, model_path, diff --git a/requirements.txt b/requirements.txt index 2c7baa8516932f56f77b71b4e6dc7d45cd43072e..0b2366c5cd344260d7afab811b27e19499a89b26 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,6 +8,7 @@ numpy visualdl python-Levenshtein opencv-contrib-python==4.4.0.46 +cython lxml premailer openpyxl \ No newline at end of file diff --git a/tests/compare_results.py b/tests/compare_results.py index 1c3fe4ea951aef122728a7aed7fc4ecaf8e7607e..35af38809fe7d564707d0d538f7d0159cb6edfbd 100644 --- a/tests/compare_results.py +++ b/tests/compare_results.py @@ -32,7 +32,6 @@ def run_shell_command(cmd): else: return None - def parser_results_from_log_by_name(log_path, names_list): if not os.path.exists(log_path): raise ValueError("The log file {} does not exists!".format(log_path)) @@ -46,11 +45,13 @@ def parser_results_from_log_by_name(log_path, names_list): outs = run_shell_command(cmd) outs = outs.split("\n")[0] result = outs.split("{}".format(name))[-1] - result = json.loads(result) + try: + result = json.loads(result) + except: + result = np.array([int(r) for r in result.split()]).reshape(-1, 4) parser_results[name] = result return parser_results - def load_gt_from_file(gt_file): if not os.path.exists(gt_file): raise ValueError("The log file {} does not exists!".format(gt_file)) @@ -60,7 +61,11 @@ def load_gt_from_file(gt_file): parser_gt = {} for line in data: image_name, result = line.strip("\n").split("\t") - result = json.loads(result) + image_name = image_name.split('/')[-1] + try: + result = json.loads(result) + except: + result = np.array([int(r) for r in result.split()]).reshape(-1, 4) parser_gt[image_name] = result return parser_gt diff --git a/tests/configs/det_mv3_db.yml b/tests/configs/det_mv3_db.yml index d6d4c26cbcafa9ff698faec7f8af950152635eeb..5eada6d53dd3364238bdfc6a3c40515ca0726688 100644 --- a/tests/configs/det_mv3_db.yml +++ b/tests/configs/det_mv3_db.yml @@ -23,10 +23,10 @@ Architecture: name: MobileNetV3 scale: 0.5 model_name: large - disable_se: True + disable_se: False Neck: name: DBFPN - out_channels: 96 + out_channels: 256 Head: name: DBHead k: 50 @@ -74,7 +74,7 @@ Train: channel_first: False - DetLabelEncode: # Class handling label - Resize: - # size: [640, 640] + size: [640, 640] - MakeBorderMap: shrink_ratio: 0.4 thresh_min: 0.3 diff --git a/tests/configs/rec_icdar15_r34_train.yml b/tests/configs/rec_icdar15_r34_train.yml new file mode 100644 index 0000000000000000000000000000000000000000..5825c3e9622728e050941a34a055514b2c184659 --- /dev/null +++ b/tests/configs/rec_icdar15_r34_train.yml @@ -0,0 +1,99 @@ +Global: + use_gpu: true + epoch_num: 72 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec/ic15/ + save_epoch_step: 3 + # evaluation is run every 2000 iterations + eval_batch_step: [0, 2000] + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: ./ + use_visualdl: False + infer_img: doc/imgs_words_en/word_10.png + # for data or label process + character_dict_path: ppocr/utils/en_dict.txt + character_type: EN + max_text_length: 25 + infer_mode: False + use_space_char: False + save_res_path: ./output/rec/predicts_ic15.txt + +Optimizer: + name: Adam + beta1: 
0.9 + beta2: 0.999 + lr: + learning_rate: 0.0005 + regularizer: + name: 'L2' + factor: 0 + +Architecture: + model_type: rec + algorithm: CRNN + Transform: + Backbone: + name: ResNet + layers: 34 + Neck: + name: SequenceEncoder + encoder_type: rnn + hidden_size: 256 + Head: + name: CTCHead + fc_decay: 0 + +Loss: + name: CTCLoss + +PostProcess: + name: CTCLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data/ + label_file_list: ["./train_data/ic15_data/rec_gt_train.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 100] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: True + batch_size_per_card: 256 + drop_last: True + num_workers: 8 + use_shared_memory: False + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data + label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 100] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 256 + num_workers: 4 + use_shared_memory: False diff --git a/tests/ocr_det_params.txt b/tests/ocr_det_params.txt index 4ea74746620dbaf130084e7ecb8f76348cac088f..6fd22e409a5219574b2f29285ff5ee5d2e1cf7ca 100644 --- a/tests/ocr_det_params.txt +++ b/tests/ocr_det_params.txt @@ -12,7 +12,7 @@ train_model_name:latest train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ null:null ## -trainer:norm_train|pact_train +trainer:norm_train|pact_train|fpgm_train norm_train:tools/train.py -c tests/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained pact_train:deploy/slim/quantization/quant.py -c tests/configs/det_mv3_db.yml -o fpgm_train:deploy/slim/prune/sensitivity_anal.py -c tests/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy @@ -21,7 +21,7 @@ null:null null:null ## ===========================eval_params=========================== -eval:tools/eval.py -c tests/configs/det_mv3_db.yml -o +eval:null null:null ## ===========================infer_params=========================== @@ -35,7 +35,7 @@ export1:null export2:null ## train_model:./inference/ch_ppocr_mobile_v2.0_det_train/best_accuracy -infer_export:tools/export_model.py -c configs/det/det_mv3_db.yml -o +infer_export:tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o infer_quant:False inference:tools/infer/predict_det.py --use_gpu:True|False diff --git a/tests/ocr_kl_quant_params.txt b/tests/ocr_kl_quant_params.txt new file mode 100644 index 0000000000000000000000000000000000000000..c6ee97dca49bb7d942a339783af44053e6c79b00 --- /dev/null +++ b/tests/ocr_kl_quant_params.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:ocr_system +python:python3.7 +gpu_list:null +Global.use_gpu:null +Global.auto_cast:null +Global.epoch_num:null +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:null +Global.pretrained_model:null +train_model_name:null +train_infer_img_dir:null +null:null +## +trainer: +norm_train:null +pact_train:null 
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:null
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.pretrained_model:
+norm_export:null
+quant_export:null
+fpgm_export:null
+distill_export:null
+export1:null
+export2:null
+##
+infer_model:./inference/ch_ppocr_mobile_v2.0_det_infer/
+kl_quant:deploy/slim/quantization/quant_kl.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o
+infer_quant:True
+inference:tools/infer/predict_det.py
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1
+--use_tensorrt:False|True
+--precision:fp32|fp16|int8
+--det_model_dir:
+--image_dir:./inference/ch_det_data_50/all-sum-510/
+--save_log_path:null
+--benchmark:True
+null:null
diff --git a/tests/ocr_ppocr_mobile_params.txt b/tests/ocr_ppocr_mobile_params.txt
index 30b9a038079ce1e69332338ca258db04a3d077f2..bb6e0960a77c946e7d452c1026368682be0c4579 100644
--- a/tests/ocr_ppocr_mobile_params.txt
+++ b/tests/ocr_ppocr_mobile_params.txt
@@ -1,5 +1,5 @@
 ===========================train_params===========================
-model_name:ocr_system
+model_name:ocr_system_mobile
 python:python3.7
 gpu_list:null
 Global.use_gpu:null
diff --git a/tests/ocr_ppocr_server_params.txt b/tests/ocr_ppocr_server_params.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9c49f7ddf43dbca2562bb206d92e5aeb84e703aa
--- /dev/null
+++ b/tests/ocr_ppocr_server_params.txt
@@ -0,0 +1,66 @@
+===========================train_params===========================
+model_name:ocr_system_server
+python:python3.7
+gpu_list:null
+Global.use_gpu:null
+Global.auto_cast:null
+Global.epoch_num:null
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:null
+Global.pretrained_model:null
+train_model_name:null
+train_infer_img_dir:null
+null:null
+##
+trainer:
+norm_train:null
+pact_train:null
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:null
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.pretrained_model:
+norm_export:null
+quant_export:null
+fpgm_export:null
+distill_export:null
+export1:null
+export2:null
+##
+infer_model:./inference/ch_ppocr_server_v2.0_det_infer/
+infer_export:null
+infer_quant:False
+inference:tools/infer/predict_system.py
+--use_gpu:True
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1
+--use_tensorrt:False|True
+--precision:fp32|fp16|int8
+--det_model_dir:
+--image_dir:./inference/ch_det_data_50/all-sum-510/
+--save_log_path:null
+--benchmark:True
+--rec_model_dir:./inference/ch_ppocr_server_v2.0_rec_infer/
+===========================cpp_infer_params===========================
+use_opencv:True
+infer_model:./inference/ch_ppocr_server_v2.0_det_infer/
+infer_quant:False
+inference:./deploy/cpp_infer/build/ppocr system
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1
+--use_tensorrt:False|True
+--precision:fp32|fp16
+--det_model_dir:
+--image_dir:./inference/ch_det_data_50/all-sum-510/
+--rec_model_dir:./inference/ch_ppocr_server_v2.0_rec_infer/
+--benchmark:True
\ No newline at end of file
diff --git a/tests/ocr_rec_params.txt b/tests/ocr_rec_params.txt
index d1dc3c311ebe198de19ce93875a5327134c00e64..f9c407897269d4729b9cab7313c45fe69712c62d 100644
--- a/tests/ocr_rec_params.txt
+++ b/tests/ocr_rec_params.txt @@ -63,4 +63,19 @@ inference:./deploy/cpp_infer/build/ppocr rec --rec_model_dir: --image_dir:./inference/rec_inference/ null:null ---benchmark:True \ No newline at end of file +--benchmark:True +===========================serving_params=========================== +trans_model:-m paddle_serving_client.convert +--dirname:./inference/ch_ppocr_mobile_v2.0_rec_infer/ +--model_filename:inference.pdmodel +--params_filename:inference.pdiparams +--serving_server:./deploy/pdserving/ppocr_rec_mobile_2.0_serving/ +--serving_client:./deploy/pdserving/ppocr_rec_mobile_2.0_client/ +serving_dir:./deploy/pdserving +web_service:web_service_rec.py --config=config.yml --opt op.rec.concurrency=1 +op.rec.local_service_conf.devices:null|0 +op.rec.local_service_conf.use_mkldnn:True|False +op.rec.local_service_conf.thread_num:1|6 +op.rec.local_service_conf.use_trt:False|True +op.rec.local_service_conf.precision:fp32|fp16|int8 +pipline:pipeline_http_client.py --image_dir=../../doc/imgs_words_en \ No newline at end of file diff --git a/tests/ocr_rec_server_params.txt b/tests/ocr_rec_server_params.txt new file mode 100644 index 0000000000000000000000000000000000000000..7d151fcf0b793bd0bf63ac925c9ef3cf0ff56557 --- /dev/null +++ b/tests/ocr_rec_server_params.txt @@ -0,0 +1,81 @@ +===========================train_params=========================== +model_name:ocr_server_rec +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_infer=2|whole_train_infer=300 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_infer=128|whole_train_infer=128 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./inference/rec_inference +null:null +## +trainer:norm_train|pact_train +norm_train:tools/train.py -c tests/configs/rec_icdar15_r34_train.yml -o +pact_train:deploy/slim/quantization/quant.py -c tests/configs/rec_icdar15_r34_train.yml -o +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:tools/eval.py -c tests/configs/rec_icdar15_r34_train.yml -o +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c tests/configs/rec_icdar15_r34_train.yml -o +quant_export:deploy/slim/quantization/export_model.py -c tests/configs/rec_icdar15_r34_train.yml -o +fpgm_export:null +distill_export:null +export1:null +export2:null +## +infer_model:./inference/ch_ppocr_server_v2.0_rec_infer/ +infer_export:null +infer_quant:False +inference:tools/infer/predict_rec.py +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1|6 +--use_tensorrt:True|False +--precision:fp32|fp16|int8 +--rec_model_dir: +--image_dir:./inference/rec_inference +--save_log_path:./test/output/ +--benchmark:True +null:null +===========================cpp_infer_params=========================== +use_opencv:True +infer_model:./inference/ch_ppocr_server_v2.0_rec_infer/ +infer_quant:False +inference:./deploy/cpp_infer/build/ppocr rec +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp32|fp16 +--rec_model_dir: +--image_dir:./inference/rec_inference/ +null:null +--benchmark:True +===========================serving_params=========================== +trans_model:-m paddle_serving_client.convert 
+--dirname:./inference/ch_ppocr_server_v2.0_rec_infer/ +--model_filename:inference.pdmodel +--params_filename:inference.pdiparams +--serving_server:./deploy/pdserving/ppocr_rec_server_2.0_serving/ +--serving_client:./deploy/pdserving/ppocr_rec_server_2.0_client/ +serving_dir:./deploy/pdserving +web_service:web_service_rec.py --config=config.yml --opt op.rec.concurrency=1 +op.rec.local_service_conf.devices:null|0 +op.rec.local_service_conf.use_mkldnn:True|False +op.rec.local_service_conf.thread_num:1|6 +op.rec.local_service_conf.use_trt:False|True +op.rec.local_service_conf.precision:fp32|fp16|int8 +pipline:pipeline_http_client.py --image_dir=../../doc/imgs_words_en \ No newline at end of file diff --git a/tests/prepare.sh b/tests/prepare.sh index 77fc46e05c4435f8a12a139c8bfe886b3e129806..ef021fa385f16ae5c9c996bfcb607f73b4129f49 100644 --- a/tests/prepare.sh +++ b/tests/prepare.sh @@ -75,17 +75,28 @@ elif [ ${MODE} = "infer" ];then wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar cd ./inference && tar xf ch_ppocr_server_v2.0_det_infer.tar && tar xf ch_det_data_50.tar && cd ../ - elif [ ${model_name} = "ocr_system" ]; then + elif [ ${model_name} = "ocr_system_mobile" ]; then wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar cd ./inference && tar xf ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_ppocr_mobile_v2.0_rec_infer.tar && tar xf ch_det_data_50.tar && cd ../ - else + elif [ ${model_name} = "ocr_system_server" ]; then + wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar + wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar + wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar + cd ./inference && tar xf ch_ppocr_server_v2.0_det_infer.tar && tar xf ch_ppocr_server_v2.0_rec_infer.tar && tar xf ch_det_data_50.tar && cd ../ + elif [ ${model_name} = "ocr_rec" ]; then rm -rf ./train_data/ic15_data eval_model_name="ch_ppocr_mobile_v2.0_rec_infer" wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/rec_inference.tar wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar cd ./inference && tar xf ${eval_model_name}.tar && tar xf rec_inference.tar && cd ../ + elif [ ${model_name} = "ocr_server_rec" ]; then + rm -rf ./train_data/ic15_data + eval_model_name="ch_ppocr_server_v2.0_rec_infer" + wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/rec_inference.tar + wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar + cd ./inference && tar xf ${eval_model_name}.tar && tar xf rec_inference.tar && cd ../ fi elif [ ${MODE} = "cpp_infer" ];then if [ ${model_name} = "ocr_det" ]; then @@ -107,12 +118,15 @@ fi if [ ${MODE} = "serving_infer" ];then # prepare serving env python_name=$(func_parser_value "${lines[2]}") - ${python_name} -m pip install install paddle-serving-server-gpu==0.6.1.post101 + wget https://paddle-serving.bj.bcebos.com/chain/paddle_serving_server_gpu-0.0.0.post101-py3-none-any.whl + 
${python_name} -m pip install paddle_serving_server_gpu-0.0.0.post101-py3-none-any.whl ${python_name} -m pip install paddle_serving_client==0.6.1 - ${python_name} -m pip install paddle-serving-app==0.6.1 + ${python_name} -m pip install paddle-serving-app==0.6.3 wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar - cd ./inference && tar xf ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_ppocr_mobile_v2.0_rec_infer.tar && cd ../ + wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar + wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar + cd ./inference && tar xf ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_ppocr_mobile_v2.0_rec_infer.tar && tar xf ch_ppocr_server_v2.0_rec_infer.tar && tar xf ch_ppocr_server_v2.0_det_infer.tar && cd ../ fi if [ ${MODE} = "cpp_infer" ];then diff --git a/tests/readme.md b/tests/readme.md index 592f621424756e95966ae2695cbba6546f794317..127eef9fe34de37f5dfa608b3ed9903f2b826fa0 100644 --- a/tests/readme.md +++ b/tests/readme.md @@ -1,5 +1,5 @@ -# Introduction +# Introduction to testing the toolchain from training to inference deployment test.sh is used together with the params.txt files to run the train-to-prediction pipeline tests for the lightweight OCR detection and recognition models. @@ -36,7 +36,7 @@ test.sh includes four run modes; each mode runs on different data and is used respectively for - Mode 1: lite_train_infer, trains with a small amount of data to quickly verify that the train-to-prediction pipeline runs end to end, without validating accuracy or speed; ```shell -bash test/prepare.sh ./tests/ocr_det_params.txt 'lite_train_infer' +bash tests/prepare.sh ./tests/ocr_det_params.txt 'lite_train_infer' bash tests/test.sh ./tests/ocr_det_params.txt 'lite_train_infer' ``` @@ -66,3 +66,7 @@ bash tests/test.sh ./tests/ocr_det_params.txt 'whole_train_infer' bash tests/prepare.sh ./tests/ocr_det_params.txt 'cpp_infer' bash tests/test.sh ./tests/ocr_det_params.txt 'cpp_infer' ``` + +# Log output +Log files with the .log suffix are finally generated in the ```tests/output``` directory + diff --git a/tests/results/det_results_gpu_trt_fp16_cpp.txt b/tests/results/det_results_gpu_trt_fp16_cpp.txt new file mode 100644 index 0000000000000000000000000000000000000000..34cde2526d2c719a473bec36b9801f56c954561c --- /dev/null +++ b/tests/results/det_results_gpu_trt_fp16_cpp.txt @@ -0,0 +1,50 @@ +../../inference/ch_det_data_50/all-sum-510/00008790.jpg 208 404 282 404 282 421 208 421 58 396 107 396 107 413 58 413 197 387 296 387 296 403 197 403 161 389 174 389 174 402 161 402 34 378 134 378 134 394 34 394 323 377 329 377 329 382 323 382 199 370 292 370 292 383 199 383 216 309 274 309 274 325 216 325 161 304 173 304 173 315 161 315 370 301 437 301 437 317 370 317 30 301 135 300 135 316 30 317 221 291 270 291 270 308 221 308 58 224 106 224 106 238 58 238 216 222 274 222 274 239 216 239 161 217 174 217 174 229 161 229 33 205 133 205 133 221 33 221 221 204 270 204 270 221 221 221 73 145 385 145 385 162 73 162 52 119 119 119 119 135 52 135 72 50 296 50 296 66 72 66 54 15 118 15 118 32 54 32 +../../inference/ch_det_data_50/all-sum-510/00018946.jpg 439 327 476 327 476 341 439 341 85 284 142 284 142 308 85 308 300 278 380 278 380 299 300 299 195 262 287 275 284 299 192 286 196 196 454 218 452 244 194 222 343 182 376 182 376 193 343 193 198 162 341 169 340 195 197 188 176 130 381 145 380 165 175 150 176 100 417 118 415 148 174 130 +../../inference/ch_det_data_50/all-sum-510/00034387.jpg 263 459 741 459 741 485 263 485 346 415 421 415 421 444 346 444 544 418 568 418 568 442 544 442 684 415 712 415 712 444 684 444 173 413 228 413 228 444 173 444 872 412 910 412 910 447 872 447 55 415 76 415 76 443 55 443 855
371 927 371 927 401 855 401 347 371 420 371 420 400 347 400 672 370 725 370 725 402 672 402 537 371 571 371 571 401 537 401 136 364 230 367 229 403 135 400 55 370 76 370 76 399 55 399 856 328 927 328 927 358 856 358 350 328 420 328 420 358 350 358 672 326 725 326 725 358 672 358 539 327 571 327 571 359 539 359 170 326 229 323 231 357 171 359 56 328 76 328 76 358 56 358 297 326 316 326 316 334 297 334 854 284 927 284 927 314 854 314 672 284 725 284 725 315 672 315 344 284 431 282 432 315 345 317 537 283 570 283 570 314 537 314 170 281 228 281 228 315 170 315 55 285 75 285 75 314 55 314 856 241 927 241 927 270 856 270 346 240 464 240 464 271 346 271 154 241 228 241 228 271 154 271 672 240 726 240 726 271 672 271 530 240 573 240 573 272 530 272 55 241 76 241 76 270 55 270 854 196 927 198 926 228 853 225 672 197 728 197 728 228 672 228 342 199 439 194 441 224 344 230 175 196 229 196 229 226 175 226 55 199 75 199 75 228 55 228 526 193 578 193 578 228 526 228 347 154 420 154 420 182 347 182 853 153 927 153 927 181 853 181 175 153 228 153 228 184 175 184 668 152 725 152 725 182 668 182 536 153 572 153 572 183 536 183 55 155 76 155 76 183 55 183 347 109 420 109 420 138 347 138 172 109 229 109 229 140 172 140 544 111 565 111 565 138 544 138 51 110 77 110 77 140 51 140 639 105 729 105 729 141 639 141 815 101 929 109 927 141 813 133 812 65 953 65 953 93 812 93 305 64 447 66 447 94 305 92 671 65 725 65 725 95 671 95 173 64 229 66 228 96 172 94 37 64 91 66 90 98 36 96 527 63 581 63 581 95 527 95 333 18 671 18 671 45 333 45 +../../inference/ch_det_data_50/all-sum-510/00037951.jpg 432 973 552 977 552 994 432 991 431 931 554 931 554 970 431 970 29 520 101 520 101 546 29 546 29 441 146 441 146 465 29 465 233 333 328 331 328 356 233 358 121 250 439 250 439 287 121 287 180 205 380 205 380 229 180 229 255 104 323 121 307 184 239 166 35 57 147 57 147 82 35 82 +../../inference/ch_det_data_50/all-sum-510/00044782.jpg 222 214 247 214 247 230 222 230 162 214 183 214 183 231 162 231 122 190 216 190 216 203 122 203 90 82 252 82 252 100 90 100 70 61 279 61 279 78 70 78 103 14 244 14 244 46 103 46 +../../inference/ch_det_data_50/all-sum-510/00067516.jpg 139 806 596 807 596 824 139 823 46 782 699 782 699 800 46 800 577 749 669 749 669 766 577 766 353 748 397 748 397 769 353 769 220 749 261 749 261 767 220 767 475 748 502 748 502 769 475 769 68 746 134 749 133 766 67 763 574 680 670 680 670 700 574 700 474 680 519 680 519 701 474 701 352 680 397 680 397 701 352 701 68 679 134 682 133 700 67 697 219 678 245 681 242 702 216 698 575 614 669 614 669 633 575 633 66 612 135 614 135 633 66 631 474 613 501 613 501 633 474 633 353 613 379 613 379 634 353 634 219 612 245 612 245 633 219 633 576 546 669 546 669 566 576 566 474 545 519 545 519 566 474 566 351 544 381 544 381 567 351 567 219 545 245 545 245 566 219 566 67 541 134 544 133 565 66 562 67 477 134 480 133 501 66 498 584 479 666 479 666 499 584 499 474 478 519 478 519 500 474 500 352 478 397 478 397 500 352 500 218 477 246 477 246 502 218 502 579 424 666 427 665 451 578 448 344 428 410 428 410 449 344 449 66 425 151 427 151 451 66 449 473 427 515 427 515 450 473 450 218 427 259 427 259 450 218 450 282 396 479 397 479 420 282 419 83 316 667 316 667 335 83 335 64 277 666 277 666 292 64 292 456 209 585 209 585 226 456 226 311 208 373 208 373 227 311 227 163 208 227 208 227 227 163 227 504 150 541 150 541 168 504 168 264 47 485 47 485 69 264 69 +../../inference/ch_det_data_50/all-sum-510/00088568.jpg 57 443 119 443 119 456 57 456 309 413 744 413 744 430 309 430 309 375 737 
375 737 392 309 392 415 337 559 337 559 351 415 351 307 322 674 321 674 338 307 339 275 292 348 294 348 313 275 311 52 285 210 285 210 301 52 301 273 262 421 262 421 279 273 279 55 262 249 262 249 279 55 279 669 247 697 247 697 262 669 262 601 247 629 247 629 262 601 262 531 247 559 247 559 262 531 262 461 247 489 247 489 262 461 262 277 247 310 247 310 261 277 261 55 240 142 240 142 254 55 254 276 230 400 230 400 246 276 246 741 227 749 237 741 246 732 237 665 230 701 230 701 245 665 245 598 230 631 230 631 245 598 245 527 230 563 230 563 245 527 245 458 230 493 230 493 245 458 245 52 213 212 215 212 233 52 231 732 214 747 214 747 227 732 227 662 212 706 212 706 230 662 230 594 213 638 213 638 227 594 227 522 213 570 213 570 227 522 227 453 213 497 213 497 227 453 227 278 213 352 213 352 227 278 227 734 198 748 198 748 210 734 210 667 196 702 196 702 210 667 210 599 196 633 196 633 211 599 211 527 196 564 196 564 210 527 210 459 196 493 196 493 210 459 210 276 194 418 195 418 212 276 211 54 190 241 190 241 207 54 207 664 179 705 179 705 194 664 194 278 178 352 180 352 195 278 193 733 179 747 179 747 194 733 194 596 178 635 178 635 193 596 193 523 177 567 177 567 195 523 195 456 178 495 178 495 193 456 193 55 170 142 170 142 184 55 184 733 164 748 164 748 176 733 176 664 162 705 162 705 176 664 176 597 162 635 162 635 176 597 176 525 162 566 162 566 176 525 176 456 162 494 162 494 176 456 176 277 160 399 160 399 176 277 176 54 146 149 146 149 161 54 161 452 145 497 145 497 160 452 160 729 144 748 144 748 162 729 162 662 143 706 143 706 161 662 161 595 144 636 144 636 159 595 159 521 143 566 141 567 159 522 161 277 143 310 143 310 159 277 159 275 120 430 120 430 140 275 140 50 119 234 120 234 140 50 139 402 90 703 90 703 107 402 107 46 78 282 78 282 98 46 98 324 67 745 68 745 86 324 85 667 47 744 47 744 64 667 64 295 47 435 47 435 63 295 63 64 30 232 27 233 65 65 68 +../../inference/ch_det_data_50/all-sum-510/00091741.jpg 46 335 87 335 87 360 46 360 98 209 258 209 258 232 98 232 101 189 258 190 258 206 101 205 87 99 268 97 269 184 88 186 92 45 266 53 263 117 89 109 89 10 258 12 258 38 89 36 +../../inference/ch_det_data_50/all-sum-510/00105313.jpg 289 261 407 261 407 277 289 277 152 260 265 260 265 276 152 276 10 257 74 259 74 276 10 274 32 230 134 230 134 245 32 245 34 215 218 215 218 228 34 228 32 199 148 199 148 214 32 214 31 181 217 182 217 199 31 198 34 169 107 169 107 182 34 182 34 153 126 153 126 166 34 166 33 136 144 137 144 150 33 149 34 122 177 122 177 135 34 135 32 104 178 104 178 120 32 120 32 91 102 91 102 104 32 104 33 75 121 75 121 88 33 88 32 60 121 60 121 73 32 73 34 44 121 44 121 57 34 57 31 28 144 28 144 43 31 43 177 20 415 15 416 51 178 56 24 10 152 10 152 26 24 26 +../../inference/ch_det_data_50/all-sum-510/00134770.jpg 386 645 457 645 457 658 386 658 406 618 486 616 486 634 406 636 111 533 272 530 272 550 111 553 110 501 445 496 445 516 110 521 110 469 445 465 445 485 110 489 110 438 446 433 446 453 110 458 109 407 445 403 445 423 109 427 151 375 443 372 443 392 151 395 183 336 371 334 371 358 183 360 73 96 517 101 516 220 72 215 +../../inference/ch_det_data_50/all-sum-510/00145943.jpg 390 243 751 274 735 454 375 423 88 90 302 90 302 121 88 121 43 40 329 37 329 78 43 81 +../../inference/ch_det_data_50/all-sum-510/00147605.jpg 800 613 878 613 878 627 800 627 514 605 786 604 786 629 514 630 116 521 226 521 226 561 116 561 252 522 309 522 309 558 252 558 713 500 902 503 902 539 713 536 254 501 296 501 296 519 254 519 345 479 475 479 475 517 345 517 251 483 296 483 296 501 
251 501 350 456 447 456 447 471 350 471 143 442 203 442 203 469 143 469 727 370 880 370 880 422 727 422 526 369 684 369 684 421 526 421 140 367 490 367 490 423 140 423 742 313 872 313 872 338 742 338 798 155 888 155 888 192 798 192 272 140 457 140 457 161 272 161 737 114 895 118 894 158 736 155 107 110 206 110 206 131 107 131 268 92 464 94 464 134 268 131 +../../inference/ch_det_data_50/all-sum-510/00150341.jpg 98 640 300 640 300 664 98 664 113 615 289 615 289 633 113 633 82 591 320 590 320 611 82 612 30 563 315 561 315 582 30 584 30 513 169 513 169 531 30 531 32 488 111 488 111 506 32 506 357 458 465 461 464 486 356 483 26 458 271 459 271 483 26 482 338 438 423 442 422 461 337 457 64 437 145 437 145 455 64 455 205 414 293 414 293 436 205 436 318 407 442 411 441 439 317 435 42 404 176 407 176 435 42 432 28 381 137 381 137 405 28 405 +../../inference/ch_det_data_50/all-sum-510/00150669.jpg 647 698 683 698 683 718 647 718 515 684 551 684 551 721 515 721 650 687 680 687 680 702 650 702 920 673 938 673 938 686 920 686 518 670 548 670 548 690 518 690 785 670 808 670 808 688 785 688 590 670 608 670 608 688 590 688 732 665 745 679 732 692 718 679 652 668 680 668 680 689 652 689 271 665 423 665 423 690 271 690 45 666 110 666 110 688 45 688 130 664 205 664 205 690 130 690 781 628 812 628 812 663 781 663 643 626 687 626 687 666 643 666 514 627 550 627 550 665 514 665 654 617 673 617 673 629 654 629 521 617 541 617 541 629 521 629 858 617 868 617 868 628 858 628 727 617 736 617 736 628 727 628 920 614 940 614 940 631 920 631 785 614 807 614 807 631 785 631 371 603 421 603 421 620 371 620 83 600 216 603 216 624 83 620 46 602 72 602 72 623 46 623 780 569 817 573 813 610 776 606 922 559 936 559 936 575 922 575 856 559 869 559 869 575 856 575 61 552 411 552 411 569 61 569 61 531 117 533 117 547 61 545 859 527 868 527 868 539 859 539 923 525 936 525 936 542 923 542 787 524 807 524 807 540 787 540 526 526 536 526 536 536 526 536 261 511 396 511 396 528 261 528 120 512 246 512 246 526 120 526 47 512 120 512 120 527 47 527 753 491 829 491 829 508 753 508 636 491 712 491 712 508 636 508 517 491 593 491 593 508 517 508 84 448 125 448 125 463 84 463 221 448 238 448 238 462 221 462 682 444 869 444 869 461 682 461 561 444 667 444 667 461 561 461 489 445 545 445 545 459 489 459 183 437 209 437 209 459 183 459 52 429 73 437 64 464 42 456 222 430 278 430 278 445 222 445 86 430 145 430 145 445 86 445 505 382 617 381 617 398 505 399 701 380 758 380 758 398 701 398 307 371 365 371 365 386 307 386 90 371 168 371 168 386 90 386 686 334 821 334 821 352 686 352 496 333 659 333 659 350 496 350 207 314 245 314 245 333 207 333 497 287 642 287 642 304 497 304 670 286 804 286 804 304 670 304 668 239 817 239 817 257 668 257 495 239 644 239 644 257 495 257 668 193 816 193 816 209 668 209 496 192 644 192 644 208 496 208 668 144 816 144 816 161 668 161 497 144 646 144 646 161 497 161 488 102 546 102 546 121 488 121 845 21 900 21 900 43 845 43 25 18 702 18 702 39 25 39 896 10 997 14 996 46 895 42 +../../inference/ch_det_data_50/all-sum-510/00152568.jpg 2 250 285 252 285 281 2 279 195 231 255 231 255 241 195 241 198 158 282 164 277 230 193 224 177 148 251 148 251 161 177 161 +../../inference/ch_det_data_50/all-sum-510/00155628.jpg 147 898 506 901 506 925 147 922 519 892 562 894 561 912 518 910 59 884 83 884 83 895 59 895 148 877 505 881 505 902 148 897 523 833 641 837 640 858 522 854 68 832 187 834 187 855 68 853 245 554 468 554 468 570 245 570 307 506 405 508 405 526 307 523 243 481 460 483 460 504 243 502 250 420 460 422 460 454 
250 452 193 377 518 379 518 410 193 408 473 194 625 194 625 212 473 212 70 127 643 129 643 163 70 161 478 39 599 35 602 101 481 105 67 23 136 14 140 44 71 54 +../../inference/ch_det_data_50/all-sum-510/00173364.jpg 7 176 59 176 59 201 7 201 135 118 196 118 196 135 135 135 38 75 87 75 87 105 38 105 249 19 313 19 313 38 249 38 19 15 105 15 105 40 19 40 +../../inference/ch_det_data_50/all-sum-510/00175503.jpg 39 256 503 252 504 362 40 366 49 198 351 175 357 253 55 276 +../../inference/ch_det_data_50/all-sum-510/00193218.jpg 282 373 411 373 411 389 282 389 170 373 223 373 223 390 170 390 108 373 162 373 162 390 108 390 276 357 358 357 358 371 276 371 169 357 222 357 222 371 169 371 106 356 175 356 175 373 106 373 408 356 493 356 493 370 408 370 24 185 64 185 64 203 24 203 500 184 558 184 558 201 500 201 379 185 421 183 422 200 380 202 283 184 311 184 311 202 283 202 173 185 197 185 197 201 173 201 498 163 544 163 544 177 498 177 379 162 412 162 412 177 379 177 261 161 303 161 303 178 261 178 174 161 231 161 231 178 174 178 24 161 80 161 80 178 24 178 385 139 489 139 489 155 385 155 26 137 133 137 133 153 26 153 442 115 538 117 538 134 442 132 345 117 406 117 406 131 345 131 259 117 303 117 303 131 259 131 28 112 229 114 229 132 28 130 130 90 395 93 395 110 130 107 560 81 585 81 585 109 560 109 +../../inference/ch_det_data_50/all-sum-510/00195033.jpg 221 302 240 302 240 309 221 309 487 262 534 264 533 282 486 280 125 249 194 249 194 285 125 285 336 248 364 248 364 268 336 268 317 221 381 223 381 240 317 238 431 224 450 224 450 236 431 236 360 202 539 202 539 218 360 218 87 199 148 201 148 218 87 216 371 181 450 181 450 195 371 195 327 180 354 180 354 194 327 194 94 178 241 178 241 195 94 195 431 159 559 159 559 175 431 175 128 148 289 149 289 166 128 165 35 145 75 148 74 163 34 160 487 146 501 146 501 153 487 153 100 143 122 143 122 154 100 154 370 127 505 126 505 140 370 141 98 125 194 125 194 139 98 139 320 125 338 125 338 136 320 136 35 121 78 121 78 135 35 135 322 104 338 104 338 116 322 116 371 101 503 101 503 117 371 117 348 103 362 103 362 115 348 115 37 101 81 101 81 114 37 114 97 98 207 99 207 116 97 115 305 89 317 89 317 97 305 97 346 86 364 86 364 97 346 97 319 85 342 85 342 100 319 100 357 82 515 80 515 96 357 98 40 81 90 81 90 94 40 94 92 77 242 78 242 95 92 94 312 65 394 65 394 79 312 79 240 64 290 64 290 78 240 78 183 52 222 52 222 66 183 66 468 47 547 47 547 61 468 61 422 34 438 34 438 55 422 55 464 29 551 29 551 43 464 43 206 19 330 21 330 42 206 40 +../../inference/ch_det_data_50/all-sum-510/00208502.jpg 556 535 630 535 630 569 556 569 204 537 284 537 284 552 204 552 142 512 191 512 191 526 142 526 248 511 309 511 309 525 248 525 41 499 118 499 118 520 41 520 465 490 558 490 558 510 465 510 666 489 680 493 677 505 662 501 724 490 739 490 739 503 724 503 40 450 118 448 118 469 40 471 173 448 237 448 237 465 173 465 93 403 121 403 121 424 93 424 38 403 63 403 63 424 38 424 214 392 232 405 220 422 203 409 39 357 58 357 58 375 39 375 92 355 121 355 121 375 92 375 187 339 248 337 249 363 188 365 458 319 551 317 551 338 458 340 457 271 553 271 553 292 457 292 562 271 737 267 737 288 562 292 516 225 548 225 548 245 516 245 620 185 675 185 675 202 620 202 456 130 550 128 550 149 456 151 571 104 789 98 789 121 571 127 121 46 291 46 291 99 121 99 536 36 710 36 710 92 536 92 +../../inference/ch_det_data_50/all-sum-510/00224225.jpg 135 426 157 426 157 449 135 449 199 402 480 408 479 461 198 455 200 225 474 225 474 394 200 394 130 264 174 264 174 281 130 281 343 205 458 205 458 232 343 232 
197 186 349 194 346 242 194 234 7 41 160 39 161 115 8 117 +../../inference/ch_det_data_50/all-sum-510/00227746.jpg 142 230 210 230 210 240 142 240 71 230 130 230 130 240 71 240 215 228 386 228 386 240 215 240 290 208 347 208 347 224 290 224 142 179 165 181 162 209 139 208 172 179 250 179 250 195 172 195 171 152 347 152 347 167 171 167 143 110 279 112 279 135 143 132 202 53 387 53 387 69 202 69 141 47 193 47 193 64 141 64 +../../inference/ch_det_data_50/all-sum-510/00229605.jpg 742 528 882 528 882 545 742 545 232 497 590 496 590 524 232 525 5 496 229 496 229 524 5 524 733 494 884 497 884 522 733 519 605 493 718 488 719 517 606 522 2 242 865 227 866 291 3 305 477 26 884 26 884 77 477 77 +../../inference/ch_det_data_50/all-sum-510/00233011.jpg 61 225 293 225 293 243 61 243 11 218 43 218 43 252 11 252 60 177 120 177 120 196 60 196 11 169 44 169 44 204 11 204 59 127 149 129 149 148 59 146 11 123 45 123 45 156 11 156 124 86 239 86 239 104 124 104 147 49 218 49 218 67 147 67 257 44 354 47 353 71 256 68 8 47 54 47 54 69 8 69 275 10 346 10 346 32 275 32 26 9 75 9 75 32 26 32 +../../inference/ch_det_data_50/all-sum-510/00233625.jpg 370 395 635 397 635 445 370 443 67 210 935 204 936 325 68 331 +../../inference/ch_det_data_50/all-sum-510/00233634.jpg 213 637 264 637 264 706 213 706 522 634 572 634 572 697 522 697 641 522 684 522 684 570 641 570 95 514 155 514 155 592 95 592 754 394 762 394 762 403 754 403 677 362 730 360 733 432 679 433 53 360 109 360 109 436 53 436 77 207 157 207 157 282 77 282 642 204 695 204 695 274 642 274 208 88 262 85 266 165 212 168 362 47 428 44 432 117 366 120 +../../inference/ch_det_data_50/all-sum-510/00234400.jpg 156 419 739 419 739 439 156 439 157 393 653 393 653 412 157 412 38 390 129 390 129 413 38 413 156 339 307 342 307 365 156 362 36 342 125 342 125 363 36 363 519 293 705 293 705 316 519 316 393 290 485 288 485 316 393 318 156 291 271 291 271 315 156 315 35 291 127 291 127 315 35 315 155 242 360 242 360 269 155 269 34 242 83 242 83 270 34 270 27 150 159 150 159 177 27 177 280 96 507 96 507 113 280 113 313 44 477 47 476 90 312 87 516 50 664 52 664 68 516 67 485 17 708 15 708 45 485 47 +../../inference/ch_det_data_50/all-sum-510/00234883.jpg 64 122 318 117 319 193 65 197 71 118 122 118 122 132 71 132 381 62 506 61 506 75 381 76 57 26 368 26 368 116 57 116 385 26 503 23 503 47 385 50 +../../inference/ch_det_data_50/all-sum-510/test_add_0.jpg 311 521 391 521 391 534 311 534 277 499 426 499 426 516 277 516 259 445 438 445 438 461 259 461 210 426 487 426 487 443 210 443 244 385 460 385 460 411 244 411 220 327 476 327 476 373 220 373 205 204 494 208 493 279 204 275 264 163 423 165 423 198 264 196 15 17 203 15 203 45 15 47 +../../inference/ch_det_data_50/all-sum-510/test_add_1.png +../../inference/ch_det_data_50/all-sum-510/test_add_10.png 155 123 187 123 187 174 155 174 160 105 184 105 184 131 160 131 116 45 155 44 158 176 119 176 63 30 102 31 99 172 60 171 +../../inference/ch_det_data_50/all-sum-510/test_add_11.jpg 1388 755 1486 755 1486 794 1388 794 1011 752 1210 752 1210 802 1011 802 681 752 879 752 879 801 681 801 355 750 568 745 570 796 356 801 76 748 266 743 268 796 78 801 600 645 1155 645 1155 706 600 706 600 562 1151 553 1151 614 600 622 596 478 1070 470 1070 529 596 537 595 390 1095 385 1095 444 595 448 600 303 1061 303 1061 362 600 362 353 180 1521 180 1521 265 353 265 59 40 261 40 261 91 59 91 1303 39 1495 39 1495 90 1303 90 971 37 1173 32 1175 83 973 88 668 37 864 32 866 83 670 88 361 32 561 32 561 88 361 88 
+../../inference/ch_det_data_50/all-sum-510/test_add_12.jpg 9 590 140 592 140 615 9 613 107 520 908 524 908 571 107 566 632 448 905 445 905 481 632 484 110 445 468 447 468 487 110 485 580 303 682 301 683 351 581 353 368 257 568 262 565 361 364 355 61 83 856 85 856 164 61 162 +../../inference/ch_det_data_50/all-sum-510/test_add_13.jpg 68 94 117 97 116 115 67 112 +../../inference/ch_det_data_50/all-sum-510/test_add_14.jpg 28 94 238 92 238 130 28 132 27 50 241 48 241 88 27 90 +../../inference/ch_det_data_50/all-sum-510/test_add_15.jpg 140 251 354 251 354 268 140 268 203 212 407 217 407 234 203 229 104 210 194 212 194 229 104 227 153 155 287 159 287 175 153 172 143 134 307 140 307 157 143 150 106 136 147 136 147 149 106 149 106 101 278 107 277 126 105 119 106 70 247 77 246 97 105 90 106 37 211 40 210 64 105 61 +../../inference/ch_det_data_50/all-sum-510/test_add_16.jpg 380 740 750 740 750 780 380 780 360 700 472 700 472 728 360 728 1550 698 1580 698 1580 750 1550 750 1256 694 1444 694 1444 722 1256 722 1242 659 1452 659 1452 690 1242 690 384 643 672 643 672 682 384 682 1226 623 1474 621 1474 655 1226 657 356 599 582 599 582 631 356 631 1198 587 1496 587 1496 619 1198 619 1164 553 1534 553 1534 585 1164 585 378 549 642 549 642 589 378 589 354 500 520 500 520 540 354 540 772 258 1128 258 1128 303 772 303 372 208 508 208 508 303 372 303 774 208 1092 214 1092 260 774 254 +../../inference/ch_det_data_50/all-sum-510/test_add_17.jpg 319 255 394 257 394 271 319 269 306 236 407 238 407 257 306 255 306 221 413 226 412 243 305 237 93 134 387 140 386 210 92 204 69 92 401 100 401 127 69 118 66 74 225 77 225 95 66 92 64 58 227 60 227 77 64 75 +../../inference/ch_det_data_50/all-sum-510/test_add_18.jpg 153 908 616 914 616 935 153 930 464 786 718 788 718 816 464 813 552 750 666 755 665 792 551 788 117 538 190 538 190 572 117 572 115 472 676 484 675 530 114 518 119 427 670 439 670 471 119 459 119 374 676 379 676 411 119 406 555 261 677 262 677 280 555 279 164 258 336 258 336 275 164 275 342 194 457 196 457 221 342 219 307 172 490 172 490 190 307 190 252 125 540 129 540 171 252 168 345 90 488 92 488 110 345 108 283 40 569 48 567 84 282 76 235 30 268 30 268 64 235 64 +../../inference/ch_det_data_50/all-sum-510/test_add_19.jpg 22 293 44 293 44 304 22 304 62 291 106 291 106 305 62 305 61 279 107 279 107 291 61 291 218 278 247 278 247 292 218 292 176 278 210 278 210 291 176 291 141 275 166 275 166 307 141 307 7 266 20 266 20 278 7 278 219 264 245 264 245 279 219 279 60 263 133 263 133 279 60 279 22 264 49 264 49 279 22 279 218 251 250 251 250 266 218 266 63 251 133 251 133 264 63 264 22 250 45 250 45 265 22 265 7 251 20 251 20 263 7 263 8 240 18 240 18 249 8 249 61 236 115 236 115 252 61 252 23 234 49 237 47 253 21 250 210 235 246 235 246 252 210 252 143 236 166 236 166 252 143 252 493 224 533 224 533 241 493 241 334 224 355 224 355 239 334 239 287 224 315 224 315 239 287 239 61 224 114 224 114 238 61 238 7 226 18 226 18 235 7 235 219 223 250 223 250 237 219 237 141 224 167 221 169 235 143 238 23 223 49 223 49 239 23 239 494 212 526 212 526 225 494 225 418 211 439 211 439 226 418 226 335 211 400 211 400 224 335 224 291 211 322 211 322 224 291 224 220 211 251 211 251 224 220 224 144 212 167 212 167 223 144 223 60 211 115 209 115 222 60 224 24 210 50 210 50 224 24 224 336 197 384 197 384 211 336 211 63 198 89 198 89 209 63 209 492 195 542 195 542 213 492 213 443 201 456 194 464 207 451 215 219 195 257 195 257 213 219 213 177 196 207 196 207 210 177 210 144 197 158 197 158 210 144 210 23 196 44 196 44 212 23 212 416 
193 440 193 440 213 416 213 63 185 134 185 134 197 63 197 335 184 400 184 400 197 335 197 455 180 466 191 456 201 444 190 289 187 309 180 315 194 295 202 219 183 256 183 256 197 219 197 140 183 160 183 160 198 140 198 493 182 519 182 519 197 493 197 426 178 441 191 426 204 412 190 32 177 46 189 32 202 19 189 176 180 193 180 193 197 176 197 335 170 402 170 402 186 335 186 491 169 521 169 521 186 491 186 426 163 441 176 426 191 412 179 292 170 315 170 315 186 292 186 219 170 252 170 252 185 219 185 177 171 189 171 189 185 177 185 62 170 127 168 127 182 62 184 454 167 464 177 455 186 445 176 142 169 164 169 164 185 142 185 492 158 525 158 525 172 492 172 399 159 436 159 436 169 399 169 334 157 403 157 403 170 334 170 295 157 327 157 327 171 295 171 219 156 253 156 253 170 219 170 143 156 164 156 164 171 143 171 60 157 127 155 127 169 60 171 491 142 543 142 543 158 491 158 449 143 480 143 480 157 449 157 334 142 441 142 441 157 334 157 294 143 328 143 328 157 294 157 219 143 254 143 254 157 219 157 61 143 105 143 105 156 61 156 142 141 164 141 164 157 142 157 17 150 31 136 45 149 30 162 285 133 293 133 293 141 285 141 177 132 193 132 193 145 177 145 335 130 389 130 389 143 335 143 491 129 528 129 528 143 491 143 449 129 479 129 479 143 449 143 417 130 437 130 437 142 417 142 291 129 323 129 323 143 291 143 217 130 256 128 257 143 218 145 61 129 97 129 97 143 61 143 143 128 161 128 161 145 143 145 29 123 45 132 34 149 18 139 492 117 537 117 537 130 492 130 335 117 389 117 389 130 335 130 218 118 256 118 256 128 218 128 450 116 480 116 480 130 450 130 417 116 440 116 440 131 417 131 177 116 210 116 210 130 177 130 143 116 164 116 164 131 143 131 60 115 90 115 90 132 60 132 17 121 32 110 45 124 29 136 490 105 527 105 527 115 490 115 448 105 479 105 479 115 448 115 419 106 436 106 436 114 419 114 292 105 321 105 321 116 292 116 218 105 244 105 244 115 218 115 175 105 205 105 205 115 175 115 143 105 163 105 163 116 143 116 334 104 373 104 373 115 334 115 61 104 88 104 88 115 61 115 483 89 523 89 523 99 483 99 330 87 381 87 381 100 330 100 274 87 336 87 336 100 274 100 213 87 248 87 248 100 213 100 5 85 103 85 103 101 5 101 414 64 464 64 464 78 414 78 287 64 335 64 335 78 287 78 155 62 208 62 208 79 155 79 414 47 525 48 525 64 414 63 287 48 377 48 377 64 287 64 157 48 270 48 270 63 157 63 415 34 483 34 483 48 415 48 287 33 338 33 338 50 287 50 26 34 45 34 45 52 26 52 155 32 207 32 207 49 155 49 55 32 115 31 116 51 56 53 411 2 529 2 529 19 411 19 144 2 346 0 346 17 144 19 +../../inference/ch_det_data_50/all-sum-510/test_add_2.jpg 251 404 535 404 535 430 251 430 302 339 483 339 483 385 302 385 302 303 482 303 482 326 302 326 573 217 693 217 693 240 573 240 331 216 455 214 455 240 331 242 108 212 182 214 181 244 107 242 313 98 672 99 672 121 313 120 311 60 585 61 585 87 311 86 +../../inference/ch_det_data_50/all-sum-510/test_add_20.jpg 30 345 607 345 607 372 30 372 216 292 512 292 512 323 216 323 472 270 527 270 527 287 472 287 216 266 292 266 292 287 216 287 218 238 486 238 486 265 218 265 220 215 305 215 305 236 220 236 399 190 419 190 419 207 399 207 221 185 343 185 343 209 221 209 220 160 289 160 289 182 220 182 374 120 477 122 477 147 374 145 221 122 367 120 367 145 221 147 217 80 354 82 354 117 217 115 439 33 607 33 607 60 439 60 67 15 400 15 400 46 67 46 +../../inference/ch_det_data_50/all-sum-510/test_add_3.jpg 168 326 339 324 339 341 168 343 169 286 309 288 309 314 169 312 169 219 324 219 324 235 169 235 339 219 451 216 451 232 339 235 168 200 373 200 373 216 168 216 168 180 418 180 418 197 
168 197 169 147 417 147 417 165 169 165 170 117 419 117 419 141 170 141 325 62 480 62 480 93 325 93 170 62 310 59 311 91 171 94 +../../inference/ch_det_data_50/all-sum-510/test_add_4.png +../../inference/ch_det_data_50/all-sum-510/test_add_5.png 47 162 109 162 109 176 47 176 51 119 170 119 170 136 51 136 49 100 166 100 166 119 49 119 51 83 166 83 166 102 51 102 50 66 169 66 169 85 50 85 49 47 149 46 149 68 49 69 5 9 81 9 81 43 5 43 +../../inference/ch_det_data_50/all-sum-510/test_add_6.jpg 122 222 220 226 219 253 121 249 160 176 185 180 182 200 157 196 +../../inference/ch_det_data_50/all-sum-510/test_add_7.jpg 47 937 175 933 176 964 48 967 224 870 632 873 632 955 224 952 53 743 640 743 640 793 53 793 148 673 546 676 546 723 148 720 71 502 636 502 636 604 71 604 54 264 660 274 657 446 51 436 59 173 534 173 534 241 59 241 502 173 646 173 646 239 502 239 +../../inference/ch_det_data_50/all-sum-510/test_add_8.jpg 249 584 455 578 456 608 250 614 106 531 458 524 458 561 107 568 334 492 385 492 385 509 334 509 26 306 356 296 357 321 27 331 21 258 447 250 447 275 21 283 77 208 447 204 447 226 77 230 158 20 322 28 319 82 155 74 +../../inference/ch_det_data_50/all-sum-510/test_add_9.png 264 684 486 684 486 697 264 697 194 666 556 666 556 682 194 682 152 595 600 595 600 608 152 608 211 577 542 577 542 590 211 590 131 558 616 558 616 571 131 571 84 540 665 540 665 553 84 553 95 521 654 521 654 536 95 536 361 448 390 448 390 461 361 461 236 375 515 375 515 391 236 391 174 353 575 353 575 369 174 369 342 279 409 281 409 298 342 296 254 203 493 203 493 220 254 220 diff --git a/tests/results/det_results_gpu_trt_fp32_cpp.txt b/tests/results/det_results_gpu_trt_fp32_cpp.txt new file mode 100644 index 0000000000000000000000000000000000000000..fb33ce1becd834b4d3a0948f448e2cba6fd54769 --- /dev/null +++ b/tests/results/det_results_gpu_trt_fp32_cpp.txt @@ -0,0 +1,50 @@ +../../inference/ch_det_data_50/all-sum-510/00008790.jpg 208 404 282 404 282 421 208 421 58 396 107 396 107 413 58 413 197 387 296 387 296 403 197 403 161 389 174 389 174 402 161 402 34 378 134 378 134 394 34 394 323 377 329 377 329 382 323 382 199 370 292 370 292 383 199 383 216 309 274 309 274 325 216 325 161 304 173 304 173 315 161 315 370 301 437 301 437 317 370 317 30 301 135 300 135 316 30 317 221 291 270 291 270 308 221 308 58 224 106 224 106 238 58 238 216 222 274 222 274 239 216 239 161 217 174 217 174 229 161 229 33 205 133 205 133 221 33 221 221 204 270 204 270 221 221 221 73 145 385 145 385 162 73 162 52 119 119 119 119 135 52 135 72 50 296 50 296 66 72 66 54 15 118 15 118 32 54 32 +../../inference/ch_det_data_50/all-sum-510/00018946.jpg 439 327 476 327 476 341 439 341 85 284 142 284 142 308 85 308 300 278 380 278 380 299 300 299 195 262 287 275 284 299 192 286 196 196 454 218 452 244 194 222 343 182 376 182 376 193 343 193 198 162 341 169 340 195 197 188 176 130 381 145 380 165 175 150 176 100 417 118 415 148 174 130 +../../inference/ch_det_data_50/all-sum-510/00034387.jpg 263 459 741 459 741 485 263 485 346 415 421 415 421 444 346 444 544 418 568 418 568 442 544 442 684 415 712 415 712 444 684 444 173 413 228 413 228 444 173 444 872 412 910 412 910 447 872 447 55 415 76 415 76 443 55 443 855 371 927 371 927 401 855 401 347 371 420 371 420 400 347 400 672 370 725 370 725 402 672 402 537 371 571 371 571 401 537 401 136 364 230 367 229 403 135 400 55 370 76 370 76 399 55 399 856 328 927 328 927 358 856 358 350 328 420 328 420 358 350 358 672 326 725 326 725 358 672 358 539 327 571 327 571 359 539 359 170 326 229 323 231 357 171 359 56 
328 76 328 76 358 56 358 297 326 316 326 316 334 297 334 854 284 927 284 927 314 854 314 672 284 725 284 725 315 672 315 344 284 431 282 432 315 345 317 537 283 570 283 570 314 537 314 170 281 228 281 228 315 170 315 55 285 75 285 75 314 55 314 856 241 927 241 927 270 856 270 346 240 464 240 464 271 346 271 154 241 228 241 228 271 154 271 672 240 726 240 726 271 672 271 530 240 573 240 573 272 530 272 55 241 76 241 76 270 55 270 854 196 927 198 926 228 853 225 672 197 728 197 728 228 672 228 342 199 439 194 441 224 344 230 175 196 229 196 229 226 175 226 55 199 75 199 75 228 55 228 526 193 578 193 578 228 526 228 347 154 420 154 420 182 347 182 853 153 927 153 927 181 853 181 175 153 228 153 228 184 175 184 668 152 725 152 725 182 668 182 536 153 572 153 572 183 536 183 55 155 76 155 76 183 55 183 347 109 420 109 420 138 347 138 172 109 229 109 229 140 172 140 544 111 565 111 565 138 544 138 51 110 77 110 77 140 51 140 639 105 730 105 730 141 639 141 815 101 929 109 927 141 813 133 812 65 953 65 953 93 812 93 305 64 447 66 447 94 305 92 671 65 725 65 725 95 671 95 173 64 229 66 228 96 172 94 37 64 91 66 90 98 36 96 527 63 581 63 581 95 527 95 333 18 671 18 671 45 333 45 +../../inference/ch_det_data_50/all-sum-510/00037951.jpg 432 973 552 977 552 994 432 991 431 931 554 931 554 970 431 970 29 520 101 520 101 546 29 546 29 441 146 441 146 465 29 465 233 333 328 331 328 356 233 358 121 250 439 250 439 287 121 287 180 205 380 205 380 229 180 229 257 103 323 121 305 184 239 165 35 57 147 57 147 82 35 82 +../../inference/ch_det_data_50/all-sum-510/00044782.jpg 222 214 247 214 247 230 222 230 162 214 183 214 183 231 162 231 122 190 216 190 216 203 122 203 90 82 252 82 252 100 90 100 70 61 279 61 279 78 70 78 103 14 244 14 244 46 103 46 +../../inference/ch_det_data_50/all-sum-510/00067516.jpg 139 806 596 807 596 824 139 823 46 782 699 782 699 800 46 800 577 749 669 749 669 766 577 766 353 748 397 748 397 769 353 769 220 749 261 749 261 767 220 767 475 748 502 748 502 769 475 769 68 746 134 749 133 766 67 763 574 680 670 680 670 700 574 700 474 680 519 680 519 701 474 701 352 680 397 680 397 701 352 701 68 679 134 682 133 700 67 697 219 678 245 681 242 702 216 698 575 614 669 614 669 633 575 633 68 611 134 614 133 633 67 630 474 613 501 613 501 633 474 633 353 613 379 613 379 634 353 634 219 612 245 612 245 633 219 633 576 546 669 546 669 566 576 566 474 545 519 545 519 566 474 566 351 544 381 544 381 567 351 567 219 545 245 545 245 566 219 566 67 541 134 544 133 565 66 562 67 477 134 480 133 501 66 498 584 479 666 479 666 499 584 499 474 478 519 478 519 500 474 500 352 478 397 478 397 500 352 500 218 477 246 477 246 502 218 502 579 424 666 427 665 451 578 448 345 428 411 428 411 449 345 449 66 425 151 427 151 451 66 449 473 427 515 427 515 450 473 450 218 427 259 427 259 450 218 450 282 396 479 397 479 420 282 419 83 316 667 316 667 335 83 335 64 277 666 277 666 292 64 292 456 209 585 209 585 226 456 226 311 208 373 208 373 227 311 227 163 208 227 208 227 227 163 227 504 150 541 150 541 168 504 168 264 47 485 47 485 69 264 69 +../../inference/ch_det_data_50/all-sum-510/00088568.jpg 57 443 119 443 119 456 57 456 309 413 744 413 744 430 309 430 309 375 737 375 737 392 309 392 415 337 559 337 559 351 415 351 307 322 674 321 674 338 307 339 275 292 349 294 349 313 275 311 52 285 210 285 210 301 52 301 273 262 420 262 420 279 273 279 55 262 249 262 249 279 55 279 669 247 697 247 697 262 669 262 601 247 629 247 629 262 601 262 531 247 559 247 559 262 531 262 461 247 489 247 489 262 461 262 277 247 310 
247 310 261 277 261 55 240 142 240 142 254 55 254 276 230 400 230 400 246 276 246 741 227 749 237 741 246 732 237 665 230 701 230 701 245 665 245 598 230 631 230 631 245 598 245 527 230 563 230 563 245 527 245 458 230 493 230 493 245 458 245 52 213 212 215 212 233 52 231 732 214 747 214 747 227 732 227 662 212 706 212 706 230 662 230 594 213 638 213 638 227 594 227 522 213 570 213 570 227 522 227 453 213 497 213 497 227 453 227 278 213 352 213 352 227 278 227 734 198 748 198 748 210 734 210 667 196 702 196 702 210 667 210 599 196 633 196 633 211 599 211 527 196 564 196 564 210 527 210 459 196 493 196 493 210 459 210 276 194 418 195 418 212 276 211 54 190 241 190 241 207 54 207 664 179 705 179 705 194 664 194 278 178 352 180 352 195 278 193 733 179 747 179 747 194 733 194 596 178 635 178 635 193 596 193 523 177 567 177 567 195 523 195 456 178 495 178 495 193 456 193 55 170 142 170 142 184 55 184 733 164 748 164 748 176 733 176 664 162 705 162 705 176 664 176 597 162 635 162 635 176 597 176 525 162 566 162 566 176 525 176 456 162 494 162 494 176 456 176 277 160 399 160 399 176 277 176 54 146 149 146 149 161 54 161 452 145 497 145 497 160 452 160 729 144 748 144 748 162 729 162 662 143 706 143 706 161 662 161 595 144 636 144 636 159 595 159 521 143 566 141 567 159 522 161 277 143 310 143 310 159 277 159 275 120 430 120 430 140 275 140 50 119 234 120 234 140 50 139 402 90 703 90 703 107 402 107 46 78 282 78 282 98 46 98 324 67 745 68 745 86 324 85 666 47 743 47 743 64 666 64 295 47 435 47 435 63 295 63 64 30 232 27 233 65 65 68 +../../inference/ch_det_data_50/all-sum-510/00091741.jpg 46 335 87 335 87 360 46 360 98 209 258 209 258 232 98 232 101 189 258 190 258 206 101 205 87 99 268 97 269 184 88 186 92 45 266 53 263 117 89 109 89 10 258 12 258 38 89 36 +../../inference/ch_det_data_50/all-sum-510/00105313.jpg 289 261 407 261 407 277 289 277 152 260 265 260 265 276 152 276 10 257 74 259 74 276 10 274 32 230 134 230 134 245 32 245 34 215 218 215 218 228 34 228 32 199 148 199 148 214 32 214 31 181 217 182 217 199 31 198 34 169 107 169 107 182 34 182 34 153 126 153 126 166 34 166 33 136 144 137 144 150 33 149 34 122 177 122 177 135 34 135 32 104 178 104 178 120 32 120 32 91 102 91 102 104 32 104 33 75 121 75 121 88 33 88 32 60 121 60 121 73 32 73 34 44 121 44 121 57 34 57 31 28 144 28 144 43 31 43 177 20 415 15 416 51 178 56 24 10 152 10 152 26 24 26 +../../inference/ch_det_data_50/all-sum-510/00134770.jpg 386 645 457 645 457 658 386 658 406 618 486 617 486 635 406 636 111 533 272 530 272 550 111 553 110 501 445 496 445 516 110 521 110 469 445 465 445 485 110 489 110 438 446 433 446 453 110 458 109 407 445 403 445 423 109 427 151 375 443 372 443 392 151 395 183 336 371 334 371 358 183 360 224 307 272 308 272 318 224 317 73 96 517 101 516 220 72 215 +../../inference/ch_det_data_50/all-sum-510/00145943.jpg 390 243 751 274 735 454 375 423 88 90 302 90 302 121 88 121 43 40 329 37 329 78 43 81 +../../inference/ch_det_data_50/all-sum-510/00147605.jpg 514 605 786 604 786 629 514 630 116 521 226 521 226 561 116 561 252 522 309 522 309 558 252 558 713 500 902 503 902 539 713 536 254 501 296 501 296 519 254 519 345 479 475 479 475 517 345 517 251 483 296 483 296 501 251 501 350 456 447 456 447 471 350 471 143 442 203 442 203 469 143 469 727 370 880 370 880 422 727 422 526 369 684 369 684 421 526 421 140 367 490 367 490 423 140 423 742 313 872 313 872 338 742 338 798 155 888 155 888 192 798 192 272 140 457 140 457 161 272 161 737 114 895 118 894 158 736 155 107 110 206 110 206 131 107 131 268 92 464 94 464 
134 268 131 +../../inference/ch_det_data_50/all-sum-510/00150341.jpg 99 643 300 643 300 664 99 664 113 615 289 615 289 633 113 633 82 591 320 590 320 611 82 612 30 563 315 561 315 582 30 584 30 513 169 513 169 531 30 531 32 488 111 488 111 506 32 506 357 458 465 461 464 486 356 483 26 458 271 459 271 483 26 482 338 438 423 442 422 461 337 457 64 437 145 437 145 455 64 455 205 414 293 414 293 436 205 436 318 407 442 411 441 439 317 435 42 404 176 407 176 435 42 432 28 381 137 381 137 405 28 405 +../../inference/ch_det_data_50/all-sum-510/00150669.jpg 647 698 683 698 683 718 647 718 515 684 551 684 551 721 515 721 650 687 680 687 680 702 650 702 920 673 938 673 938 686 920 686 518 670 548 670 548 690 518 690 785 670 808 670 808 688 785 688 590 670 608 670 608 688 590 688 732 665 745 679 732 692 718 679 652 668 680 668 680 689 652 689 271 665 423 665 423 690 271 690 130 664 205 664 205 690 130 690 44 664 111 664 111 689 44 689 781 628 812 628 812 663 781 663 643 626 687 626 687 666 643 666 514 627 550 627 550 665 514 665 654 617 673 617 673 629 654 629 858 617 868 617 868 628 858 628 727 617 736 617 736 628 727 628 920 614 940 614 940 631 920 631 785 614 807 614 807 631 785 631 371 603 421 603 421 620 371 620 83 600 216 603 216 624 83 620 46 602 72 602 72 624 46 624 780 569 817 573 813 610 776 606 922 559 936 559 936 575 922 575 856 559 869 559 869 575 856 575 61 552 411 552 411 569 61 569 61 531 117 533 117 547 61 545 859 527 868 527 868 539 859 539 923 525 936 525 936 542 923 542 787 524 807 524 807 540 787 540 526 526 536 526 536 536 526 536 261 511 396 511 396 528 261 528 120 512 246 512 246 526 120 526 47 512 120 512 120 527 47 527 753 491 829 491 829 508 753 508 636 491 712 491 712 508 636 508 517 491 593 491 593 508 517 508 84 448 125 448 125 463 84 463 221 448 238 448 238 462 221 462 682 444 869 444 869 461 682 461 561 444 667 444 667 461 561 461 489 445 545 445 545 459 489 459 183 437 209 437 209 459 183 459 52 429 73 437 64 464 42 456 222 430 278 430 278 445 222 445 86 430 145 430 145 445 86 445 505 382 617 381 617 398 505 399 701 380 758 380 758 398 701 398 307 371 365 371 365 386 307 386 90 371 168 371 168 386 90 386 686 334 821 334 821 352 686 352 496 333 659 333 659 350 496 350 207 314 245 314 245 333 207 333 497 287 642 287 642 304 497 304 670 286 804 286 804 304 670 304 668 239 817 239 817 257 668 257 495 239 644 239 644 257 495 257 668 193 816 193 816 209 668 209 496 193 644 193 644 209 496 209 668 144 816 144 816 161 668 161 497 144 646 144 646 161 497 161 488 102 546 102 546 121 488 121 845 21 900 21 900 43 845 43 25 18 702 18 702 39 25 39 896 10 997 14 996 46 895 42 +../../inference/ch_det_data_50/all-sum-510/00152568.jpg 2 250 285 252 285 281 2 279 195 231 255 231 255 241 195 241 198 158 282 164 277 230 193 224 177 148 251 148 251 161 177 161 +../../inference/ch_det_data_50/all-sum-510/00155628.jpg 147 898 506 901 506 925 147 922 519 892 562 894 561 912 518 910 59 884 83 884 83 895 59 895 148 877 505 881 505 902 148 897 523 833 641 837 640 858 522 854 68 832 187 834 187 855 68 853 245 554 468 554 468 570 245 570 307 506 405 508 405 526 307 523 243 481 460 483 460 504 243 502 250 420 460 422 460 454 250 452 193 377 518 379 518 410 193 408 473 194 625 194 625 212 473 212 70 127 643 129 643 163 70 161 478 39 599 35 602 101 481 105 67 23 136 14 140 44 71 54 +../../inference/ch_det_data_50/all-sum-510/00173364.jpg 7 176 58 176 58 201 7 201 135 118 196 118 196 135 135 135 38 75 87 75 87 105 38 105 249 19 313 19 313 38 249 38 19 15 105 15 105 40 19 40 
+../../inference/ch_det_data_50/all-sum-510/00175503.jpg 39 256 503 252 504 362 40 366 49 198 351 175 357 253 55 276 +../../inference/ch_det_data_50/all-sum-510/00193218.jpg 282 373 411 373 411 389 282 389 170 373 223 373 223 390 170 390 108 373 162 373 162 390 108 390 276 357 358 357 358 371 276 371 169 357 222 357 222 371 169 371 106 356 175 356 175 373 106 373 408 356 493 356 493 370 408 370 24 185 64 185 64 203 24 203 500 184 558 184 558 201 500 201 379 185 421 183 422 200 380 202 283 184 311 184 311 202 283 202 173 185 197 185 197 201 173 201 498 163 544 163 544 177 498 177 379 162 412 162 412 177 379 177 261 161 303 161 303 178 261 178 174 161 231 161 231 178 174 178 24 161 80 161 80 178 24 178 385 139 489 139 489 155 385 155 26 137 133 137 133 153 26 153 442 115 538 117 538 134 442 132 345 117 406 117 406 131 345 131 259 117 303 117 303 131 259 131 28 112 229 114 229 132 28 130 130 90 395 93 395 110 130 107 560 81 585 81 585 109 560 109 +../../inference/ch_det_data_50/all-sum-510/00195033.jpg 221 302 240 302 240 309 221 309 487 262 534 264 533 282 486 280 125 249 194 249 194 285 125 285 336 248 364 248 364 268 336 268 317 221 381 223 381 240 317 238 431 224 450 224 450 236 431 236 360 202 539 202 539 218 360 218 87 199 148 201 148 218 87 216 371 181 450 181 450 195 371 195 327 180 354 180 354 194 327 194 94 178 241 178 241 195 94 195 431 159 559 159 559 175 431 175 128 148 289 149 289 166 128 165 35 145 75 148 74 163 34 160 487 146 501 146 501 153 487 153 100 143 122 143 122 154 100 154 370 127 505 126 505 140 370 141 98 125 194 125 194 139 98 139 320 125 338 125 338 136 320 136 35 121 78 121 78 135 35 135 322 104 338 104 338 116 322 116 371 101 503 101 503 117 371 117 348 103 362 103 362 115 348 115 37 101 81 101 81 114 37 114 97 98 207 99 207 116 97 115 305 89 317 89 317 97 305 97 346 86 364 86 364 97 346 97 319 85 342 85 342 100 319 100 357 82 515 80 515 96 357 98 40 81 90 81 90 94 40 94 92 77 242 78 242 95 92 94 312 65 394 65 394 79 312 79 240 64 290 64 290 78 240 78 183 52 222 52 222 66 183 66 468 47 547 47 547 61 468 61 422 34 438 34 438 55 422 55 464 29 551 29 551 43 464 43 206 19 330 21 330 42 206 40 +../../inference/ch_det_data_50/all-sum-510/00208502.jpg 556 535 630 535 630 569 556 569 204 537 284 537 284 552 204 552 142 512 191 512 191 526 142 526 248 511 309 511 309 525 248 525 41 499 118 499 118 520 41 520 465 490 558 490 558 510 465 510 666 489 680 493 677 505 662 501 724 490 739 490 739 503 724 503 40 450 118 448 118 469 40 471 173 448 237 448 237 465 173 465 93 403 121 403 121 424 93 424 38 403 63 403 63 424 38 424 214 392 232 405 220 422 203 409 39 357 58 357 58 375 39 375 92 355 121 355 121 375 92 375 187 339 248 337 249 363 188 365 458 319 551 317 551 338 458 340 457 271 553 271 553 292 457 292 562 271 737 267 737 288 562 292 516 225 548 225 548 245 516 245 620 185 675 185 675 202 620 202 456 130 550 128 550 149 456 151 571 104 789 98 789 121 571 127 121 46 291 46 291 99 121 99 536 36 710 36 710 92 536 92 +../../inference/ch_det_data_50/all-sum-510/00224225.jpg 135 426 157 426 157 449 135 449 199 402 480 408 479 461 198 455 200 225 474 225 474 394 200 394 130 264 174 264 174 281 130 281 343 205 458 205 458 232 343 232 197 186 349 194 346 242 194 234 7 41 160 39 161 115 8 117 +../../inference/ch_det_data_50/all-sum-510/00227746.jpg 142 230 210 230 210 240 142 240 72 230 130 230 130 240 72 240 215 228 386 228 386 240 215 240 290 208 347 208 347 224 290 224 142 179 165 181 162 209 139 208 171 152 347 152 347 167 171 167 143 110 279 112 279 135 143 132 202 53 387 53 
387 69 202 69 141 47 193 47 193 64 141 64 +../../inference/ch_det_data_50/all-sum-510/00229605.jpg 742 528 882 528 882 545 742 545 232 497 590 496 590 524 232 525 5 496 229 496 229 524 5 524 734 494 884 497 884 522 734 519 605 493 718 488 719 517 606 522 2 242 865 227 866 291 3 305 477 26 884 26 884 77 477 77 +../../inference/ch_det_data_50/all-sum-510/00233011.jpg 61 225 293 225 293 243 61 243 11 218 43 218 43 252 11 252 60 177 120 177 120 196 60 196 11 169 44 169 44 204 11 204 59 127 149 129 149 148 59 146 11 123 45 123 45 156 11 156 124 87 239 87 239 105 124 105 147 49 218 49 218 67 147 67 257 44 354 47 353 71 256 68 8 47 54 47 54 69 8 69 275 10 346 10 346 32 275 32 26 9 75 9 75 32 26 32 +../../inference/ch_det_data_50/all-sum-510/00233625.jpg 370 395 635 397 635 445 370 443 67 210 935 204 936 325 68 331 +../../inference/ch_det_data_50/all-sum-510/00233634.jpg 213 637 264 637 264 706 213 706 522 634 572 634 572 697 522 697 641 522 684 522 684 570 641 570 95 514 155 514 155 592 95 592 754 394 762 394 762 403 754 403 677 362 730 360 733 432 679 433 53 360 109 360 109 436 53 436 77 207 157 207 157 282 77 282 642 204 695 204 695 274 642 274 208 88 262 85 266 165 212 168 362 47 428 44 432 117 366 120 +../../inference/ch_det_data_50/all-sum-510/00234400.jpg 156 419 739 419 739 439 156 439 157 393 653 393 653 412 157 412 38 390 129 390 129 413 38 413 156 339 307 342 307 365 156 362 36 342 125 342 125 363 36 363 519 293 705 293 705 316 519 316 393 290 485 288 485 316 393 318 156 291 271 291 271 315 156 315 35 291 127 291 127 315 35 315 155 242 360 242 360 269 155 269 34 242 83 242 83 270 34 270 27 150 159 150 159 177 27 177 280 96 507 96 507 113 280 113 313 44 477 47 476 90 312 87 516 50 664 52 664 68 516 67 485 17 708 15 708 45 485 47 +../../inference/ch_det_data_50/all-sum-510/00234883.jpg 64 122 318 117 319 193 65 197 71 118 122 118 122 132 71 132 381 62 506 61 506 75 381 76 54 25 369 23 369 116 54 118 385 26 503 23 503 47 385 50 +../../inference/ch_det_data_50/all-sum-510/test_add_0.jpg 311 521 391 521 391 534 311 534 277 499 426 499 426 516 277 516 259 445 438 445 438 461 259 461 210 426 487 426 487 443 210 443 244 385 460 385 460 411 244 411 220 327 476 327 476 373 220 373 205 204 494 208 493 279 204 275 264 163 423 165 423 198 264 196 15 17 203 15 203 45 15 47 +../../inference/ch_det_data_50/all-sum-510/test_add_1.png +../../inference/ch_det_data_50/all-sum-510/test_add_10.png 155 123 187 123 187 174 155 174 160 105 184 105 184 131 160 131 116 45 155 44 158 176 119 176 63 30 102 31 99 172 60 171 +../../inference/ch_det_data_50/all-sum-510/test_add_11.jpg 1388 755 1486 755 1486 794 1388 794 1011 752 1210 752 1210 802 1011 802 681 752 879 752 879 801 681 801 355 750 568 745 570 796 356 801 76 748 266 743 268 796 78 801 600 645 1155 645 1155 706 600 706 600 562 1151 553 1151 614 600 622 596 478 1070 470 1070 529 596 537 595 390 1095 385 1095 444 595 448 600 303 1061 303 1061 362 600 362 353 180 1521 180 1521 265 353 265 59 40 261 40 261 91 59 91 1303 39 1495 39 1495 90 1303 90 971 37 1173 32 1175 83 973 88 668 37 864 32 866 83 670 88 361 32 561 32 561 88 361 88 +../../inference/ch_det_data_50/all-sum-510/test_add_12.jpg 9 590 140 592 140 615 9 613 107 520 908 524 908 571 107 566 632 448 905 445 905 481 632 484 110 445 468 447 468 487 110 485 580 303 682 301 683 351 581 353 368 257 568 262 565 361 364 355 61 83 856 85 856 164 61 162 +../../inference/ch_det_data_50/all-sum-510/test_add_13.jpg 68 93 118 96 116 116 66 113 +../../inference/ch_det_data_50/all-sum-510/test_add_14.jpg 28 94 238 92 
238 130 28 132 27 50 241 48 241 88 27 90
+../../inference/ch_det_data_50/all-sum-510/test_add_15.jpg 140 251 354 251 354 268 140 268 203 212 407 217 407 234 203 229 104 210 194 212 194 229 104 227 153 155 287 159 287 175 153 172 143 134 307 140 307 157 143 150 106 136 147 136 147 149 106 149 106 101 278 107 277 126 105 119 106 70 247 77 246 97 105 90 106 37 211 40 210 64 105 61
+../../inference/ch_det_data_50/all-sum-510/test_add_16.jpg 380 740 750 740 750 780 380 780 360 700 472 700 472 728 360 728 1550 698 1580 698 1580 750 1550 750 1256 694 1444 694 1444 722 1256 722 1242 659 1452 659 1452 690 1242 690 384 643 672 643 672 682 384 682 1226 623 1474 621 1474 655 1226 657 356 599 582 599 582 631 356 631 1198 587 1496 587 1496 619 1198 619 1164 553 1534 553 1534 585 1164 585 378 549 642 549 642 589 378 589 354 500 520 500 520 540 354 540 772 258 1128 258 1128 303 772 303 372 208 508 208 508 303 372 303 774 208 1092 214 1092 260 774 254
+../../inference/ch_det_data_50/all-sum-510/test_add_17.jpg 319 255 394 257 394 271 319 269 306 236 407 238 407 257 306 255 306 221 413 226 412 243 305 237 93 135 387 140 386 209 92 204 69 92 401 100 401 127 69 118 66 74 225 77 225 95 66 92 64 58 227 60 227 77 64 75
+../../inference/ch_det_data_50/all-sum-510/test_add_18.jpg 153 908 616 914 616 935 153 930 464 786 718 788 718 816 464 813 552 750 666 755 665 792 551 788 117 538 190 538 190 572 117 572 115 472 676 484 675 530 114 518 119 427 670 439 670 471 119 459 119 374 676 380 676 411 119 405 555 261 677 262 677 280 555 279 164 258 336 258 336 275 164 275 342 194 457 196 457 221 342 219 307 172 490 172 490 190 307 190 252 125 540 129 540 171 252 168 345 90 488 92 488 110 345 108 283 40 569 48 567 84 282 76 235 30 268 30 268 64 235 64
+../../inference/ch_det_data_50/all-sum-510/test_add_19.jpg 22 293 44 293 44 304 22 304 62 291 106 291 106 305 62 305 61 279 107 279 107 291 61 291 218 278 247 278 247 292 218 292 176 278 210 278 210 291 176 291 141 275 166 275 166 307 141 307 7 266 20 266 20 278 7 278 219 264 245 264 245 279 219 279 60 263 133 263 133 279 60 279 22 264 49 264 49 279 22 279 218 251 250 251 250 266 218 266 63 251 133 251 133 264 63 264 22 250 45 250 45 265 22 265 7 251 20 251 20 263 7 263 8 240 18 240 18 249 8 249 61 236 115 236 115 252 61 252 23 234 49 237 47 253 21 250 210 235 246 235 246 252 210 252 143 236 166 236 166 252 143 252 493 224 533 224 533 241 493 241 334 224 355 224 355 239 334 239 287 224 315 224 315 239 287 239 61 224 114 224 114 238 61 238 7 226 18 226 18 235 7 235 219 223 250 223 250 237 219 237 141 224 167 221 169 235 143 238 23 223 49 223 49 239 23 239 494 212 526 212 526 225 494 225 418 211 439 211 439 226 418 226 335 211 400 211 400 224 335 224 291 211 322 211 322 224 291 224 220 211 251 211 251 224 220 224 144 212 167 212 167 223 144 223 60 211 115 209 115 222 60 224 24 210 50 210 50 224 24 224 336 197 384 197 384 211 336 211 63 198 89 198 89 209 63 209 492 195 542 195 542 213 492 213 219 195 257 195 257 213 219 213 177 196 207 196 207 210 177 210 144 197 158 197 158 210 144 210 23 196 44 196 44 212 23 212 416 193 440 193 440 213 416 213 63 185 134 185 134 197 63 197 335 184 400 184 400 197 335 197 455 180 466 191 456 201 444 190 289 187 309 180 315 194 295 202 219 183 256 183 256 197 219 197 140 183 160 183 160 198 140 198 493 182 519 182 519 197 493 197 426 178 441 191 426 204 412 190 32 177 46 189 32 202 19 189 176 180 193 180 193 197 176 197 335 170 402 170 402 186 335 186 491 169 521 169 521 186 491 186 426 163 441 176 426 191 412 179 292 170 315 170 315 186 292 186 219 170 252 170 252 185 219 185 177 171 189 171 189 185 177 185 62 170 127 168 127 182 62 184 454 167 464 177 455 186 445 176 142 169 164 169 164 185 142 185 492 158 525 158 525 172 492 172 399 159 436 159 436 169 399 169 334 157 403 157 403 170 334 170 295 157 327 157 327 171 295 171 219 156 253 156 253 170 219 170 143 156 164 156 164 171 143 171 60 157 127 155 127 169 60 171 491 142 543 142 543 158 491 158 449 143 480 143 480 157 449 157 334 142 441 142 441 157 334 157 294 143 328 143 328 157 294 157 219 143 254 143 254 157 219 157 61 143 105 143 105 156 61 156 142 141 164 141 164 157 142 157 17 150 31 136 45 149 30 162 285 133 293 133 293 141 285 141 177 132 193 132 193 145 177 145 335 130 389 130 389 143 335 143 491 129 528 129 528 143 491 143 449 129 479 129 479 143 449 143 291 129 323 129 323 143 291 143 217 130 256 128 257 143 218 145 61 129 97 129 97 143 61 143 416 128 439 128 439 143 416 143 143 128 161 128 161 145 143 145 29 123 45 132 34 149 18 139 492 117 537 117 537 130 492 130 335 117 389 117 389 130 335 130 218 118 256 118 256 128 218 128 450 116 480 116 480 130 450 130 417 116 440 116 440 131 417 131 177 116 210 116 210 130 177 130 143 116 164 116 164 131 143 131 60 115 90 115 90 132 60 132 17 121 32 110 45 124 29 136 490 105 527 105 527 115 490 115 448 105 479 105 479 115 448 115 419 106 436 106 436 114 419 114 292 105 321 105 321 116 292 116 218 105 244 105 244 115 218 115 175 105 205 105 205 115 175 115 143 105 163 105 163 116 143 116 334 104 373 104 373 115 334 115 61 104 88 104 88 115 61 115 483 89 523 89 523 99 483 99 330 87 381 87 381 100 330 100 274 87 336 87 336 100 274 100 213 87 248 87 248 100 213 100 5 85 103 85 103 101 5 101 414 64 464 64 464 78 414 78 287 64 335 64 335 78 287 78 155 62 208 62 208 79 155 79 414 47 525 48 525 64 414 63 287 48 377 48 377 64 287 64 157 48 270 48 270 63 157 63 415 34 483 34 483 48 415 48 287 33 338 33 338 50 287 50 26 34 45 34 45 52 26 52 155 32 207 32 207 49 155 49 55 32 115 31 116 51 56 53 411 2 529 2 529 19 411 19 144 2 346 0 346 17 144 19
+../../inference/ch_det_data_50/all-sum-510/test_add_2.jpg 251 404 535 404 535 430 251 430 302 339 483 339 483 385 302 385 302 303 482 303 482 326 302 326 573 217 693 217 693 240 573 240 331 216 455 214 455 240 331 242 108 212 182 214 181 244 107 242 313 98 672 99 672 121 313 120 311 60 585 61 585 87 311 86
+../../inference/ch_det_data_50/all-sum-510/test_add_20.jpg 30 345 607 345 607 372 30 372 216 292 512 292 512 323 216 323 472 270 527 270 527 287 472 287 216 266 292 266 292 287 216 287 218 238 486 238 486 265 218 265 220 215 305 215 305 236 220 236 221 185 343 185 343 209 221 209 220 160 289 160 289 182 220 182 374 120 477 122 477 147 374 145 221 122 367 120 367 145 221 147 217 80 354 82 354 117 217 115 439 33 607 33 607 60 439 60 67 15 400 15 400 46 67 46
+../../inference/ch_det_data_50/all-sum-510/test_add_3.jpg 168 326 339 324 339 341 168 343 169 286 309 288 309 314 169 312 169 219 324 219 324 235 169 235 339 219 451 216 451 232 339 235 168 200 373 200 373 216 168 216 168 180 418 180 418 197 168 197 169 147 417 147 417 165 169 165 170 117 419 117 419 141 170 141 325 62 480 62 480 93 325 93 170 62 310 59 311 91 171 94
+../../inference/ch_det_data_50/all-sum-510/test_add_4.png
+../../inference/ch_det_data_50/all-sum-510/test_add_5.png 47 162 109 162 109 176 47 176 51 119 170 119 170 136 51 136 49 100 166 100 166 119 49 119 51 83 166 83 166 102 51 102 50 66 169 66 169 85 50 85 49 47 149 46 149 68 49 69 5 9 81 9 81 43 5 43
+../../inference/ch_det_data_50/all-sum-510/test_add_6.jpg 122 222 220 226 219 253 121 249 160 176 185 180 182 200 157 196
+../../inference/ch_det_data_50/all-sum-510/test_add_7.jpg 47 937 175 933 176 964 48 967 224 870 632 873 632 955 224 952 53 743 640 743 640 793 53 793 148 673 546 676 546 723 148 720 71 502 636 502 636 604 71 604 54 264 660 274 657 446 51 436 59 173 534 173 534 241 59 241 502 173 646 173 646 239 502 239
+../../inference/ch_det_data_50/all-sum-510/test_add_8.jpg 249 584 455 578 456 608 250 614 106 531 458 524 458 561 107 568 334 492 385 492 385 509 334 509 26 306 356 296 357 321 27 331 21 258 447 250 447 275 21 283 77 208 447 204 447 226 77 230 158 20 322 28 319 82 155 74
+../../inference/ch_det_data_50/all-sum-510/test_add_9.png 264 684 486 684 486 697 264 697 194 666 556 666 556 682 194 682 152 595 600 595 600 608 152 608 211 577 543 577 543 590 211 590 131 559 617 558 617 572 131 573 84 540 665 540 665 553 84 553 95 521 654 521 654 536 95 536 361 448 390 448 390 461 361 461 236 375 515 375 515 391 236 391 174 353 575 353 575 369 174 369 342 279 409 281 409 298 342 296 254 203 493 203 493 220 254 220
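The ground-truth records above follow a simple plain-text layout: each line starts with an image path, followed by zero or more boxes of eight integers, i.e. four (x, y) corner points per box (lines such as `test_add_4.png` carry no boxes at all). A minimal parsing sketch, assuming exactly this layout; the helper name `load_det_results` is illustrative and not part of the repository:

```python
import numpy as np

def load_det_results(path):
    """Parse a detection ground-truth file: one image per line,
    an image path followed by groups of 8 ints (4 corners per box)."""
    results = {}
    with open(path, "r") as f:
        for line in f:
            parts = line.split()
            if not parts:
                continue
            img_path, coords = parts[0], [int(v) for v in parts[1:]]
            assert len(coords) % 8 == 0, "each box needs four (x, y) corners"
            # shape: (num_boxes, 4 points, 2 coordinates); may be empty
            results[img_path] = np.array(coords, dtype=np.int64).reshape(-1, 4, 2)
    return results
```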
diff --git a/tests/test.sh b/tests/test.sh
index 8040793f1d7e25426ac015b80383c790aab79a2e..5649e344b76cf4485db533eee4035e1cbdd5adae 100644
--- a/tests/test.sh
+++ b/tests/test.sh
@@ -321,7 +321,7 @@ function func_serving(){
             if [[ ${precision} =~ "fp16" || ${precision} =~ "int8" ]] && [ ${use_trt} = "False" ]; then
                 continue
             fi
-            if [[ ${use_trt} = "Falg_quantse" || ${precision} =~ "int8" ]]; then
+            if [[ ${use_trt} = "False" || ${precision} =~ "int8" ]] && [[ ${_flag_quant} = "True" ]]; then
                 continue
             fi
             _save_log_path="${_log_path}/infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_1.log"
@@ -433,7 +433,9 @@ if [ ${MODE} = "infer" ]; then
             save_infer_dir=$(dirname $infer_model)
             set_export_weight=$(func_set_params "${export_weight}" "${infer_model}")
             set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_dir}")
-            export_cmd="${python} ${norm_export} ${set_export_weight} ${set_save_infer_key}"
+            export_cmd="${python} ${infer_run_exports[Count]} ${set_export_weight} ${set_save_infer_key}"
+            echo ${infer_run_exports[Count]}
+            echo $export_cmd
             eval $export_cmd
             status_export=$?
             status_check $status_export "${export_cmd}" "${status_log}"
diff --git a/tools/export_model.py b/tools/export_model.py
index cae87aca129134d64711e364bf10428d69500a06..d8fe297235b2f5de6861d387cff64e8737cd30c0 100755
--- a/tools/export_model.py
+++ b/tools/export_model.py
@@ -60,6 +60,8 @@ def export_single_model(model, arch_config, save_path, logger):
                 "When there is tps in the network, variable length input is not supported, and the input size needs to be the same as during training"
             )
             infer_shape[-1] = 100
+        if arch_config["algorithm"] == "NRTR":
+            infer_shape = [1, 32, 100]
     elif arch_config["model_type"] == "table":
         infer_shape = [3, 488, 488]
     model = to_static(
diff --git a/tools/infer/predict_det.py b/tools/infer/predict_det.py
index 6347ca6dc719f0d489736dbf285eedd775d3790e..b24ad2bbb504caf1f262b4e47625348ce32d6fce 100755
--- a/tools/infer/predict_det.py
+++ b/tools/infer/predict_det.py
@@ -89,6 +89,14 @@ class TextDetector(object):
             postprocess_params["sample_pts_num"] = 2
             postprocess_params["expand_scale"] = 1.0
             postprocess_params["shrink_ratio_of_width"] = 0.3
+        elif self.det_algorithm == "PSE":
+            postprocess_params['name'] = 'PSEPostProcess'
+            postprocess_params["thresh"] = args.det_pse_thresh
+            postprocess_params["box_thresh"] = args.det_pse_box_thresh
+            postprocess_params["min_area"] = args.det_pse_min_area
+            postprocess_params["box_type"] = args.det_pse_box_type
+            postprocess_params["scale"] = args.det_pse_scale
+            self.det_pse_box_type = args.det_pse_box_type
         else:
             logger.info("unknown det_algorithm:{}".format(self.det_algorithm))
             sys.exit(0)
@@ -209,7 +217,7 @@
             preds['f_score'] = outputs[1]
             preds['f_tco'] = outputs[2]
             preds['f_tvo'] = outputs[3]
-        elif self.det_algorithm == 'DB':
+        elif self.det_algorithm in ['DB', 'PSE']:
             preds['maps'] = outputs[0]
         else:
             raise NotImplementedError
@@ -217,7 +225,9 @@
         #self.predictor.try_shrink_memory()
         post_result = self.postprocess_op(preds, shape_list)
         dt_boxes = post_result[0]['points']
-        if self.det_algorithm == "SAST" and self.det_sast_polygon:
+        if (self.det_algorithm == "SAST" and
+                self.det_sast_polygon) or (self.det_algorithm == "PSE" and
+                                           self.det_pse_box_type == 'poly'):
             dt_boxes = self.filter_tag_det_res_only_clip(dt_boxes, ori_im.shape)
         else:
             dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape)
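Taken together with the `--det_pse_*` flags added to `tools/infer/utility.py` further down, the PSE branch can be exercised through the same `TextDetector` entry point as DB or SAST. A hedged usage sketch, run from the repository root; the model directory is an assumed export location, while `parse_args` and `TextDetector` are the existing helpers in `tools/infer`:

```python
import cv2
import tools.infer.utility as utility
from tools.infer.predict_det import TextDetector

args = utility.parse_args()
args.det_algorithm = "PSE"
args.det_model_dir = "./inference/det_pse/"  # assumed export location
# 'poly' routes results through filter_tag_det_res_only_clip per this patch
args.det_pse_box_type = "poly"

detector = TextDetector(args)
img = cv2.imread("doc/imgs_en/img_10.jpg")
dt_boxes, elapse = detector(img)
print("{} text regions, {:.3f}s".format(len(dt_boxes), elapse))
```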
diff --git a/tools/infer/predict_rec.py b/tools/infer/predict_rec.py
index 7401a16ee662ceed1f8010adc3db0769e3efadb6..332cffd5395f8f511089b0bfde762820af7bbe8c 100755
--- a/tools/infer/predict_rec.py
+++ b/tools/infer/predict_rec.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 import os
 import sys
-
+from PIL import Image
 __dir__ = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(__dir__)
 sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
@@ -61,6 +61,13 @@ class TextRecognizer(object):
                 "character_dict_path": args.rec_char_dict_path,
                 "use_space_char": args.use_space_char
             }
+        elif self.rec_algorithm == 'NRTR':
+            postprocess_params = {
+                'name': 'NRTRLabelDecode',
+                "character_type": args.rec_char_type,
+                "character_dict_path": args.rec_char_dict_path,
+                "use_space_char": args.use_space_char
+            }
         self.postprocess_op = build_post_process(postprocess_params)
         self.predictor, self.input_tensor, self.output_tensors, self.config = \
             utility.create_predictor(args, 'rec', logger)
@@ -87,6 +94,16 @@ class TextRecognizer(object):
     def resize_norm_img(self, img, max_wh_ratio):
         imgC, imgH, imgW = self.rec_image_shape
+        if self.rec_algorithm == 'NRTR':
+            # NRTR takes a fixed-size grayscale input scaled to [-1, 1]
+            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+            image_pil = Image.fromarray(np.uint8(img))
+            img = image_pil.resize([100, 32], Image.ANTIALIAS)
+            img = np.array(img)
+            norm_img = np.expand_dims(img, -1)
+            norm_img = norm_img.transpose((2, 0, 1))
+            return norm_img.astype(np.float32) / 128. - 1.
+
         assert imgC == img.shape[2]
         max_wh_ratio = max(max_wh_ratio, imgW / imgH)
         imgW = int((32 * max_wh_ratio))
@@ -252,14 +269,16 @@
         else:
             self.input_tensor.copy_from_cpu(norm_img_batch)
             self.predictor.run()
-            outputs = []
            for output_tensor in self.output_tensors:
                 output = output_tensor.copy_to_cpu()
                 outputs.append(output)
             if self.benchmark:
                 self.autolog.times.stamp()
-            preds = outputs[0]
+            if len(outputs) != 1:
+                preds = outputs
+            else:
+                preds = outputs[0]
             rec_result = self.postprocess_op(preds)
             for rno in range(len(rec_result)):
                 rec_res[indices[beg_img_no + rno]] = rec_result[rno]
diff --git a/tools/infer/utility.py b/tools/infer/utility.py
index 466f824c29d5493f56e56bc3243fc907aec24d60..538f55c42b223f9741c5c7006dd7d1478ce1920b 100755
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -63,6 +63,13 @@ def init_args():
     parser.add_argument("--det_sast_nms_thresh", type=float, default=0.2)
     parser.add_argument("--det_sast_polygon", type=str2bool, default=False)

+    # PSE params
+    parser.add_argument("--det_pse_thresh", type=float, default=0)
+    parser.add_argument("--det_pse_box_thresh", type=float, default=0.85)
+    parser.add_argument("--det_pse_min_area", type=float, default=16)
+    parser.add_argument("--det_pse_box_type", type=str, default='box')
+    parser.add_argument("--det_pse_scale", type=int, default=1)
+
     # params for text recognizer
     parser.add_argument("--rec_algorithm", type=str, default='CRNN')
     parser.add_argument("--rec_model_dir", type=str)
diff --git a/tools/program.py b/tools/program.py
index d6d47d047b5bc65024b5db866b67ee260f8f8bb3..f484cf4a1f512107bd755a6b446935751563bfcb 100755
--- a/tools/program.py
+++ b/tools/program.py
@@ -402,7 +402,7 @@ def preprocess(is_train=False):
     alg = config['Architecture']['algorithm']
     assert alg in [
         'EAST', 'DB', 'SAST', 'Rosetta', 'CRNN', 'STARNet', 'RARE', 'SRN',
-        'CLS', 'PGNet', 'Distillation', 'NRTR', 'TableAttn', 'SAR'
+        'CLS', 'PGNet', 'Distillation', 'NRTR', 'TableAttn', 'SAR', 'PSE'
     ]
     device = 'gpu:{}'.format(dist.ParallelEnv().dev_id) if use_gpu else 'cpu'
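On the recognition side, the NRTR branch above fixes the input at a 1x32x100 grayscale tensor scaled to [-1, 1] (pixel / 128 - 1), feeds multi-tensor outputs to the decoder unchanged, and decodes with `NRTRLabelDecode`. A hedged end-to-end sketch; the model directory, the dict path, and the exact return signature of `TextRecognizer.__call__` are assumptions based on the surrounding code:

```python
import cv2
import tools.infer.utility as utility
from tools.infer.predict_rec import TextRecognizer

args = utility.parse_args()
args.rec_algorithm = "NRTR"
args.rec_model_dir = "./inference/rec_nrtr/"  # assumed export location
args.rec_image_shape = "1,32,100"  # the NRTR branch hardcodes 100x32 anyway
args.rec_char_dict_path = "./ppocr/utils/EN_symbol_dict.txt"  # assumed dict

recognizer = TextRecognizer(args)
img = cv2.imread("./doc/imgs_words_en/word_10.png")
rec_res, elapse = recognizer([img])
print(rec_res)  # [(text, confidence), ...]
```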