From 338ba3ee4a0208cee354cd3b7d2c93b320e0ea54 Mon Sep 17 00:00:00 2001 From: LDOUBLEV Date: Sun, 10 May 2020 16:26:57 +0800 Subject: [PATCH] upload PaddleOCR code --- .pre-commit-config.yaml | 35 + .style.yapf | 3 + configs/det/det_db_icdar15_reader.yml | 22 + configs/det/det_db_mv3.yml | 51 + configs/det/det_db_r50_vd.yml | 51 + configs/det/det_east_icdar15_reader.yml | 23 + configs/det/det_east_mv3.yml | 43 + configs/det/det_east_r50_vd.yml | 42 + configs/rec/rec_benchmark_reader.yml | 12 + configs/rec/rec_chinese_lite_train.yml | 42 + configs/rec/rec_chinese_reader.yml | 14 + configs/rec/rec_mv3_none_bilstm_ctc.yml | 40 + configs/rec/rec_mv3_none_none_ctc.yml | 38 + configs/rec/rec_mv3_tps_bilstm_attn.yml | 49 + configs/rec/rec_mv3_tps_bilstm_ctc.yml | 46 + configs/rec/rec_r34_vd_none_bilstm_ctc.yml | 39 + configs/rec/rec_r34_vd_none_none_ctc.yml | 37 + configs/rec/rec_r34_vd_tps_bilstm_attn.yml | 48 + configs/rec/rec_r34_vd_tps_bilstm_ctc.yml | 45 + ppocr/__init__.py | 13 + ppocr/data/__init__.py | 13 + ppocr/data/det/data_augment.py | 47 + ppocr/data/det/dataset_traversal.py | 110 + ppocr/data/det/db_process.py | 192 + ppocr/data/det/east_process.py | 509 ++ ppocr/data/det/make_border_map.py | 147 + ppocr/data/det/make_shrink_map.py | 88 + ppocr/data/det/random_crop_data.py | 155 + ppocr/data/reader_main.py | 81 + ppocr/data/rec/__init__.py | 13 + ppocr/data/rec/dataset_traversal.py | 201 + ppocr/data/rec/img_tools.py | 92 + ppocr/modeling/__init__.py | 13 + ppocr/modeling/architectures/det_model.py | 119 + ppocr/modeling/architectures/rec_model.py | 114 + ppocr/modeling/backbones/det_mobilenet_v3.py | 251 + ppocr/modeling/backbones/det_resnet_vd.py | 252 + ppocr/modeling/backbones/rec_mobilenet_v3.py | 255 + ppocr/modeling/backbones/rec_resnet_vd.py | 271 + ppocr/modeling/common_functions.py | 95 + ppocr/modeling/heads/det_db_head.py | 206 + ppocr/modeling/heads/det_east_head.py | 116 + ppocr/modeling/heads/rec_attention_head.py | 232 + 
ppocr/modeling/heads/rec_ctc_head.py | 51 + ppocr/modeling/heads/rec_seq_encoder.py | 100 + ppocr/modeling/losses/det_basic_loss.py | 116 + ppocr/modeling/losses/det_db_loss.py | 68 + ppocr/modeling/losses/det_east_loss.py | 61 + ppocr/modeling/losses/rec_attention_loss.py | 38 + ppocr/modeling/losses/rec_ctc_loss.py | 36 + ppocr/modeling/stns/tps.py | 261 + ppocr/optimizer.py | 36 + ppocr/postprocess/db_postprocess.py | 152 + ppocr/postprocess/east_postprocess.py | 121 + ppocr/postprocess/locality_aware_nms.py | 199 + ppocr/utils/__init__.py | 13 + ppocr/utils/character.py | 171 + ppocr/utils/check.py | 33 + ppocr/utils/ppocr_keys_v1.txt | 6623 ++++++++++++++++++ ppocr/utils/save_load.py | 131 + ppocr/utils/stats.py | 65 + ppocr/utils/utility.py | 71 + tools/eval.py | 102 + tools/eval_utils/__init__.py | 13 + tools/eval_utils/eval_det_iou.py | 231 + tools/eval_utils/eval_det_utils.py | 131 + tools/eval_utils/eval_rec_utils.py | 111 + tools/export_model.py | 88 + tools/infer/det_program.txt | 1 + tools/infer/predict_det.py | 169 + tools/infer/predict_eval.py | 76 + tools/infer/predict_eval_new.py | 72 + tools/infer/predict_rec.py | 115 + tools/infer/predict_system.py | 97 + tools/infer/utility.py | 147 + tools/infer_rec.py | 125 + tools/program.py | 365 + tools/tmp/eval_det.py | 134 + tools/tmp/infer_det.py | 160 + tools/tmp/infer_rec.py | 116 + tools/tmp/test_rec_benchmark.py | 128 + tools/tmp/train_det.py | 216 + tools/tmp/train_rec.py | 222 + tools/train.py | 113 + 84 files changed, 15543 insertions(+) create mode 100644 .pre-commit-config.yaml create mode 100644 .style.yapf create mode 100755 configs/det/det_db_icdar15_reader.yml create mode 100755 configs/det/det_db_mv3.yml create mode 100755 configs/det/det_db_r50_vd.yml create mode 100755 configs/det/det_east_icdar15_reader.yml create mode 100755 configs/det/det_east_mv3.yml create mode 100755 configs/det/det_east_r50_vd.yml create mode 100755 configs/rec/rec_benchmark_reader.yml create mode 100755 
configs/rec/rec_chinese_lite_train.yml create mode 100755 configs/rec/rec_chinese_reader.yml create mode 100755 configs/rec/rec_mv3_none_bilstm_ctc.yml create mode 100755 configs/rec/rec_mv3_none_none_ctc.yml create mode 100755 configs/rec/rec_mv3_tps_bilstm_attn.yml create mode 100755 configs/rec/rec_mv3_tps_bilstm_ctc.yml create mode 100755 configs/rec/rec_r34_vd_none_bilstm_ctc.yml create mode 100755 configs/rec/rec_r34_vd_none_none_ctc.yml create mode 100755 configs/rec/rec_r34_vd_tps_bilstm_attn.yml create mode 100755 configs/rec/rec_r34_vd_tps_bilstm_ctc.yml create mode 100755 ppocr/__init__.py create mode 100755 ppocr/data/__init__.py create mode 100644 ppocr/data/det/data_augment.py create mode 100755 ppocr/data/det/dataset_traversal.py create mode 100644 ppocr/data/det/db_process.py create mode 100755 ppocr/data/det/east_process.py create mode 100644 ppocr/data/det/make_border_map.py create mode 100644 ppocr/data/det/make_shrink_map.py create mode 100644 ppocr/data/det/random_crop_data.py create mode 100755 ppocr/data/reader_main.py create mode 100755 ppocr/data/rec/__init__.py create mode 100755 ppocr/data/rec/dataset_traversal.py create mode 100755 ppocr/data/rec/img_tools.py create mode 100755 ppocr/modeling/__init__.py create mode 100755 ppocr/modeling/architectures/det_model.py create mode 100755 ppocr/modeling/architectures/rec_model.py create mode 100755 ppocr/modeling/backbones/det_mobilenet_v3.py create mode 100755 ppocr/modeling/backbones/det_resnet_vd.py create mode 100755 ppocr/modeling/backbones/rec_mobilenet_v3.py create mode 100755 ppocr/modeling/backbones/rec_resnet_vd.py create mode 100755 ppocr/modeling/common_functions.py create mode 100644 ppocr/modeling/heads/det_db_head.py create mode 100755 ppocr/modeling/heads/det_east_head.py create mode 100755 ppocr/modeling/heads/rec_attention_head.py create mode 100755 ppocr/modeling/heads/rec_ctc_head.py create mode 100755 ppocr/modeling/heads/rec_seq_encoder.py create mode 100644 
ppocr/modeling/losses/det_basic_loss.py create mode 100755 ppocr/modeling/losses/det_db_loss.py create mode 100755 ppocr/modeling/losses/det_east_loss.py create mode 100755 ppocr/modeling/losses/rec_attention_loss.py create mode 100755 ppocr/modeling/losses/rec_ctc_loss.py create mode 100755 ppocr/modeling/stns/tps.py create mode 100755 ppocr/optimizer.py create mode 100644 ppocr/postprocess/db_postprocess.py create mode 100755 ppocr/postprocess/east_postprocess.py create mode 100644 ppocr/postprocess/locality_aware_nms.py create mode 100755 ppocr/utils/__init__.py create mode 100755 ppocr/utils/character.py create mode 100755 ppocr/utils/check.py create mode 100644 ppocr/utils/ppocr_keys_v1.txt create mode 100755 ppocr/utils/save_load.py create mode 100755 ppocr/utils/stats.py create mode 100755 ppocr/utils/utility.py create mode 100755 tools/eval.py create mode 100644 tools/eval_utils/__init__.py create mode 100644 tools/eval_utils/eval_det_iou.py create mode 100644 tools/eval_utils/eval_det_utils.py create mode 100644 tools/eval_utils/eval_rec_utils.py create mode 100644 tools/export_model.py create mode 100644 tools/infer/det_program.txt create mode 100755 tools/infer/predict_det.py create mode 100755 tools/infer/predict_eval.py create mode 100755 tools/infer/predict_eval_new.py create mode 100755 tools/infer/predict_rec.py create mode 100755 tools/infer/predict_system.py create mode 100755 tools/infer/utility.py create mode 100755 tools/infer_rec.py create mode 100755 tools/program.py create mode 100755 tools/tmp/eval_det.py create mode 100755 tools/tmp/infer_det.py create mode 100755 tools/tmp/infer_rec.py create mode 100755 tools/tmp/test_rec_benchmark.py create mode 100755 tools/tmp/train_det.py create mode 100755 tools/tmp/train_rec.py create mode 100755 tools/train.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..1584bc76 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,35 @@ +- repo: 
https://github.com/PaddlePaddle/mirrors-yapf.git + sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37 + hooks: + - id: yapf + files: \.py$ +- repo: https://github.com/pre-commit/pre-commit-hooks + sha: a11d9314b22d8f8c7556443875b731ef05965464 + hooks: + - id: check-merge-conflict + - id: check-symlinks + - id: detect-private-key + files: (?!.*paddle)^.*$ + - id: end-of-file-fixer + files: \.md$ + - id: trailing-whitespace + files: \.md$ +- repo: https://github.com/Lucas-C/pre-commit-hooks + sha: v1.0.1 + hooks: + - id: forbid-crlf + files: \.md$ + - id: remove-crlf + files: \.md$ + - id: forbid-tabs + files: \.md$ + - id: remove-tabs + files: \.md$ +- repo: local + hooks: + - id: clang-format + name: clang-format + description: Format files with ClangFormat + entry: bash .clang_format.hook -i + language: system + files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|cuh|proto)$ diff --git a/.style.yapf b/.style.yapf new file mode 100644 index 00000000..4741fb4f --- /dev/null +++ b/.style.yapf @@ -0,0 +1,3 @@ +[style] +based_on_style = pep8 +column_limit = 80 diff --git a/configs/det/det_db_icdar15_reader.yml b/configs/det/det_db_icdar15_reader.yml new file mode 100755 index 00000000..388cd318 --- /dev/null +++ b/configs/det/det_db_icdar15_reader.yml @@ -0,0 +1,22 @@ +TrainReader: + reader_function: ppocr.data.det.dataset_traversal,TrainReader + process_function: ppocr.data.det.db_process,DBProcessTrain + num_workers: 8 + img_set_dir: ./train_data/icdar2015/text_localization/ + label_file_path: ./train_data/icdar2015/text_localization/train_icdar2015_label.txt + +EvalReader: + reader_function: ppocr.data.det.dataset_traversal,EvalTestReader + process_function: ppocr.data.det.db_process,DBProcessTest + img_set_dir: ./train_data/icdar2015/text_localization/ + label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt + test_image_shape: [736, 1280] + +TestReader: + reader_function: ppocr.data.det.dataset_traversal,EvalTestReader + process_function: 
ppocr.data.det.db_process,DBProcessTest + single_img_path: + img_set_dir: ./train_data/icdar2015/text_localization/ + label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt + test_image_shape: [736, 1280] + do_eval: True diff --git a/configs/det/det_db_mv3.yml b/configs/det/det_db_mv3.yml new file mode 100755 index 00000000..a41c901e --- /dev/null +++ b/configs/det/det_db_mv3.yml @@ -0,0 +1,51 @@ +Global: + algorithm: DB + use_gpu: true + epoch_num: 1200 + log_smooth_window: 20 + print_batch_step: 2 + save_model_dir: output + save_epoch_step: 200 + eval_batch_step: 5000 + train_batch_size_per_card: 16 + test_batch_size_per_card: 16 + image_shape: [3, 640, 640] + reader_yml: ./configs/det/det_db_icdar15_reader.yml + pretrain_weights: ./pretrain_models/MobileNetV3_pretrained/MobileNetV3_large_x0_5_pretrained/ + save_res_path: ./output/predicts_db.txt + +Architecture: + function: ppocr.modeling.architectures.det_model,DetModel + +Backbone: + function: ppocr.modeling.backbones.det_mobilenet_v3,MobileNetV3 + scale: 0.5 + model_name: large + +Head: + function: ppocr.modeling.heads.det_db_head,DBHead + model_name: large + k: 50 + inner_channels: 96 + out_channels: 2 + +Loss: + function: ppocr.modeling.losses.det_db_loss,DBLoss + balance_loss: true + main_loss_type: DiceLoss + alpha: 5 + beta: 10 + ohem_ratio: 3 + +Optimizer: + function: ppocr.optimizer,AdamDecay + base_lr: 0.001 + beta1: 0.9 + beta2: 0.999 + +PostProcess: + function: ppocr.postprocess.db_postprocess,DBPostProcess + thresh: 0.3 + box_thresh: 0.7 + max_candidates: 1000 + unclip_ratio: 1.5 \ No newline at end of file diff --git a/configs/det/det_db_r50_vd.yml b/configs/det/det_db_r50_vd.yml new file mode 100755 index 00000000..1faecbb5 --- /dev/null +++ b/configs/det/det_db_r50_vd.yml @@ -0,0 +1,51 @@ +Global: + algorithm: DB + use_gpu: true + epoch_num: 1200 + log_smooth_window: 20 + print_batch_step: 2 + save_model_dir: output + save_epoch_step: 200 + eval_batch_step: 5000 + 
train_batch_size_per_card: 8 + test_batch_size_per_card: 16 + image_shape: [3, 640, 640] + reader_yml: ./configs/det/det_db_icdar15_reader.yml + pretrain_weights: ./pretrain_models/ResNet50_vd_pretrained/ + save_res_path: ./output/predicts_db.txt + +Architecture: + function: ppocr.modeling.architectures.det_model,DetModel + +Backbone: + function: ppocr.modeling.backbones.det_resnet_vd,ResNet + layers: 50 + +Head: + function: ppocr.modeling.heads.det_db_head,DBHead + model_name: large + k: 50 + inner_channels: 256 + out_channels: 2 + +Loss: + function: ppocr.modeling.losses.det_db_loss,DBLoss + balance_loss: true + main_loss_type: DiceLoss + alpha: 5 + beta: 10 + ohem_ratio: 3 + +Optimizer: + function: ppocr.optimizer,AdamDecay + base_lr: 0.001 + beta1: 0.9 + beta2: 0.999 + +PostProcess: + function: ppocr.postprocess.db_postprocess,DBPostProcess + thresh: 0.3 + box_thresh: 0.7 + max_candidates: 1000 + unclip_ratio: 1.5 + \ No newline at end of file diff --git a/configs/det/det_east_icdar15_reader.yml b/configs/det/det_east_icdar15_reader.yml new file mode 100755 index 00000000..478bfcd8 --- /dev/null +++ b/configs/det/det_east_icdar15_reader.yml @@ -0,0 +1,23 @@ +TrainReader: + reader_function: ppocr.data.det.dataset_traversal,TrainReader + process_function: ppocr.data.det.east_process,EASTProcessTrain + num_workers: 8 + img_set_dir: ./train_data/icdar2015/text_localization/ + label_file_path: ./train_data/icdar2015/text_localization/train_icdar2015_label.txt + background_ratio: 0.125 + min_crop_side_ratio: 0.1 + min_text_size: 10 + +EvalReader: + reader_function: ppocr.data.det.dataset_traversal,EvalTestReader + process_function: ppocr.data.det.east_process,EASTProcessTest + img_set_dir: ./train_data/icdar2015/text_localization/ + label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt + +TestReader: + reader_function: ppocr.data.det.dataset_traversal,EvalTestReader + process_function: ppocr.data.det.east_process,EASTProcessTest + 
single_img_path: + img_set_dir: ./train_data/icdar2015/text_localization/ + label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt + do_eval: True diff --git a/configs/det/det_east_mv3.yml b/configs/det/det_east_mv3.yml new file mode 100755 index 00000000..6a77768e --- /dev/null +++ b/configs/det/det_east_mv3.yml @@ -0,0 +1,43 @@ +Global: + algorithm: EAST + use_gpu: true + epoch_num: 100000 + log_smooth_window: 20 + print_batch_step: 5 + save_model_dir: output + save_epoch_step: 200 + eval_batch_step: 5000 + train_batch_size_per_card: 16 + test_batch_size_per_card: 16 + image_shape: [3, 512, 512] + reader_yml: ./configs/det/det_east_icdar15_reader.yml + pretrain_weights: ./pretrain_models/MobileNetV3_pretrained/MobileNetV3_large_x0_5_pretrained/ + save_res_path: ./output/predicts_east.txt + +Architecture: + function: ppocr.modeling.architectures.det_model,DetModel + +Backbone: + function: ppocr.modeling.backbones.det_mobilenet_v3,MobileNetV3 + scale: 0.5 + model_name: large + +Head: + function: ppocr.modeling.heads.det_east_head,EASTHead + model_name: small + +Loss: + function: ppocr.modeling.losses.det_east_loss,EASTLoss + +Optimizer: + function: ppocr.optimizer,AdamDecay + base_lr: 0.001 + beta1: 0.9 + beta2: 0.999 + +PostProcess: + function: ppocr.postprocess.east_postprocess,EASTPostPocess + score_thresh: 0.8 + cover_thresh: 0.1 + nms_thresh: 0.2 + \ No newline at end of file diff --git a/configs/det/det_east_r50_vd.yml b/configs/det/det_east_r50_vd.yml new file mode 100755 index 00000000..6e9d23c6 --- /dev/null +++ b/configs/det/det_east_r50_vd.yml @@ -0,0 +1,42 @@ +Global: + algorithm: EAST + use_gpu: true + epoch_num: 100000 + log_smooth_window: 20 + print_batch_step: 5 + save_model_dir: output + save_epoch_step: 200 + eval_batch_step: 5000 + train_batch_size_per_card: 8 + test_batch_size_per_card: 16 + image_shape: [3, 512, 512] + reader_yml: ./configs/det/det_east_icdar15_reader.yml + pretrain_weights: 
./pretrain_models/ResNet50_vd_pretrained/ + save_res_path: ./output/predicts_east.txt + +Architecture: + function: ppocr.modeling.architectures.det_model,DetModel + +Backbone: + function: ppocr.modeling.backbones.det_resnet_vd,ResNet + layers: 50 + +Head: + function: ppocr.modeling.heads.det_east_head,EASTHead + model_name: large + +Loss: + function: ppocr.modeling.losses.det_east_loss,EASTLoss + +Optimizer: + function: ppocr.optimizer,AdamDecay + base_lr: 0.001 + beta1: 0.9 + beta2: 0.999 + +PostProcess: + function: ppocr.postprocess.east_postprocess,EASTPostPocess + score_thresh: 0.8 + cover_thresh: 0.1 + nms_thresh: 0.2 + \ No newline at end of file diff --git a/configs/rec/rec_benchmark_reader.yml b/configs/rec/rec_benchmark_reader.yml new file mode 100755 index 00000000..d119c7aa --- /dev/null +++ b/configs/rec/rec_benchmark_reader.yml @@ -0,0 +1,12 @@ +TrainReader: + reader_function: ppocr.data.rec.dataset_traversal,LMDBReader + num_workers: 8 + lmdb_sets_dir: ./train_data/data_lmdb_release/training/ + +EvalReader: + reader_function: ppocr.data.rec.dataset_traversal,LMDBReader + lmdb_sets_dir: ./train_data/data_lmdb_release/validation/ + +TestReader: + reader_function: ppocr.data.rec.dataset_traversal,LMDBReader + lmdb_sets_dir: ./train_data/data_lmdb_release/evaluation/ \ No newline at end of file diff --git a/configs/rec/rec_chinese_lite_train.yml b/configs/rec/rec_chinese_lite_train.yml new file mode 100755 index 00000000..29506ff4 --- /dev/null +++ b/configs/rec/rec_chinese_lite_train.yml @@ -0,0 +1,42 @@ +Global: + algorithm: CRNN + dataset: common + use_gpu: true + epoch_num: 300 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: output + save_epoch_step: 3 + eval_batch_step: 2000 + train_batch_size_per_card: 256 + test_batch_size_per_card: 256 + image_shape: [3, 32, 100] + max_text_length: 25 + character_type: ch + character_dict_path: ./ppocr/utils/ppocr_keys_v1.txt + loss_type: ctc + reader_yml: ./configs/rec/rec_chinese_reader.yml + 
pretrain_weights: + +Architecture: + function: ppocr.modeling.architectures.rec_model,RecModel + +Backbone: + function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3 + scale: 0.5 + model_name: small + +Head: + function: ppocr.modeling.heads.rec_ctc_head,CTCPredict + encoder_type: rnn + SeqRNN: + hidden_size: 48 + +Loss: + function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss + +Optimizer: + function: ppocr.optimizer,AdamDecay + base_lr: 0.001 + beta1: 0.9 + beta2: 0.999 diff --git a/configs/rec/rec_chinese_reader.yml b/configs/rec/rec_chinese_reader.yml new file mode 100755 index 00000000..95e1b500 --- /dev/null +++ b/configs/rec/rec_chinese_reader.yml @@ -0,0 +1,14 @@ +TrainReader: + reader_function: ppocr.data.rec.dataset_traversal,SimpleReader + num_workers: 8 + img_set_dir: . + label_file_path: ./train_data/hard_label.txt + +EvalReader: + reader_function: ppocr.data.rec.dataset_traversal,SimpleReader + img_set_dir: . + label_file_path: ./train_data/label_val_all.txt + +TestReader: + reader_function: ppocr.data.rec.dataset_traversal,SimpleReader + infer_img: ./infer_img diff --git a/configs/rec/rec_mv3_none_bilstm_ctc.yml b/configs/rec/rec_mv3_none_bilstm_ctc.yml new file mode 100755 index 00000000..09479175 --- /dev/null +++ b/configs/rec/rec_mv3_none_bilstm_ctc.yml @@ -0,0 +1,40 @@ +Global: + algorithm: CRNN + use_gpu: true + epoch_num: 72 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: output + save_epoch_step: 3 + eval_batch_step: 2000 + train_batch_size_per_card: 256 + test_batch_size_per_card: 256 + image_shape: [3, 32, 100] + max_text_length: 25 + character_type: en + loss_type: ctc + reader_yml: ./configs/rec/rec_benchmark_reader.yml + pretrain_weights: + +Architecture: + function: ppocr.modeling.architectures.rec_model,RecModel + +Backbone: + function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3 + scale: 0.5 + model_name: large + +Head: + function: ppocr.modeling.heads.rec_ctc_head,CTCPredict + encoder_type: rnn + 
SeqRNN: + hidden_size: 96 + +Loss: + function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss + +Optimizer: + function: ppocr.optimizer,AdamDecay + base_lr: 0.001 + beta1: 0.9 + beta2: 0.999 diff --git a/configs/rec/rec_mv3_none_none_ctc.yml b/configs/rec/rec_mv3_none_none_ctc.yml new file mode 100755 index 00000000..ae926468 --- /dev/null +++ b/configs/rec/rec_mv3_none_none_ctc.yml @@ -0,0 +1,38 @@ +Global: + algorithm: Rosetta + use_gpu: true + epoch_num: 72 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: output + save_epoch_step: 3 + eval_batch_step: 2000 + train_batch_size_per_card: 256 + test_batch_size_per_card: 256 + image_shape: [3, 32, 100] + max_text_length: 25 + character_type: en + loss_type: ctc + reader_yml: ./configs/rec/rec_benchmark_reader.yml + pretrain_weights: + +Architecture: + function: ppocr.modeling.architectures.rec_model,RecModel + +Backbone: + function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3 + scale: 0.5 + model_name: large + +Head: + function: ppocr.modeling.heads.rec_ctc_head,CTCPredict + encoder_type: reshape + +Loss: + function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss + +Optimizer: + function: ppocr.optimizer,AdamDecay + base_lr: 0.001 + beta1: 0.9 + beta2: 0.999 diff --git a/configs/rec/rec_mv3_tps_bilstm_attn.yml b/configs/rec/rec_mv3_tps_bilstm_attn.yml new file mode 100755 index 00000000..59cb3c2c --- /dev/null +++ b/configs/rec/rec_mv3_tps_bilstm_attn.yml @@ -0,0 +1,49 @@ +Global: + algorithm: RARE + use_gpu: true + epoch_num: 72 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: output + save_epoch_step: 3 + eval_batch_step: 2000 + train_batch_size_per_card: 256 + test_batch_size_per_card: 256 + image_shape: [3, 32, 100] + max_text_length: 25 + character_type: en + loss_type: attention + reader_yml: ./configs/rec/rec_benchmark_reader.yml + pretrain_weights: + +Architecture: + function: ppocr.modeling.architectures.rec_model,RecModel + +TPS: + function: ppocr.modeling.stns.tps,TPS + 
num_fiducial: 20 + loc_lr: 0.1 + model_name: small + +Backbone: + function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3 + scale: 0.5 + model_name: large + +Head: + function: ppocr.modeling.heads.rec_attention_head,AttentionPredict + encoder_type: rnn + SeqRNN: + hidden_size: 96 + Attention: + decoder_size: 96 + word_vector_dim: 96 + +Loss: + function: ppocr.modeling.losses.rec_attention_loss,AttentionLoss + +Optimizer: + function: ppocr.optimizer,AdamDecay + base_lr: 0.001 + beta1: 0.9 + beta2: 0.999 diff --git a/configs/rec/rec_mv3_tps_bilstm_ctc.yml b/configs/rec/rec_mv3_tps_bilstm_ctc.yml new file mode 100755 index 00000000..3acc2355 --- /dev/null +++ b/configs/rec/rec_mv3_tps_bilstm_ctc.yml @@ -0,0 +1,46 @@ +Global: + algorithm: STARNet + use_gpu: true + epoch_num: 72 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: output + save_epoch_step: 3 + eval_batch_step: 2000 + train_batch_size_per_card: 256 + test_batch_size_per_card: 256 + image_shape: [3, 32, 100] + max_text_length: 25 + character_type: en + loss_type: ctc + reader_yml: ./configs/rec/rec_benchmark_reader.yml + pretrain_weights: + +Architecture: + function: ppocr.modeling.architectures.rec_model,RecModel + +TPS: + function: ppocr.modeling.stns.tps,TPS + num_fiducial: 20 + loc_lr: 0.1 + model_name: small + +Backbone: + function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3 + scale: 0.5 + model_name: large + +Head: + function: ppocr.modeling.heads.rec_ctc_head,CTCPredict + encoder_type: rnn + SeqRNN: + hidden_size: 96 + +Loss: + function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss + +Optimizer: + function: ppocr.optimizer,AdamDecay + base_lr: 0.001 + beta1: 0.9 + beta2: 0.999 diff --git a/configs/rec/rec_r34_vd_none_bilstm_ctc.yml b/configs/rec/rec_r34_vd_none_bilstm_ctc.yml new file mode 100755 index 00000000..20fb0c85 --- /dev/null +++ b/configs/rec/rec_r34_vd_none_bilstm_ctc.yml @@ -0,0 +1,39 @@ +Global: + algorithm: CRNN + use_gpu: true + epoch_num: 72 + 
log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: output + save_epoch_step: 3 + eval_batch_step: 2000 + train_batch_size_per_card: 256 + test_batch_size_per_card: 256 + image_shape: [3, 32, 100] + max_text_length: 25 + character_type: en + loss_type: ctc + reader_yml: ./configs/rec/rec_benchmark_reader.yml + pretrain_weights: + +Architecture: + function: ppocr.modeling.architectures.rec_model,RecModel + +Backbone: + function: ppocr.modeling.backbones.rec_resnet_vd,ResNet + layers: 34 + +Head: + function: ppocr.modeling.heads.rec_ctc_head,CTCPredict + encoder_type: rnn + SeqRNN: + hidden_size: 256 + +Loss: + function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss + +Optimizer: + function: ppocr.optimizer,AdamDecay + base_lr: 0.001 + beta1: 0.9 + beta2: 0.999 diff --git a/configs/rec/rec_r34_vd_none_none_ctc.yml b/configs/rec/rec_r34_vd_none_none_ctc.yml new file mode 100755 index 00000000..0de87aea --- /dev/null +++ b/configs/rec/rec_r34_vd_none_none_ctc.yml @@ -0,0 +1,37 @@ +Global: + algorithm: Rosetta + use_gpu: true + epoch_num: 72 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: output + save_epoch_step: 3 + eval_batch_step: 2000 + train_batch_size_per_card: 256 + test_batch_size_per_card: 256 + image_shape: [3, 32, 100] + max_text_length: 25 + character_type: en + loss_type: ctc + reader_yml: ./configs/rec/rec_benchmark_reader.yml + pretrain_weights: + +Architecture: + function: ppocr.modeling.architectures.rec_model,RecModel + +Backbone: + function: ppocr.modeling.backbones.rec_resnet_vd,ResNet + layers: 34 + +Head: + function: ppocr.modeling.heads.rec_ctc_head,CTCPredict + encoder_type: reshape + +Loss: + function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss + +Optimizer: + function: ppocr.optimizer,AdamDecay + base_lr: 0.001 + beta1: 0.9 + beta2: 0.999 diff --git a/configs/rec/rec_r34_vd_tps_bilstm_attn.yml b/configs/rec/rec_r34_vd_tps_bilstm_attn.yml new file mode 100755 index 00000000..71d770b9 --- /dev/null +++ 
b/configs/rec/rec_r34_vd_tps_bilstm_attn.yml @@ -0,0 +1,48 @@ +Global: + algorithm: RARE + use_gpu: true + epoch_num: 72 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: output + save_epoch_step: 3 + eval_batch_step: 2000 + train_batch_size_per_card: 256 + test_batch_size_per_card: 256 + image_shape: [3, 32, 100] + max_text_length: 25 + character_type: en + loss_type: attention + reader_yml: ./configs/rec/rec_benchmark_reader.yml + pretrain_weights: + +Architecture: + function: ppocr.modeling.architectures.rec_model,RecModel + +TPS: + function: ppocr.modeling.stns.tps,TPS + num_fiducial: 20 + loc_lr: 0.1 + model_name: large + +Backbone: + function: ppocr.modeling.backbones.rec_resnet_vd,ResNet + layers: 34 + +Head: + function: ppocr.modeling.heads.rec_attention_head,AttentionPredict + encoder_type: rnn + SeqRNN: + hidden_size: 256 + Attention: + decoder_size: 128 + word_vector_dim: 128 + +Loss: + function: ppocr.modeling.losses.rec_attention_loss,AttentionLoss + +Optimizer: + function: ppocr.optimizer,AdamDecay + base_lr: 0.001 + beta1: 0.9 + beta2: 0.999 diff --git a/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml b/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml new file mode 100755 index 00000000..beb64372 --- /dev/null +++ b/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml @@ -0,0 +1,45 @@ +Global: + algorithm: STARNet + use_gpu: true + epoch_num: 72 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: output + save_epoch_step: 3 + eval_batch_step: 2000 + train_batch_size_per_card: 256 + test_batch_size_per_card: 256 + image_shape: [3, 32, 100] + max_text_length: 25 + character_type: en + loss_type: ctc + reader_yml: ./configs/rec/rec_benchmark_reader.yml + pretrain_weights: + +Architecture: + function: ppocr.modeling.architectures.rec_model,RecModel + +TPS: + function: ppocr.modeling.stns.tps,TPS + num_fiducial: 20 + loc_lr: 0.1 + model_name: large + +Backbone: + function: ppocr.modeling.backbones.rec_resnet_vd,ResNet + layers: 34 + +Head: + function: 
ppocr.modeling.heads.rec_ctc_head,CTCPredict + encoder_type: rnn + SeqRNN: + hidden_size: 256 + +Loss: + function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss + +Optimizer: + function: ppocr.optimizer,AdamDecay + base_lr: 0.001 + beta1: 0.9 + beta2: 0.999 diff --git a/ppocr/__init__.py b/ppocr/__init__.py new file mode 100755 index 00000000..d0c32e26 --- /dev/null +++ b/ppocr/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ppocr/data/__init__.py b/ppocr/data/__init__.py new file mode 100755 index 00000000..abf198b9 --- /dev/null +++ b/ppocr/data/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/ppocr/data/det/data_augment.py b/ppocr/data/det/data_augment.py new file mode 100644 index 00000000..f46c14ae --- /dev/null +++ b/ppocr/data/det/data_augment.py @@ -0,0 +1,47 @@ +# -*- coding:utf-8 -*- + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import numpy as np +import random +import cv2 +import math + +import imgaug +import imgaug.augmenters as iaa + + +def AugmentData(data): + img = data['image'] + shape = img.shape + + aug = iaa.Sequential( + [iaa.Fliplr(0.5), iaa.Affine(rotate=(-10, 10)), iaa.Resize( + (0.5, 3))]).to_deterministic() + + def may_augment_annotation(aug, data, shape): + if aug is None: + return data + + line_polys = [] + for poly in data['polys']: + new_poly = may_augment_poly(aug, shape, poly) + line_polys.append(new_poly) + data['polys'] = np.array(line_polys) + return data + + def may_augment_poly(aug, img_shape, poly): + keypoints = [imgaug.Keypoint(p[0], p[1]) for p in poly] + keypoints = aug.augment_keypoints( + [imgaug.KeypointsOnImage( + keypoints, shape=img_shape)])[0].keypoints + poly = [(p.x, p.y) for p in keypoints] + return poly + + img_aug = aug.augment_image(img) + data['image'] = img_aug + data = may_augment_annotation(aug, data, shape) + return data diff --git a/ppocr/data/det/dataset_traversal.py b/ppocr/data/det/dataset_traversal.py new file mode 100755 index 00000000..5ba01ee7 --- /dev/null +++ b/ppocr/data/det/dataset_traversal.py @@ -0,0 +1,110 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. 
class TrainReader(object):
    """Multi-worker training reader.

    Calling an instance with a worker id returns a batch generator; each
    worker strides over a shuffled copy of the label list so samples are
    partitioned across workers.
    """

    def __init__(self, params):
        self.num_workers = params['num_workers']
        self.label_file_path = params['label_file_path']
        self.batch_size = params['train_batch_size_per_card']
        assert 'process_function' in params,\
            "absence process_function in Reader"
        self.process = create_module(params['process_function'])(params)

    def __call__(self, process_id):
        def sample_iter_reader():
            # Re-read and re-shuffle the label list for every pass.
            with open(self.label_file_path, "rb") as fin:
                labels = fin.readlines()
            order = list(range(len(labels)))
            random.shuffle(order)
            # Stride by num_workers so each worker sees a disjoint subset.
            for idx in range(process_id, len(labels), self.num_workers):
                sample = self.process(labels[order[idx]])
                if sample is not None:
                    yield sample

        def batch_iter_reader():
            batch = []
            for sample in sample_iter_reader():
                batch.append(sample)
                if len(batch) == self.batch_size:
                    yield batch
                    batch = []
            # Flush the final, possibly partial, batch.
            if batch:
                yield batch

        return batch_iter_reader


class EvalTestReader(object):
    """Evaluation/test reader: yields batches of preprocessed images,
    each sample carrying its image name as the last element."""

    def __init__(self, params):
        self.params = params
        assert 'process_function' in params,\
            "absence process_function in EvalTestReader"

    def __call__(self, mode):
        process_function = create_module(self.params['process_function'])(
            self.params)
        batch_size = self.params['test_batch_size_per_card']

        # In "test" mode a single image path may be supplied directly;
        # otherwise the image list comes from the label file.
        single_img = None
        if mode == "test":
            single_img = self.params['single_img_path']

        img_list = []
        if single_img is not None:
            img_list.append([single_img, single_img])
        else:
            img_set_dir = self.params['img_set_dir']
            with open(self.params['label_file_path'], "rb") as fin:
                for line in fin.readlines():
                    img_name = line.decode().strip("\n").split("\t")[0]
                    img_list.append([img_set_dir + "/" + img_name, img_name])

        def batch_iter_reader():
            batch = []
            for img_path, img_name in img_list:
                img = cv2.imread(img_path)
                if img is None:
                    logger.info("load image error:" + img_path)
                    continue
                outs = process_function(img)
                outs.append(img_name)
                batch.append(outs)
                if len(batch) == batch_size:
                    yield batch
                    batch = []
            if batch:
                yield batch

        return batch_iter_reader
class DBProcessTrain(object):
    """Training-time preprocessing for the DB text-detection model.

    Converts one label-file line ("img_path<TAB>json") into the network
    inputs: normalized CHW image, shrink map/mask and threshold map/mask.
    """

    def __init__(self, params):
        self.img_set_dir = params['img_set_dir']
        self.image_shape = params['image_shape']

    def order_points_clockwise(self, pts):
        """Order 4 points as top-left, top-right, bottom-right, bottom-left.

        Assumes image coordinates (y grows downward).
        """
        rect = np.zeros((4, 2), dtype="float32")
        # Top-left has the smallest x+y sum, bottom-right the largest.
        s = pts.sum(axis=1)
        rect[0] = pts[np.argmin(s)]
        rect[2] = pts[np.argmax(s)]
        # Top-right has the smallest y-x difference, bottom-left the largest.
        diff = np.diff(pts, axis=1)
        rect[1] = pts[np.argmin(diff)]
        rect[3] = pts[np.argmax(diff)]
        return rect

    def make_data_dict(self, imgvalue, entry):
        """Build the sample dict (image, polys, texts, ignore flags) from a
        parsed label entry, dropping zero-area or malformed boxes."""
        boxes = []
        texts = []
        ignores = []
        for rect in entry:
            points = rect['points']
            transcription = rect['transcription']
            try:
                box = self.order_points_clockwise(
                    np.array(points).reshape(-1, 2))
                if cv2.contourArea(box) > 0:
                    boxes.append(box)
                    texts.append(transcription)
                    # '*' / '###' mark unreadable text, ignored during training.
                    ignores.append(transcription in ['*', '###'])
            except Exception:
                # Was a bare `except:`; keep the best-effort skip of broken
                # labels without swallowing KeyboardInterrupt/SystemExit.
                print('load label failed!')
        data = {
            'image': imgvalue,
            'shape': [imgvalue.shape[0], imgvalue.shape[1]],
            'polys': np.array(boxes),
            'texts': texts,
            'ignore_tags': ignores,
        }
        return data

    def NormalizeImage(self, data):
        """Scale to [0,1], apply ImageNet mean/std, and transpose HWC->CHW."""
        im = data['image']
        img_mean = [0.485, 0.456, 0.406]
        img_std = [0.229, 0.224, 0.225]
        im = im.astype(np.float32, copy=False)
        im = im / 255
        im -= img_mean
        im /= img_std
        channel_swap = (2, 0, 1)
        im = im.transpose(channel_swap)
        data['image'] = im
        return data

    def FilterKeys(self, data):
        """Drop intermediate keys that are not fed to the network."""
        filter_keys = ['polys', 'texts', 'ignore_tags', 'shape']
        for key in filter_keys:
            if key in data:
                del data[key]
        return data

    def convert_label_infor(self, label_infor):
        """Split one label line into the image path and the parsed JSON label."""
        label_infor = label_infor.decode()
        # Strip a possible UTF-8 BOM left by some annotation tools.
        label_infor = label_infor.encode('utf-8').decode('utf-8-sig')
        substr = label_infor.strip("\n").split("\t")
        img_path = self.img_set_dir + substr[0]
        label = json.loads(substr[1])
        return img_path, label

    def __call__(self, label_infor):
        """Run the full augment/crop/map-generation pipeline on one sample.

        Returns None when the image cannot be read, so the reader can skip it.
        """
        img_path, gt_label = self.convert_label_infor(label_infor)
        imgvalue = cv2.imread(img_path)
        if imgvalue is None:
            return None
        data = self.make_data_dict(imgvalue, gt_label)
        data = AugmentData(data)
        data = RandomCropData(data, self.image_shape[1:])
        data = MakeShrinkMap(data)
        data = MakeBorderMap(data)
        data = self.NormalizeImage(data)
        data = self.FilterKeys(data)
        return data['image'], data['shrink_map'], data['shrink_mask'], data[
            'threshold_map'], data['threshold_mask']


class DBProcessTest(object):
    """Inference-time preprocessing for the DB text-detection model."""

    def __init__(self, params):
        super(DBProcessTest, self).__init__()
        # resize_type 0: keep aspect ratio, round sides up to multiples of 32;
        # resize_type 1: force a fixed shape from 'det_image_shape'.
        self.resize_type = 0
        if 'det_image_shape' in params:
            self.image_shape = params['det_image_shape']
            self.resize_type = 1
        if 'max_side_len' in params:
            self.max_side_len = params['max_side_len']
        else:
            self.max_side_len = 2400

    def resize_image_type0(self, im):
        """
        resize image to a size multiple of 32 which is required by the network
        :param im: the resized image
        :param max_side_len: limit of max image size to avoid out of memory in gpu
        :return: the resized image and the resize ratio
        """
        max_side_len = self.max_side_len
        h, w, _ = im.shape

        resize_w = w
        resize_h = h

        # limit the max side
        if max(resize_h, resize_w) > max_side_len:
            if resize_h > resize_w:
                ratio = float(max_side_len) / resize_h
            else:
                ratio = float(max_side_len) / resize_w
        else:
            ratio = 1.
        resize_h = int(resize_h * ratio)
        resize_w = int(resize_w * ratio)
        # Round each side UP to the next multiple of 32.
        if resize_h % 32 == 0:
            resize_h = resize_h
        else:
            resize_h = (resize_h // 32 + 1) * 32
        if resize_w % 32 == 0:
            resize_w = resize_w
        else:
            resize_w = (resize_w // 32 + 1) * 32
        try:
            if int(resize_w) <= 0 or int(resize_h) <= 0:
                # NOTE(review): callers do not handle this None image and
                # would crash in normalize(); confirm whether this path is
                # reachable in practice.
                return None, (None, None)
            im = cv2.resize(im, (int(resize_w), int(resize_h)))
        except Exception:
            # Was a bare `except:`; keep the debug dump + exit behavior.
            print(im.shape, resize_w, resize_h)
            sys.exit(0)
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)
        return im, (ratio_h, ratio_w)

    def resize_image_type1(self, im):
        """Resize to the fixed (h, w) from 'det_image_shape'."""
        resize_h, resize_w = self.image_shape
        ori_h, ori_w = im.shape[:2]  # (h, w, c)
        im = cv2.resize(im, (int(resize_w), int(resize_h)))
        ratio_h = float(resize_h) / ori_h
        ratio_w = float(resize_w) / ori_w
        return im, (ratio_h, ratio_w)

    def normalize(self, im):
        """Scale to [0,1], apply ImageNet mean/std, transpose HWC->CHW."""
        img_mean = [0.485, 0.456, 0.406]
        img_std = [0.229, 0.224, 0.225]
        im = im.astype(np.float32, copy=False)
        im = im / 255
        im -= img_mean
        im /= img_std
        channel_swap = (2, 0, 1)
        im = im.transpose(channel_swap)
        return im

    def __call__(self, im):
        """Return [normalized NCHW image, (ratio_h, ratio_w)] for one image."""
        if self.resize_type == 0:
            im, (ratio_h, ratio_w) = self.resize_image_type0(im)
        else:
            im, (ratio_h, ratio_w) = self.resize_image_type1(im)
        im = self.normalize(im)
        im = im[np.newaxis, :]
        return [im, (ratio_h, ratio_w)]
class EASTProcessTrain(object):
    """Training-time preprocessing for the EAST text-detection model.

    Turns one label-file line into network inputs: padded/normalized NCHW
    image, score map, 9-channel quad geometry map and training mask (all
    downsampled by 4 in __call__).
    """

    def __init__(self, params):
        self.img_set_dir = params['img_set_dir']
        self.random_scale = np.array([0.5, 1, 2.0, 3.0])
        self.background_ratio = params['background_ratio']
        self.min_crop_side_ratio = params['min_crop_side_ratio']
        image_shape = params['image_shape']
        self.input_size = image_shape[1]
        self.min_text_size = params['min_text_size']

    def preprocess(self, im):
        """Scale the longest side to input_size, normalize (BGR->RGB,
        ImageNet mean/std) and zero-pad to a square NCHW tensor."""
        input_size = self.input_size
        im_shape = im.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        im_scale = float(input_size) / float(im_size_max)
        im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale)
        img_mean = [0.485, 0.456, 0.406]
        img_std = [0.229, 0.224, 0.225]
        im = im[:, :, ::-1].astype(np.float32)
        im = im / 255
        im -= img_mean
        im /= img_std
        new_h, new_w, _ = im.shape
        im_padded = np.zeros((input_size, input_size, 3), dtype=np.float32)
        im_padded[:new_h, :new_w, :] = im
        im_padded = im_padded.transpose((2, 0, 1))
        im_padded = im_padded[np.newaxis, :]
        return im_padded, im_scale

    def convert_label_infor(self, label_infor):
        """Parse one "img_path<TAB>json" label line into image path, word
        boxes, ignore tags and transcriptions."""
        label_infor = label_infor.decode()
        # Strip a possible UTF-8 BOM left by some annotation tools.
        label_infor = label_infor.encode('utf-8').decode('utf-8-sig')
        substr = label_infor.strip("\n").split("\t")
        img_path = self.img_set_dir + substr[0]
        label = json.loads(substr[1])
        nBox = len(label)
        wordBBs, txts, txt_tags = [], [], []
        for bno in range(0, nBox):
            wordBB = label[bno]['points']
            txt = label[bno]['transcription']
            wordBBs.append(wordBB)
            txts.append(txt)
            if txt == '###':
                txt_tags.append(True)
            else:
                txt_tags.append(False)
        wordBBs = np.array(wordBBs, dtype=np.float32)
        # np.bool was removed in NumPy 1.24; the builtin bool is equivalent.
        txt_tags = np.array(txt_tags, dtype=bool)
        return img_path, wordBBs, txt_tags, txts

    def rotate_im_poly(self, im, text_polys):
        """
        rotate image with 90 / 180 / 270 degree and rotate the polygons
        by the same angle around the image center
        """
        im_w, im_h = im.shape[1], im.shape[0]
        dst_im = im.copy()
        dst_polys = []
        rand_degree_ratio = np.random.rand()
        rand_degree_cnt = 1
        if rand_degree_ratio > 0.333 and rand_degree_ratio < 0.666:
            rand_degree_cnt = 2
        elif rand_degree_ratio > 0.666:
            rand_degree_cnt = 3
        for i in range(rand_degree_cnt):
            dst_im = np.rot90(dst_im)
        rot_degree = -90 * rand_degree_cnt
        rot_angle = rot_degree * math.pi / 180.0
        n_poly = text_polys.shape[0]
        cx, cy = 0.5 * im_w, 0.5 * im_h
        ncx, ncy = 0.5 * dst_im.shape[1], 0.5 * dst_im.shape[0]
        for i in range(n_poly):
            wordBB = text_polys[i]
            poly = []
            for j in range(4):
                # Rotate each vertex about the old center, then re-center
                # on the rotated image.
                sx, sy = wordBB[j][0], wordBB[j][1]
                dx = math.cos(rot_angle) * (sx - cx)\
                    - math.sin(rot_angle) * (sy - cy) + ncx
                dy = math.sin(rot_angle) * (sx - cx)\
                    + math.cos(rot_angle) * (sy - cy) + ncy
                poly.append([dx, dy])
            dst_polys.append(poly)
        dst_polys = np.array(dst_polys, dtype=np.float32)
        return dst_im, dst_polys

    def polygon_area(self, poly):
        """
        compute the signed area of a quadrilateral (shoelace formula)
        :param poly: 4x2 array of vertices
        :return: signed area (sign encodes vertex orientation)
        """
        edge = [(poly[1][0] - poly[0][0]) * (poly[1][1] + poly[0][1]),
                (poly[2][0] - poly[1][0]) * (poly[2][1] + poly[1][1]),
                (poly[3][0] - poly[2][0]) * (poly[3][1] + poly[2][1]),
                (poly[0][0] - poly[3][0]) * (poly[0][1] + poly[3][1])]
        return np.sum(edge) / 2.

    def check_and_validate_polys(self, polys, tags, img_height, img_width):
        """
        check so that the text poly is in the same direction,
        and also filter some invalid polygons
        :param polys: (N, 4, 2) array of quads
        :param tags: per-quad ignore flags
        :return: validated (polys, tags)
        """
        h, w = img_height, img_width
        if polys.shape[0] == 0:
            return polys
        polys[:, :, 0] = np.clip(polys[:, :, 0], 0, w - 1)
        polys[:, :, 1] = np.clip(polys[:, :, 1], 0, h - 1)

        validated_polys = []
        validated_tags = []
        for poly, tag in zip(polys, tags):
            p_area = self.polygon_area(poly)
            # invalid (near-degenerate) poly
            if abs(p_area) < 1:
                continue
            if p_area > 0:
                # poly in wrong direction
                if not tag:
                    tag = True  # reversed cases should be ignored
                poly = poly[(0, 3, 2, 1), :]
            validated_polys.append(poly)
            validated_tags.append(tag)
        return np.array(validated_polys), np.array(validated_tags)

    def draw_img_polys(self, img, polys):
        """Debug helper: dump the (denormalized) image with boxes drawn."""
        if len(img.shape) == 4:
            img = np.squeeze(img, axis=0)
        if img.shape[0] == 3:
            img = img.transpose((1, 2, 0))
            img[:, :, 2] += 123.68
            img[:, :, 1] += 116.78
            img[:, :, 0] += 103.94
        cv2.imwrite("tmp.jpg", img)
        img = cv2.imread("tmp.jpg")
        for box in polys:
            box = box.astype(np.int32).reshape((-1, 1, 2))
            cv2.polylines(img, [box], True, color=(255, 255, 0), thickness=2)
        import random
        ino = random.randint(0, 100)
        cv2.imwrite("tmp_%d.jpg" % ino, img)
        return

    def shrink_poly(self, poly, r):
        """
        fit a poly inside the origin poly, maybe bugs here...
        used for generate the score map
        :param poly: the text poly
        :param r: r in the paper (per-vertex reference length)
        :return: the shrinked poly
        """
        # shrink ratio
        R = 0.3
        # find the longer pair: move the long edges first, then the short ones
        dist0 = np.linalg.norm(poly[0] - poly[1])
        dist1 = np.linalg.norm(poly[2] - poly[3])
        dist2 = np.linalg.norm(poly[0] - poly[3])
        dist3 = np.linalg.norm(poly[1] - poly[2])
        if dist0 + dist1 > dist2 + dist3:
            # first move (p0, p1), (p2, p3), then (p0, p3), (p1, p2)
            ## p0, p1
            theta = np.arctan2((poly[1][1] - poly[0][1]),
                               (poly[1][0] - poly[0][0]))
            poly[0][0] += R * r[0] * np.cos(theta)
            poly[0][1] += R * r[0] * np.sin(theta)
            poly[1][0] -= R * r[1] * np.cos(theta)
            poly[1][1] -= R * r[1] * np.sin(theta)
            ## p2, p3
            theta = np.arctan2((poly[2][1] - poly[3][1]),
                               (poly[2][0] - poly[3][0]))
            poly[3][0] += R * r[3] * np.cos(theta)
            poly[3][1] += R * r[3] * np.sin(theta)
            poly[2][0] -= R * r[2] * np.cos(theta)
            poly[2][1] -= R * r[2] * np.sin(theta)
            ## p0, p3
            theta = np.arctan2((poly[3][0] - poly[0][0]),
                               (poly[3][1] - poly[0][1]))
            poly[0][0] += R * r[0] * np.sin(theta)
            poly[0][1] += R * r[0] * np.cos(theta)
            poly[3][0] -= R * r[3] * np.sin(theta)
            poly[3][1] -= R * r[3] * np.cos(theta)
            ## p1, p2
            theta = np.arctan2((poly[2][0] - poly[1][0]),
                               (poly[2][1] - poly[1][1]))
            poly[1][0] += R * r[1] * np.sin(theta)
            poly[1][1] += R * r[1] * np.cos(theta)
            poly[2][0] -= R * r[2] * np.sin(theta)
            poly[2][1] -= R * r[2] * np.cos(theta)
        else:
            ## p0, p3
            theta = np.arctan2((poly[3][0] - poly[0][0]),
                               (poly[3][1] - poly[0][1]))
            poly[0][0] += R * r[0] * np.sin(theta)
            poly[0][1] += R * r[0] * np.cos(theta)
            poly[3][0] -= R * r[3] * np.sin(theta)
            poly[3][1] -= R * r[3] * np.cos(theta)
            ## p1, p2
            theta = np.arctan2((poly[2][0] - poly[1][0]),
                               (poly[2][1] - poly[1][1]))
            poly[1][0] += R * r[1] * np.sin(theta)
            poly[1][1] += R * r[1] * np.cos(theta)
            poly[2][0] -= R * r[2] * np.sin(theta)
            poly[2][1] -= R * r[2] * np.cos(theta)
            ## p0, p1
            theta = np.arctan2((poly[1][1] - poly[0][1]),
                               (poly[1][0] - poly[0][0]))
            poly[0][0] += R * r[0] * np.cos(theta)
            poly[0][1] += R * r[0] * np.sin(theta)
            poly[1][0] -= R * r[1] * np.cos(theta)
            poly[1][1] -= R * r[1] * np.sin(theta)
            ## p2, p3
            theta = np.arctan2((poly[2][1] - poly[3][1]),
                               (poly[2][0] - poly[3][0]))
            poly[3][0] += R * r[3] * np.cos(theta)
            poly[3][1] += R * r[3] * np.sin(theta)
            poly[2][0] -= R * r[2] * np.cos(theta)
            poly[2][1] -= R * r[2] * np.sin(theta)
        return poly

    def generate_quad(self, im_size, polys, tags):
        """
        Generate quadrangle targets: score map, 9-channel geometry map
        (4 vertex offsets + short-edge norm) and the training mask.
        """
        h, w = im_size
        poly_mask = np.zeros((h, w), dtype=np.uint8)
        score_map = np.zeros((h, w), dtype=np.uint8)
        # (x1, y1, ..., x4, y4, short_edge_norm)
        geo_map = np.zeros((h, w, 9), dtype=np.float32)
        # mask used during training, to ignore some hard areas
        training_mask = np.ones((h, w), dtype=np.uint8)
        for poly_idx, poly_tag in enumerate(zip(polys, tags)):
            poly = poly_tag[0]
            tag = poly_tag[1]

            # r[i]: the shorter of the two edges adjacent to vertex i
            r = [None, None, None, None]
            for i in range(4):
                dist1 = np.linalg.norm(poly[i] - poly[(i + 1) % 4])
                dist2 = np.linalg.norm(poly[i] - poly[(i - 1) % 4])
                r[i] = min(dist1, dist2)
            # score map
            shrinked_poly = self.shrink_poly(
                poly.copy(), r).astype(np.int32)[np.newaxis, :, :]
            cv2.fillPoly(score_map, shrinked_poly, 1)
            cv2.fillPoly(poly_mask, shrinked_poly, poly_idx + 1)
            # if the poly is too small, then ignore it during training
            poly_h = min(
                np.linalg.norm(poly[0] - poly[3]),
                np.linalg.norm(poly[1] - poly[2]))
            poly_w = min(
                np.linalg.norm(poly[0] - poly[1]),
                np.linalg.norm(poly[2] - poly[3]))
            if min(poly_h, poly_w) < self.min_text_size:
                cv2.fillPoly(training_mask,
                             poly.astype(np.int32)[np.newaxis, :, :], 0)

            if tag:
                cv2.fillPoly(training_mask,
                             poly.astype(np.int32)[np.newaxis, :, :], 0)

            xy_in_poly = np.argwhere(poly_mask == (poly_idx + 1))
            # geo map: per-pixel offsets to the 4 vertices
            y_in_poly = xy_in_poly[:, 0]
            x_in_poly = xy_in_poly[:, 1]
            poly[:, 0] = np.minimum(np.maximum(poly[:, 0], 0), w)
            poly[:, 1] = np.minimum(np.maximum(poly[:, 1], 0), h)
            for pno in range(4):
                geo_channel_beg = pno * 2
                geo_map[y_in_poly, x_in_poly, geo_channel_beg] =\
                    x_in_poly - poly[pno, 0]
                geo_map[y_in_poly, x_in_poly, geo_channel_beg+1] =\
                    y_in_poly - poly[pno, 1]
            geo_map[y_in_poly, x_in_poly, 8] = \
                1.0 / max(min(poly_h, poly_w), 1.0)
        return score_map, geo_map, training_mask

    def crop_area(self,
                  im,
                  polys,
                  tags,
                  txts,
                  crop_background=False,
                  max_tries=50):
        """
        make random crop from the input image
        :param im:
        :param polys:
        :param tags:
        :param crop_background: if True, return a crop containing no text
        :param max_tries:
        :return: (im, polys, tags, txts) for the crop, or the originals
        """
        h, w, _ = im.shape
        pad_h = h // 10
        pad_w = w // 10
        h_array = np.zeros((h + pad_h * 2), dtype=np.int32)
        w_array = np.zeros((w + pad_w * 2), dtype=np.int32)
        for poly in polys:
            poly = np.round(poly, decimals=0).astype(np.int32)
            minx = np.min(poly[:, 0])
            maxx = np.max(poly[:, 0])
            w_array[minx + pad_w:maxx + pad_w] = 1
            miny = np.min(poly[:, 1])
            maxy = np.max(poly[:, 1])
            h_array[miny + pad_h:maxy + pad_h] = 1
        # ensure the cropped area not across a text
        h_axis = np.where(h_array == 0)[0]
        w_axis = np.where(w_array == 0)[0]
        if len(h_axis) == 0 or len(w_axis) == 0:
            return im, polys, tags, txts

        for i in range(max_tries):
            xx = np.random.choice(w_axis, size=2)
            xmin = np.min(xx) - pad_w
            xmax = np.max(xx) - pad_w
            xmin = np.clip(xmin, 0, w - 1)
            xmax = np.clip(xmax, 0, w - 1)
            yy = np.random.choice(h_axis, size=2)
            ymin = np.min(yy) - pad_h
            ymax = np.max(yy) - pad_h
            ymin = np.clip(ymin, 0, h - 1)
            ymax = np.clip(ymax, 0, h - 1)
            if xmax - xmin < self.min_crop_side_ratio * w or \
                    ymax - ymin < self.min_crop_side_ratio * h:
                # area too small
                continue
            if polys.shape[0] != 0:
                poly_axis_in_area = (polys[:, :, 0] >= xmin)\
                    & (polys[:, :, 0] <= xmax)\
                    & (polys[:, :, 1] >= ymin)\
                    & (polys[:, :, 1] <= ymax)
                selected_polys = np.where(
                    np.sum(poly_axis_in_area, axis=1) == 4)[0]
            else:
                selected_polys = []

            if len(selected_polys) == 0:
                # no text in this area
                if crop_background:
                    im = im[ymin:ymax + 1, xmin:xmax + 1, :]
                    polys = []
                    tags = []
                    txts = []
                    return im, polys, tags, txts
                else:
                    continue

            im = im[ymin:ymax + 1, xmin:xmax + 1, :]
            polys = polys[selected_polys]
            tags = tags[selected_polys]
            txts_tmp = []
            for selected_poly in selected_polys:
                txts_tmp.append(txts[selected_poly])
            txts = txts_tmp
            polys[:, :, 0] -= xmin
            polys[:, :, 1] -= ymin
            return im, polys, tags, txts
        return im, polys, tags, txts

    def crop_background_infor(self, im, text_polys, text_tags, text_strs):
        """Crop a text-free region and return all-zero targets for it."""
        im, text_polys, text_tags, text_strs = self.crop_area(
            im, text_polys, text_tags, text_strs, crop_background=True)
        if len(text_polys) > 0:
            return None
        # pad and resize image
        input_size = self.input_size
        im, ratio = self.preprocess(im)
        score_map = np.zeros((input_size, input_size), dtype=np.float32)
        geo_map = np.zeros((input_size, input_size, 9), dtype=np.float32)
        training_mask = np.ones((input_size, input_size), dtype=np.float32)
        return im, score_map, geo_map, training_mask

    def crop_foreground_infor(self, im, text_polys, text_tags, text_strs):
        """Crop a text-containing region and build its quad targets."""
        im, text_polys, text_tags, text_strs = self.crop_area(
            im, text_polys, text_tags, text_strs, crop_background=False)
        if text_polys.shape[0] == 0:
            return None
        # continue for all ignore case
        if np.sum((text_tags * 1.0)) >= text_tags.size:
            return None
        # pad and resize image
        input_size = self.input_size
        im, ratio = self.preprocess(im)
        text_polys[:, :, 0] *= ratio
        text_polys[:, :, 1] *= ratio
        _, _, new_h, new_w = im.shape
        score_map, geo_map, training_mask = self.generate_quad(
            (new_h, new_w), text_polys, text_tags)
        return im, score_map, geo_map, training_mask

    def __call__(self, label_infor):
        """Process one label line; returns None for unusable samples so the
        reader can skip them."""
        infor = self.convert_label_infor(label_infor)
        im_path, text_polys, text_tags, text_strs = infor
        im = cv2.imread(im_path)
        if im is None:
            return None
        if text_polys.shape[0] == 0:
            return None
        # add rotate cases
        if np.random.rand() < 0.5:
            im, text_polys = self.rotate_im_poly(im, text_polys)
        h, w, _ = im.shape
        text_polys, text_tags = self.check_and_validate_polys(text_polys,
                                                              text_tags, h, w)
        if text_polys.shape[0] == 0:
            return None

        # random scale this image
        rd_scale = np.random.choice(self.random_scale)
        im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale)
        text_polys *= rd_scale
        if np.random.rand() < self.background_ratio:
            outs = self.crop_background_infor(im, text_polys, text_tags,
                                              text_strs)
        else:
            outs = self.crop_foreground_infor(im, text_polys, text_tags,
                                              text_strs)

        if outs is None:
            return None
        im, score_map, geo_map, training_mask = outs
        # Targets are predicted at 1/4 resolution; subsample accordingly.
        score_map = score_map[np.newaxis, ::4, ::4].astype(np.float32)
        geo_map = np.swapaxes(geo_map, 1, 2)
        geo_map = np.swapaxes(geo_map, 1, 0)
        geo_map = geo_map[:, ::4, ::4].astype(np.float32)
        training_mask = training_mask[np.newaxis, ::4, ::4]
        training_mask = training_mask.astype(np.float32)
        return im, score_map, geo_map, training_mask


class EASTProcessTest(object):
    """Inference-time preprocessing for the EAST text-detection model."""

    def __init__(self, params):
        super(EASTProcessTest, self).__init__()
        if 'max_side_len' in params:
            self.max_side_len = params['max_side_len']
        else:
            self.max_side_len = 2400

    def resize_image(self, im):
        """
        resize image to a size multiple of 32 which is required by the network
        :param im: the resized image
        :param max_side_len: limit of max image size to avoid out of memory in gpu
        :return: the resized image and the resize ratio
        """
        max_side_len = self.max_side_len
        h, w, _ = im.shape

        resize_w = w
        resize_h = h

        # limit the max side
        if max(resize_h, resize_w) > max_side_len:
            if resize_h > resize_w:
                ratio = float(max_side_len) / resize_h
            else:
                ratio = float(max_side_len) / resize_w
        else:
            ratio = 1.
        resize_h = int(resize_h * ratio)
        resize_w = int(resize_w * ratio)
        # Floor each side to a multiple of 32, but never below 32: the old
        # `(x // 32 - 1) * 32` produced 0 for sides in [33, 63], making
        # cv2.resize fail on small images.
        if resize_h % 32 == 0:
            resize_h = resize_h
        else:
            resize_h = max((resize_h // 32 - 1) * 32, 32)
        if resize_w % 32 == 0:
            resize_w = resize_w
        else:
            resize_w = max((resize_w // 32 - 1) * 32, 32)
        im = cv2.resize(im, (int(resize_w), int(resize_h)))
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)
        return im, (ratio_h, ratio_w)

    def __call__(self, im):
        """Return [normalized NCHW image, (ratio_h, ratio_w)] for one image."""
        im, (ratio_h, ratio_w) = self.resize_image(im)
        img_mean = [0.485, 0.456, 0.406]
        img_std = [0.229, 0.224, 0.225]
        im = im[:, :, ::-1].astype(np.float32)
        im = im / 255
        im -= img_mean
        im /= img_std
        im = im.transpose((2, 0, 1))
        im = im[np.newaxis, :]
        return [im, (ratio_h, ratio_w)]
def draw_border_map(polygon, canvas, mask, shrink_ratio):
    """Render the DB border (threshold) map of one polygon into canvas/mask.

    The polygon is dilated by the same offset distance used for shrinking;
    inside the dilated band every pixel gets 1 - normalized distance to the
    nearest polygon edge.
    """
    polygon = np.array(polygon)
    assert polygon.ndim == 2
    assert polygon.shape[1] == 2

    polygon_shape = Polygon(polygon)
    if polygon_shape.area <= 0:
        return
    # Offset distance from the DB paper: Area * (1 - r^2) / Perimeter.
    distance = polygon_shape.area * (
        1 - np.power(shrink_ratio, 2)) / polygon_shape.length
    subject = [tuple(l) for l in polygon]
    padding = pyclipper.PyclipperOffset()
    padding.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)

    # The offset can yield no polygon for degenerate inputs; skip instead of
    # raising IndexError on Execute(...)[0].
    padded = padding.Execute(distance)
    if len(padded) == 0:
        return
    padded_polygon = np.array(padded[0])
    cv2.fillPoly(mask, [padded_polygon.astype(np.int32)], 1.0)

    xmin = padded_polygon[:, 0].min()
    xmax = padded_polygon[:, 0].max()
    ymin = padded_polygon[:, 1].min()
    ymax = padded_polygon[:, 1].max()
    width = xmax - xmin + 1
    height = ymax - ymin + 1

    # Work in the local coordinates of the dilated bounding box.
    polygon[:, 0] = polygon[:, 0] - xmin
    polygon[:, 1] = polygon[:, 1] - ymin

    xs = np.broadcast_to(
        np.linspace(
            0, width - 1, num=width).reshape(1, width), (height, width))
    ys = np.broadcast_to(
        np.linspace(
            0, height - 1, num=height).reshape(height, 1), (height, width))

    # Per-pixel distance to each polygon edge, clipped/normalized to [0, 1];
    # keep the minimum over all edges.
    distance_map = np.zeros((polygon.shape[0], height, width), dtype=np.float32)
    for i in range(polygon.shape[0]):
        j = (i + 1) % polygon.shape[0]
        absolute_distance = _distance(xs, ys, polygon[i], polygon[j])
        distance_map[i] = np.clip(absolute_distance / distance, 0, 1)
    distance_map = distance_map.min(axis=0)

    # Clamp the bounding box to the canvas and merge with np.fmax so
    # overlapping polygons keep the larger border value.
    xmin_valid = min(max(0, xmin), canvas.shape[1] - 1)
    xmax_valid = min(max(0, xmax), canvas.shape[1] - 1)
    ymin_valid = min(max(0, ymin), canvas.shape[0] - 1)
    ymax_valid = min(max(0, ymax), canvas.shape[0] - 1)
    canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1] = np.fmax(
        1 - distance_map[ymin_valid - ymin:ymax_valid - ymax + height,
                         xmin_valid - xmin:xmax_valid - xmax + width],
        canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1])


def _distance(xs, ys, point_1, point_2):
    '''
    compute the distance from point to a line (segment)
    ys: coordinates in the first axis
    xs: coordinates in the second axis
    point_1, point_2: (x, y), the end of the line
    '''
    height, width = xs.shape[:2]
    square_distance_1 = np.square(xs - point_1[0]) + np.square(ys - point_1[1])
    square_distance_2 = np.square(xs - point_2[0]) + np.square(ys - point_2[1])
    square_distance = np.square(point_1[0] - point_2[0]) + np.square(point_1[
        1] - point_2[1])

    # Law of cosines: angle at the pixel between the two segment endpoints.
    cosin = (square_distance - square_distance_1 - square_distance_2) / (
        2 * np.sqrt(square_distance_1 * square_distance_2))
    square_sin = 1 - np.square(cosin)
    square_sin = np.nan_to_num(square_sin)
    result = np.sqrt(square_distance_1 * square_distance_2 * square_sin /
                     square_distance)

    # If the projection falls outside the segment (cosin < 0), use the
    # distance to the nearer endpoint instead of the perpendicular distance.
    result[cosin <
           0] = np.sqrt(np.fmin(square_distance_1, square_distance_2))[cosin <
                                                                       0]
    # self.extend_line(point_1, point_2, result)
    return result


def extend_line(point_1, point_2, result, shrink_ratio):
    """Draw the extensions of segment (point_1, point_2) into `result`.

    Currently unused by MakeBorderMap (see the commented call in _distance).
    """
    ex_point_1 = (
        int(
            round(point_1[0] + (point_1[0] - point_2[0]) * (1 + shrink_ratio))),
        int(
            round(point_1[1] + (point_1[1] - point_2[1]) * (1 + shrink_ratio))))
    cv2.line(
        result,
        tuple(ex_point_1),
        tuple(point_1),
        4096.0,
        1,
        lineType=cv2.LINE_AA,
        shift=0)
    ex_point_2 = (
        int(
            round(point_2[0] + (point_2[0] - point_1[0]) * (1 + shrink_ratio))),
        int(
            round(point_2[1] + (point_2[1] - point_1[1]) * (1 + shrink_ratio))))
    cv2.line(
        result,
        tuple(ex_point_2),
        tuple(point_2),
        4096.0,
        1,
        lineType=cv2.LINE_AA,
        shift=0)
    return ex_point_1, ex_point_2


def MakeBorderMap(data):
    """Add DB threshold-map targets to a sample dict.

    Writes data['threshold_map'] (values in [thresh_min, thresh_max]) and
    data['threshold_mask'] (1 inside dilated text regions), skipping
    polygons flagged in data['ignore_tags'].
    """
    shrink_ratio = 0.4
    thresh_min = 0.3
    thresh_max = 0.7

    im = data['image']
    text_polys = data['polys']
    ignore_tags = data['ignore_tags']

    canvas = np.zeros(im.shape[:2], dtype=np.float32)
    mask = np.zeros(im.shape[:2], dtype=np.float32)

    for i in range(len(text_polys)):
        if ignore_tags[i]:
            continue
        draw_border_map(
            text_polys[i], canvas, mask=mask, shrink_ratio=shrink_ratio)
    # Rescale [0, 1] distances into the [thresh_min, thresh_max] band.
    canvas = canvas * (thresh_max - thresh_min) + thresh_min

    data['threshold_map'] = canvas
    data['threshold_mask'] = mask
    return data
def validate_polygons(polygons, ignore_tags, h, w):
    '''
    Clip polygons into the image, flag degenerate ones as ignored, and
    normalize vertex orientation.
    polygons (numpy.array, required): of shape (num_instances, num_points, 2)
    '''
    if len(polygons) == 0:
        return polygons, ignore_tags
    assert len(polygons) == len(ignore_tags)
    for polygon in polygons:
        polygon[:, 0] = np.clip(polygon[:, 0], 0, w - 1)
        polygon[:, 1] = np.clip(polygon[:, 1], 0, h - 1)

    for i in range(len(polygons)):
        area = polygon_area(polygons[i])
        # Near-zero area: degenerate polygon, ignore it during training.
        if abs(area) < 1:
            ignore_tags[i] = True
        # Positive signed area means reversed vertex order; flip it so all
        # polygons share one orientation (same convention as the EAST reader).
        if area > 0:
            polygons[i] = polygons[i][::-1, :]
    return polygons, ignore_tags


def polygon_area(polygon):
    """Signed area of a polygon via the shoelace formula.

    The previous version multiplied (x_next - x_i) by (y_next - y_i), which
    is not the shoelace formula and returns 0 for any axis-aligned
    rectangle — making validate_polygons ignore perfectly valid boxes.
    The corrected term (x_next - x_i) * (y_next + y_i) matches
    EASTProcessTrain.polygon_area.
    """
    edge = 0
    for i in range(polygon.shape[0]):
        next_index = (i + 1) % polygon.shape[0]
        edge += (polygon[next_index, 0] - polygon[i, 0]) * (
            polygon[next_index, 1] + polygon[i, 1])

    return edge / 2.


def MakeShrinkMap(data):
    """Add DB shrink-map targets to a sample dict.

    Writes data['shrink_map'] (1 inside shrunken text polygons) and
    data['shrink_mask'] (0 over ignored / too-small / unshrinkable text).
    """
    min_text_size = 8
    shrink_ratio = 0.4

    image = data['image']
    text_polys = data['polys']
    ignore_tags = data['ignore_tags']

    h, w = image.shape[:2]
    text_polys, ignore_tags = validate_polygons(text_polys, ignore_tags, h, w)
    gt = np.zeros((h, w), dtype=np.float32)
    mask = np.ones((h, w), dtype=np.float32)
    for i in range(len(text_polys)):
        polygon = text_polys[i]
        height = max(polygon[:, 1]) - min(polygon[:, 1])
        width = max(polygon[:, 0]) - min(polygon[:, 0])
        if ignore_tags[i] or min(height, width) < min_text_size:
            # Too small or ignored: exclude the region from the loss.
            cv2.fillPoly(mask, polygon.astype(np.int32)[np.newaxis, :, :], 0)
            ignore_tags[i] = True
        else:
            # Shrink by the DB offset: Area * (1 - r^2) / Perimeter.
            polygon_shape = Polygon(polygon)
            distance = polygon_shape.area * (
                1 - np.power(shrink_ratio, 2)) / polygon_shape.length
            subject = [tuple(l) for l in text_polys[i]]
            padding = pyclipper.PyclipperOffset()
            padding.AddPath(subject, pyclipper.JT_ROUND,
                            pyclipper.ET_CLOSEDPOLYGON)
            shrinked = padding.Execute(-distance)
            if shrinked == []:
                # Shrinking collapsed the polygon; ignore it instead.
                cv2.fillPoly(mask,
                             polygon.astype(np.int32)[np.newaxis, :, :], 0)
                ignore_tags[i] = True
                continue
            shrinked = np.array(shrinked[0]).reshape(-1, 2)
            cv2.fillPoly(gt, [shrinked.astype(np.int32)], 1)

    data['shrink_map'] = gt
    data['shrink_mask'] = mask
    return data
max(selected_values) + return xmin, xmax + + +def crop_area(im, text_polys, min_crop_side_ratio, max_tries): + h, w, _ = im.shape + h_array = np.zeros(h, dtype=np.int32) + w_array = np.zeros(w, dtype=np.int32) + for points in text_polys: + points = np.round(points, decimals=0).astype(np.int32) + minx = np.min(points[:, 0]) + maxx = np.max(points[:, 0]) + w_array[minx:maxx] = 1 + miny = np.min(points[:, 1]) + maxy = np.max(points[:, 1]) + h_array[miny:maxy] = 1 + # ensure the cropped area not across a text + h_axis = np.where(h_array == 0)[0] + w_axis = np.where(w_array == 0)[0] + + if len(h_axis) == 0 or len(w_axis) == 0: + return 0, 0, w, h + + h_regions = split_regions(h_axis) + w_regions = split_regions(w_axis) + + for i in range(max_tries): + if len(w_regions) > 1: + xmin, xmax = region_wise_random_select(w_regions, w) + else: + xmin, xmax = random_select(w_axis, w) + if len(h_regions) > 1: + ymin, ymax = region_wise_random_select(h_regions, h) + else: + ymin, ymax = random_select(h_axis, h) + + if xmax - xmin < min_crop_side_ratio * w or ymax - ymin < min_crop_side_ratio * h: + # area too small + continue + num_poly_in_rect = 0 + for poly in text_polys: + if not is_poly_outside_rect(poly, xmin, ymin, xmax - xmin, + ymax - ymin): + num_poly_in_rect += 1 + break + + if num_poly_in_rect > 0: + return xmin, ymin, xmax - xmin, ymax - ymin + + return 0, 0, w, h + + +def RandomCropData(data, size): + max_tries = 10 + min_crop_side_ratio = 0.1 + require_original_image = False + keep_ratio = True + + im = data['image'] + text_polys = data['polys'] + ignore_tags = data['ignore_tags'] + texts = data['texts'] + all_care_polys = [ + text_polys[i] for i, tag in enumerate(ignore_tags) if not tag + ] + # 计算crop区域 + crop_x, crop_y, crop_w, crop_h = crop_area(im, all_care_polys, + min_crop_side_ratio, max_tries) + # crop 图片 保持比例填充 + scale_w = size[0] / crop_w + scale_h = size[1] / crop_h + scale = min(scale_w, scale_h) + h = int(crop_h * scale) + w = int(crop_w * scale) + if 
keep_ratio: + padimg = np.zeros((size[1], size[0], im.shape[2]), im.dtype) + padimg[:h, :w] = cv2.resize( + im[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w], (w, h)) + img = padimg + else: + img = cv2.resize(im[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w], + tuple(size)) + # crop 文本框 + text_polys_crop = [] + ignore_tags_crop = [] + texts_crop = [] + for poly, text, tag in zip(text_polys, texts, ignore_tags): + poly = ((poly - (crop_x, crop_y)) * scale).tolist() + if not is_poly_outside_rect(poly, 0, 0, w, h): + text_polys_crop.append(poly) + ignore_tags_crop.append(tag) + texts_crop.append(text) + data['image'] = img + data['polys'] = np.array(text_polys_crop) + data['ignore_tags'] = ignore_tags_crop + data['texts'] = texts_crop + return data diff --git a/ppocr/data/reader_main.py b/ppocr/data/reader_main.py new file mode 100755 index 00000000..323620bc --- /dev/null +++ b/ppocr/data/reader_main.py @@ -0,0 +1,81 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
import os
import random
import numpy as np

import paddle
from ppocr.utils.utility import create_module
from copy import deepcopy

from .rec.img_tools import process_image
import cv2

import sys
import signal


def _reader_quit(signum, frame):
    """SIGTERM handler: exit the reader process without printing a traceback."""
    print("Reader process exit.")
    sys.exit()


def _term_group(sig_num, frame):
    """SIGINT handler: kill the whole process group (reader workers included)."""
    print('pid {} terminated, terminate group '
          '{}...'.format(os.getpid(), os.getpgrp()))
    os.killpg(os.getpgid(os.getpid()), signal.SIGKILL)


signal.signal(signal.SIGTERM, _reader_quit)
signal.signal(signal.SIGINT, _term_group)


def reader_main(config=None, mode=None):
    """Create a data reader for training, evaluation or testing.

    Args:
        config: full config dict containing 'Global' and the per-mode
            'TrainReader'/'EvalReader'/'TestReader' sections.
        mode: one of "train", "eval", "test".

    Returns:
        For "train": a paddle multiprocess reader over `num_workers` shards;
        otherwise the reader callable produced by the configured
        `reader_function`.
    """
    assert mode in ["train", "eval", "test"],\
        "Nonsupport mode:{}".format(mode)
    global_params = config['Global']
    section = {"train": "TrainReader", "eval": "EvalReader"}.get(mode,
                                                                 "TestReader")
    params = deepcopy(config[section])
    params['mode'] = mode
    params.update(global_params)
    # reader_function names a class such as LMDBReader/SimpleReader.
    function = create_module(params['reader_function'])(params)
    if mode == "train":
        num_workers = params['num_workers']
        readers = [function(process_id) for process_id in range(num_workers)]
        return paddle.reader.multiprocess_reader(readers, False)
    return function(mode)


def test_reader(image_shape, img_path):
    """Read one image from disk and normalize it for inference."""
    img = cv2.imread(img_path)
    return process_image(img, image_shape)
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ppocr/data/rec/dataset_traversal.py b/ppocr/data/rec/dataset_traversal.py new file mode 100755 index 00000000..b6a5fc10 --- /dev/null +++ b/ppocr/data/rec/dataset_traversal.py @@ -0,0 +1,201 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
import os
import math
import random
import numpy as np
import cv2

import string
import lmdb

from ppocr.utils.utility import initial_logger
logger = initial_logger()

from .img_tools import process_image, get_img_data


class LMDBReader(object):
    """Reader for recognition datasets stored as (possibly nested) LMDB sets."""

    def __init__(self, params):
        # Only training shards the data across workers.
        if params['mode'] != 'train':
            self.num_workers = 1
        else:
            self.num_workers = params['num_workers']
        self.lmdb_sets_dir = params['lmdb_sets_dir']
        self.char_ops = params['char_ops']
        self.image_shape = params['image_shape']
        self.loss_type = params['loss_type']
        self.max_text_length = params['max_text_length']
        self.mode = params['mode']
        if params['mode'] == 'train':
            self.batch_size = params['train_batch_size_per_card']
        else:
            self.batch_size = params['test_batch_size_per_card']

    def load_hierarchical_lmdb_dataset(self):
        """Open every leaf directory under lmdb_sets_dir as one LMDB set."""
        lmdb_sets = {}
        dataset_idx = 0
        for dirpath, dirnames, filenames in os.walk(self.lmdb_sets_dir + '/'):
            if not dirnames:  # leaf directory -> one LMDB environment
                env = lmdb.open(
                    dirpath,
                    max_readers=32,
                    readonly=True,
                    lock=False,
                    readahead=False,
                    meminit=False)
                txn = env.begin(write=False)
                num_samples = int(txn.get('num-samples'.encode()))
                lmdb_sets[dataset_idx] = {"dirpath": dirpath,
                                          "env": env,
                                          "txn": txn,
                                          "num_samples": num_samples}
                dataset_idx += 1
        return lmdb_sets

    def print_lmdb_sets_info(self, lmdb_sets):
        """Log path and sample count of every opened LMDB set."""
        lmdb_info_strs = []
        for dataset_idx in range(len(lmdb_sets)):
            tmp_str = " %s:%d," % (lmdb_sets[dataset_idx]['dirpath'],
                                   lmdb_sets[dataset_idx]['num_samples'])
            lmdb_info_strs.append(tmp_str)
        lmdb_info_strs = ''.join(lmdb_info_strs)
        logger.info("DataSummary:" + lmdb_info_strs)
        return

    def close_lmdb_dataset(self, lmdb_sets):
        """Close every LMDB environment opened by load_hierarchical_lmdb_dataset."""
        for dataset_idx in lmdb_sets:
            lmdb_sets[dataset_idx]['env'].close()
        return

    def get_lmdb_sample_info(self, txn, index):
        """Fetch (image, label) for 1-based `index`; None if absent/undecodable."""
        label_key = 'label-%09d'.encode() % index
        label = txn.get(label_key)
        if label is None:
            return None
        label = label.decode('utf-8')
        img_key = 'image-%09d'.encode() % index
        imgbuf = txn.get(img_key)
        img = get_img_data(imgbuf)
        if img is None:
            return None
        return img, label

    def __call__(self, process_id):
        if self.mode != 'train':
            process_id = 0

        def sample_iter_reader():
            lmdb_sets = self.load_hierarchical_lmdb_dataset()
            if process_id == 0:
                self.print_lmdb_sets_info(lmdb_sets)
            # Each worker starts at a different offset and strides by
            # num_workers, so workers see disjoint samples.
            cur_index_sets = [1 + process_id] * len(lmdb_sets)
            while True:
                finish_read_num = 0
                for dataset_idx in range(len(lmdb_sets)):
                    cur_index = cur_index_sets[dataset_idx]
                    if cur_index > lmdb_sets[dataset_idx]['num_samples']:
                        finish_read_num += 1
                    else:
                        sample_info = self.get_lmdb_sample_info(
                            lmdb_sets[dataset_idx]['txn'], cur_index)
                        cur_index_sets[dataset_idx] += self.num_workers
                        if sample_info is None:
                            continue
                        img, label = sample_info
                        outs = process_image(img, self.image_shape, label,
                                             self.char_ops, self.loss_type,
                                             self.max_text_length)
                        if outs is None:
                            continue
                        yield outs

                if finish_read_num == len(lmdb_sets):
                    break
            self.close_lmdb_dataset(lmdb_sets)

        def batch_iter_reader():
            batch_outs = []
            for outs in sample_iter_reader():
                batch_outs.append(outs)
                if len(batch_outs) == self.batch_size:
                    yield batch_outs
                    batch_outs = []
            if len(batch_outs) != 0:
                yield batch_outs

        return batch_iter_reader


class SimpleReader(object):
    """Reader for 'image dir + tab-separated label file' recognition data."""

    def __init__(self, params):
        if params['mode'] != 'train':
            self.num_workers = 1
        else:
            self.num_workers = params['num_workers']
        self.img_set_dir = params['img_set_dir']
        self.label_file_path = params['label_file_path']
        self.char_ops = params['char_ops']
        self.image_shape = params['image_shape']
        self.loss_type = params['loss_type']
        self.max_text_length = params['max_text_length']
        self.mode = params['mode']
        if params['mode'] == 'train':
            self.batch_size = params['train_batch_size_per_card']
        elif params['mode'] == 'eval':
            self.batch_size = params['test_batch_size_per_card']
        else:
            self.batch_size = 1
            self.infer_img = params['infer_img']

    def __call__(self, process_id):
        if self.mode != 'train':
            process_id = 0

        def sample_iter_reader():
            if self.mode == 'test':
                print("infer_img:", self.infer_img)
                img = cv2.imread(self.infer_img)
                norm_img = process_image(img, self.image_shape)
                yield norm_img
                # FIX: stop after the inference image. Previously the
                # generator fell through and also yielded every labelled
                # sample from label_file_path while in test mode.
                return
            with open(self.label_file_path, "rb") as fin:
                label_infor_list = fin.readlines()
            img_num = len(label_infor_list)
            img_id_list = list(range(img_num))
            random.shuffle(img_id_list)
            for img_id in range(process_id, img_num, self.num_workers):
                label_infor = label_infor_list[img_id_list[img_id]]
                substr = label_infor.decode('utf-8').strip("\n").split("\t")
                img_path = self.img_set_dir + "/" + substr[0]
                img = cv2.imread(img_path)
                if img is None:
                    # Unreadable/corrupt image file: skip the sample.
                    continue
                label = substr[1]
                outs = process_image(img, self.image_shape, label,
                                     self.char_ops, self.loss_type,
                                     self.max_text_length)
                if outs is None:
                    continue
                yield outs

        def batch_iter_reader():
            batch_outs = []
            for outs in sample_iter_reader():
                batch_outs.append(outs)
                if len(batch_outs) == self.batch_size:
                    yield batch_outs
                    batch_outs = []
            if len(batch_outs) != 0:
                yield batch_outs

        return batch_iter_reader
import math
import cv2
import numpy as np


def get_bounding_box_rect(pos):
    """Axis-aligned bounding box [left, top, right, bottom].

    NOTE(review): `pos` looks like a pair of coordinate arrays
    (pos[0] = xs, pos[1] = ys) — confirm against callers.
    """
    left = min(pos[0])
    right = max(pos[0])
    top = min(pos[1])
    bottom = max(pos[1])
    return [left, top, right, bottom]


def resize_norm_img(img, image_shape):
    """Resize `img` to height imgH keeping aspect ratio, normalize, pad.

    Grayscale input (imgC == 1) is scaled to [0, 1]; color input is
    additionally CHW-transposed. Both are then mapped to [-1, 1] and
    right-padded with zeros to width imgW.
    """
    imgC, imgH, imgW = image_shape
    h = img.shape[0]
    w = img.shape[1]
    ratio = w / float(h)
    # Cap the resized width at imgW; wider images are squeezed.
    if math.ceil(imgH * ratio) > imgW:
        resized_w = imgW
    else:
        resized_w = int(math.ceil(imgH * ratio))
    resized_image = cv2.resize(img, (resized_w, imgH))
    resized_image = resized_image.astype('float32')
    if image_shape[0] == 1:
        resized_image = resized_image / 255
        resized_image = resized_image[np.newaxis, :]
    else:
        resized_image = resized_image.transpose((2, 0, 1)) / 255
    resized_image -= 0.5
    resized_image /= 0.5
    padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
    padding_im[:, :, 0:resized_w] = resized_image
    return padding_im


def get_img_data(value):
    """Decode a raw image byte buffer to a BGR array; None on failure."""
    if not value:
        return None
    imgdata = np.frombuffer(value, dtype='uint8')
    if imgdata is None:
        return None
    imgori = cv2.imdecode(imgdata, 1)
    if imgori is None:
        return None
    return imgori


def process_image(img,
                  image_shape,
                  label=None,
                  char_ops=None,
                  loss_type=None,
                  max_text_length=None):
    """Normalize one image and (optionally) encode its label.

    Returns:
        norm_img alone when `label` is None (inference);
        (norm_img, text) for loss_type "ctc";
        (norm_img, beg_text, end_text) for loss_type "attention";
        None when the encoded label is empty or exceeds max_text_length,
        signalling the caller to skip the sample.
    """
    norm_img = resize_norm_img(img, image_shape)
    norm_img = norm_img[np.newaxis, :]
    if label is not None:
        char_num = char_ops.get_char_num()
        text = char_ops.encode(label)
        if len(text) == 0 or len(text) > max_text_length:
            return None
        else:
            if loss_type == "ctc":
                text = text.reshape(-1, 1)
                return (norm_img, text)
            elif loss_type == "attention":
                # Attention decoding trains on the label shifted with
                # begin/end sentinels on the input/output side.
                beg_flag_idx = char_ops.get_beg_end_flag_idx("beg")
                end_flag_idx = char_ops.get_beg_end_flag_idx("end")
                beg_text = np.append(beg_flag_idx, text)
                end_text = np.append(text, end_flag_idx)
                beg_text = beg_text.reshape(-1, 1)
                end_text = end_text.reshape(-1, 1)
                return (norm_img, beg_text, end_text)
            else:
                # FIX: corrected the error message ("Unsupport" -> "Unsupported").
                assert False, "Unsupported loss_type %s in process_image"\
                    % loss_type
    return (norm_img)
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from paddle import fluid + +from ppocr.utils.utility import create_module +from ppocr.utils.utility import initial_logger +logger = initial_logger() +from copy import deepcopy + + +class DetModel(object): + def __init__(self, params): + """ + Detection module for OCR text detection. + args: + params (dict): the super parameters for detection module. + """ + global_params = params['Global'] + self.algorithm = global_params['algorithm'] + + backbone_params = deepcopy(params["Backbone"]) + backbone_params.update(global_params) + self.backbone = create_module(backbone_params['function'])\ + (params=backbone_params) + + head_params = deepcopy(params["Head"]) + head_params.update(global_params) + self.head = create_module(head_params['function'])\ + (params=head_params) + + loss_params = deepcopy(params["Loss"]) + loss_params.update(global_params) + self.loss = create_module(loss_params['function'])\ + (params=loss_params) + + self.image_shape = global_params['image_shape'] + + def create_feed(self, mode): + """ + create Dataloader feeds + args: + mode (str): 'train' for training or else for evaluation + return: (image, corresponding label, dataloader) + """ + image_shape = deepcopy(self.image_shape) + image = fluid.layers.data( + name='image', shape=image_shape, dtype='float32') + if mode == "train": + if self.algorithm == "EAST": + score = fluid.layers.data( + name='score', shape=[1, 128, 128], dtype='float32') + geo = fluid.layers.data( + name='geo', shape=[9, 128, 128], dtype='float32') + mask = fluid.layers.data( + name='mask', shape=[1, 128, 128], dtype='float32') + feed_list = [image, score, geo, mask] + labels = {'score': score, 'geo': geo, 'mask': mask} + elif self.algorithm == "DB": + shrink_map = fluid.layers.data( + name='shrink_map', shape=image_shape[1:], dtype='float32') + shrink_mask = fluid.layers.data( + name='shrink_mask', 
shape=image_shape[1:], dtype='float32') + threshold_map = fluid.layers.data( + name='threshold_map', + shape=image_shape[1:], + dtype='float32') + threshold_mask = fluid.layers.data( + name='threshold_mask', + shape=image_shape[1:], + dtype='float32') + feed_list=[image, shrink_map, shrink_mask,\ + threshold_map, threshold_mask] + labels = {'shrink_map':shrink_map,\ + 'shrink_mask':shrink_mask,\ + 'threshold_map':threshold_map,\ + 'threshold_mask':threshold_mask} + loader = fluid.io.DataLoader.from_generator( + feed_list=feed_list, + capacity=64, + use_double_buffer=True, + iterable=False) + else: + labels = None + loader = None + return image, labels, loader + + def __call__(self, mode): + """ + run forward of defined module + args: + mode (str): 'train' for training; 'export' for inference, + others for evaluation] + """ + image, labels, loader = self.create_feed(mode) + conv_feas = self.backbone(image) + predicts = self.head(conv_feas) + if mode == "train": + losses = self.loss(predicts, labels) + return loader, losses + elif mode == "export": + return [image, predicts] + else: + return loader, predicts diff --git a/ppocr/modeling/architectures/rec_model.py b/ppocr/modeling/architectures/rec_model.py new file mode 100755 index 00000000..c54778fd --- /dev/null +++ b/ppocr/modeling/architectures/rec_model.py @@ -0,0 +1,114 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from paddle import fluid + +from ppocr.utils.utility import create_module +from ppocr.utils.utility import initial_logger +logger = initial_logger() +from copy import deepcopy + + +class RecModel(object): + def __init__(self, params): + super(RecModel, self).__init__() + global_params = params['Global'] + char_num = global_params['char_ops'].get_char_num() + global_params['char_num'] = char_num + if "TPS" in params: + tps_params = deepcopy(params["TPS"]) + tps_params.update(global_params) + self.tps = create_module(tps_params['function'])\ + (params=tps_params) + else: + self.tps = None + + backbone_params = deepcopy(params["Backbone"]) + backbone_params.update(global_params) + self.backbone = create_module(backbone_params['function'])\ + (params=backbone_params) + + head_params = deepcopy(params["Head"]) + head_params.update(global_params) + self.head = create_module(head_params['function'])\ + (params=head_params) + + loss_params = deepcopy(params["Loss"]) + loss_params.update(global_params) + self.loss = create_module(loss_params['function'])\ + (params=loss_params) + + self.loss_type = global_params['loss_type'] + self.image_shape = global_params['image_shape'] + self.max_text_length = global_params['max_text_length'] + + def create_feed(self, mode): + image_shape = deepcopy(self.image_shape) + image_shape.insert(0, -1) + image = fluid.data(name='image', shape=image_shape, dtype='float32') + if mode == "train": + if self.loss_type == "attention": + label_in = fluid.data( + name='label_in', + shape=[None, 1], + dtype='int32', + lod_level=1) + label_out = fluid.data( + name='label_out', + shape=[None, 1], + dtype='int32', + lod_level=1) + feed_list = [image, label_in, label_out] + labels = {'label_in': label_in, 'label_out': label_out} + else: + label = fluid.data( + name='label', shape=[None, 1], dtype='int32', lod_level=1) + feed_list = [image, 
label] + labels = {'label': label} + loader = fluid.io.DataLoader.from_generator( + feed_list=feed_list, + capacity=64, + use_double_buffer=True, + iterable=False) + else: + labels = None + loader = None + return image, labels, loader + + def __call__(self, mode): + image, labels, loader = self.create_feed(mode) + if self.tps is None: + inputs = image + else: + inputs = self.tps(image) + conv_feas = self.backbone(inputs) + predicts = self.head(conv_feas, labels, mode) + decoded_out = predicts['decoded_out'] + if mode == "train": + loss = self.loss(predicts, labels) + if self.loss_type == "attention": + label = labels['label_out'] + else: + label = labels['label'] + outputs = {'total_loss':loss, 'decoded_out':\ + decoded_out, 'label':label} + return loader, outputs + elif mode == "export": + return [image, {'decoded_out': decoded_out}] + else: + return loader, {'decoded_out': decoded_out} diff --git a/ppocr/modeling/backbones/det_mobilenet_v3.py b/ppocr/modeling/backbones/det_mobilenet_v3.py new file mode 100755 index 00000000..87f5dd72 --- /dev/null +++ b/ppocr/modeling/backbones/det_mobilenet_v3.py @@ -0,0 +1,251 @@ +#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr

__all__ = ['MobileNetV3']


class MobileNetV3():
    def __init__(self, params):
        """
        the MobilenetV3 backbone network for detection module.
        Args:
            params(dict): the super parameters for build network
        """
        self.scale = params['scale']
        model_name = params['model_name']
        self.inplanes = 16
        if model_name == "large":
            # Each row: kernel, expansion, out channels, SE, nonlinearity, stride
            self.cfg = [
                [3, 16, 16, False, 'relu', 1],
                [3, 64, 24, False, 'relu', 2],
                [3, 72, 24, False, 'relu', 1],
                [5, 72, 40, True, 'relu', 2],
                [5, 120, 40, True, 'relu', 1],
                [5, 120, 40, True, 'relu', 1],
                [3, 240, 80, False, 'hard_swish', 2],
                [3, 200, 80, False, 'hard_swish', 1],
                [3, 184, 80, False, 'hard_swish', 1],
                [3, 184, 80, False, 'hard_swish', 1],
                [3, 480, 112, True, 'hard_swish', 1],
                [3, 672, 112, True, 'hard_swish', 1],
                [5, 672, 160, True, 'hard_swish', 2],
                [5, 960, 160, True, 'hard_swish', 1],
                [5, 960, 160, True, 'hard_swish', 1],
            ]
            self.cls_ch_squeeze = 960
            self.cls_ch_expand = 1280
        elif model_name == "small":
            # Each row: kernel, expansion, out channels, SE, nonlinearity, stride
            self.cfg = [
                [3, 16, 16, True, 'relu', 2],
                [3, 72, 24, False, 'relu', 2],
                [3, 88, 24, False, 'relu', 1],
                [5, 96, 40, True, 'hard_swish', 2],
                [5, 240, 40, True, 'hard_swish', 1],
                [5, 240, 40, True, 'hard_swish', 1],
                [5, 120, 48, True, 'hard_swish', 1],
                [5, 144, 48, True, 'hard_swish', 1],
                [5, 288, 96, True, 'hard_swish', 2],
                [5, 576, 96, True, 'hard_swish', 1],
                [5, 576, 96, True, 'hard_swish', 1],
            ]
            self.cls_ch_squeeze = 576
            self.cls_ch_expand = 1280
        else:
            raise NotImplementedError("mode[" + model_name +
                                      "_model] is not implemented!")

        supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
        assert self.scale in supported_scale, \
            "supported scale are {} but input scale is {}".format(supported_scale, self.scale)

    def __call__(self, input):
        """Build the network; return feature maps before each stride-2 stage
        (after the first three blocks) plus the final squeezed conv."""
        scale = self.scale
        # Stem conv.
        conv = self.conv_bn_layer(
            input,
            filter_size=3,
            num_filters=self.make_divisible(self.inplanes * scale),
            stride=2,
            padding=1,
            num_groups=1,
            if_act=True,
            act='hard_swish',
            name='conv1')
        inplanes = self.make_divisible(self.inplanes * scale)
        outs = []
        for block_idx, layer_cfg in enumerate(self.cfg):
            # Collect the feature map right before each later downsampling.
            if layer_cfg[5] == 2 and block_idx > 2:
                outs.append(conv)
            conv = self.residual_unit(
                input=conv,
                num_in_filter=inplanes,
                num_mid_filter=self.make_divisible(scale * layer_cfg[1]),
                num_out_filter=self.make_divisible(scale * layer_cfg[2]),
                act=layer_cfg[4],
                stride=layer_cfg[5],
                filter_size=layer_cfg[0],
                use_se=layer_cfg[3],
                name='conv' + str(block_idx + 2))
            inplanes = self.make_divisible(scale * layer_cfg[2])

        conv = self.conv_bn_layer(
            input=conv,
            filter_size=1,
            num_filters=self.make_divisible(scale * self.cls_ch_squeeze),
            stride=1,
            padding=0,
            num_groups=1,
            if_act=True,
            act='hard_swish',
            name='conv_last')
        outs.append(conv)
        return outs

    def conv_bn_layer(self,
                      input,
                      filter_size,
                      num_filters,
                      stride,
                      padding,
                      num_groups=1,
                      if_act=True,
                      act=None,
                      name=None,
                      use_cudnn=True,
                      res_last_bn_init=False):
        """Conv (no bias) + BN, optionally followed by relu/hard_swish.

        NOTE(review): `res_last_bn_init` is accepted but unused in this
        implementation — kept for signature compatibility.
        """
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            act=None,
            use_cudnn=use_cudnn,
            param_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)
        bn_name = name + '_bn'
        # BN scale/offset carry zero L2 decay, a common MobileNet practice.
        bn = fluid.layers.batch_norm(
            input=conv,
            param_attr=ParamAttr(
                name=bn_name + "_scale",
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=0.0)),
            bias_attr=ParamAttr(
                name=bn_name + "_offset",
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=0.0)),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')
        if if_act:
            if act == 'relu':
                bn = fluid.layers.relu(bn)
            elif act == 'hard_swish':
                bn = fluid.layers.hard_swish(bn)
        return bn

    def make_divisible(self, v, divisor=8, min_value=None):
        """Round channel count to a multiple of `divisor`, never dropping
        below 90% of the requested value."""
        if min_value is None:
            min_value = divisor
        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
        if new_v < 0.9 * v:
            new_v += divisor
        return new_v

    def se_block(self, input, num_out_filter, ratio=4, name=None):
        """Squeeze-and-excitation: global pool -> FC(relu) -> FC(hard_sigmoid)
        -> channel-wise rescale."""
        num_mid_filter = num_out_filter // ratio
        pool = fluid.layers.pool2d(
            input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
        conv1 = fluid.layers.conv2d(
            input=pool,
            filter_size=1,
            num_filters=num_mid_filter,
            act='relu',
            param_attr=ParamAttr(name=name + '_1_weights'),
            bias_attr=ParamAttr(name=name + '_1_offset'))
        conv2 = fluid.layers.conv2d(
            input=conv1,
            filter_size=1,
            num_filters=num_out_filter,
            act='hard_sigmoid',
            param_attr=ParamAttr(name=name + '_2_weights'),
            bias_attr=ParamAttr(name=name + '_2_offset'))
        return fluid.layers.elementwise_mul(x=input, y=conv2, axis=0)

    def residual_unit(self,
                      input,
                      num_in_filter,
                      num_mid_filter,
                      num_out_filter,
                      stride,
                      filter_size,
                      act=None,
                      use_se=False,
                      name=None):
        """Inverted-residual block: expand 1x1 -> depthwise -> (SE) ->
        linear 1x1, with a skip connection when shape allows."""
        expand = self.conv_bn_layer(
            input=input,
            filter_size=1,
            num_filters=num_mid_filter,
            stride=1,
            padding=0,
            if_act=True,
            act=act,
            name=name + '_expand')

        depthwise = self.conv_bn_layer(
            input=expand,
            filter_size=filter_size,
            num_filters=num_mid_filter,
            stride=stride,
            padding=int((filter_size - 1) // 2),
            if_act=True,
            act=act,
            num_groups=num_mid_filter,
            use_cudnn=False,
            name=name + '_depthwise')
        if use_se:
            depthwise = self.se_block(
                input=depthwise,
                num_out_filter=num_mid_filter,
                name=name + '_se')

        linear = self.conv_bn_layer(
            input=depthwise,
            filter_size=1,
            num_filters=num_out_filter,
            stride=1,
            padding=0,
            if_act=False,
            name=name + '_linear',
            res_last_bn_init=True)
        if num_in_filter != num_out_filter or stride != 1:
            return linear
        return fluid.layers.elementwise_add(x=input, y=linear, act=None)
    def __call__(self, input):
        """Build the detection backbone.

        Returns a list with one feature map per residual stage (1/4 .. 1/32
        of the input resolution), consumed by an FPN-style neck.
        """
        layers = self.layers
        is_3x3 = self.is_3x3
        # number of residual units per stage for each depth
        if layers == 18:
            depth = [2, 2, 2, 2]
        elif layers == 34 or layers == 50:
            depth = [3, 4, 6, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
        elif layers == 200:
            # NOTE(review): 200 is not in this class's supported_layers list,
            # so this branch looks unreachable -- confirm.
            depth = [3, 12, 48, 3]
        num_filters = [64, 128, 256, 512]
        outs = []

        if is_3x3 == False:
            # NOTE(review): this branch passes no `name`, which would make
            # conv_bn_layer fail on `name + "_weights"`; is_3x3 is always
            # True in __init__, so it never runs -- confirm before enabling.
            conv = self.conv_bn_layer(
                input=input,
                num_filters=64,
                filter_size=7,
                stride=2,
                act='relu')
        else:
            # "vd" deep stem: three 3x3 convs in place of a single 7x7
            conv = self.conv_bn_layer(
                input=input,
                num_filters=32,
                filter_size=3,
                stride=2,
                act='relu',
                name='conv1_1')
            conv = self.conv_bn_layer(
                input=conv,
                num_filters=32,
                filter_size=3,
                stride=1,
                act='relu',
                name='conv1_2')
            conv = self.conv_bn_layer(
                input=conv,
                num_filters=64,
                filter_size=3,
                stride=1,
                act='relu',
                name='conv1_3')

        conv = fluid.layers.pool2d(
            input=conv,
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max')

        if layers >= 50:
            # bottleneck units; one output per stage is collected in `outs`
            for block in range(len(depth)):
                for i in range(depth[block]):
                    # deep nets name the third stage "res4a", "res4b1", ...
                    # to match the pretrained-weight naming convention
                    if layers in [101, 152, 200] and block == 2:
                        if i == 0:
                            conv_name = "res" + str(block + 2) + "a"
                        else:
                            conv_name = "res" + str(block + 2) + "b" + str(i)
                    else:
                        conv_name = "res" + str(block + 2) + chr(97 + i)
                    conv = self.bottleneck_block(
                        input=conv,
                        num_filters=num_filters[block],
                        stride=2 if i == 0 and block != 0 else 1,
                        if_first=block == i == 0,
                        name=conv_name)
                outs.append(conv)
        else:
            # basic (two-conv) units for ResNet-18/34
            for block in range(len(depth)):
                for i in range(depth[block]):
                    conv_name = "res" + str(block + 2) + chr(97 + i)
                    conv = self.basic_block(
                        input=conv,
                        num_filters=num_filters[block],
                        stride=2 if i == 0 and block != 0 else 1,
                        if_first=block == i == 0,
                        name=conv_name)
                outs.append(conv)
        return outs

    def conv_bn_layer(self,
                      input,
                      num_filters,
                      filter_size,
                      stride=1,
                      groups=1,
                      act=None,
                      name=None):
        """Bias-free conv followed by batch norm; `act` is applied by the BN."""
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            param_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        # BN parameter names follow the pretrained-checkpoint convention:
        # "conv1" -> "bn_conv1", "res..." -> "bn..." (strip the "res" prefix)
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        return fluid.layers.batch_norm(
            input=conv,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def conv_bn_layer_new(self,
                          input,
                          num_filters,
                          filter_size,
                          stride=1,
                          groups=1,
                          act=None,
                          name=None):
        """ResNet-vd downsample path: 2x2 avg-pool, then a stride-1 conv + BN.

        NOTE(review): the `stride` argument is ignored here (the pool is
        hard-coded to 2); the recognition variant pools by `stride` instead
        -- confirm the asymmetry is intentional.
        """
        pool = fluid.layers.pool2d(
            input=input,
            pool_size=2,
            pool_stride=2,
            pool_padding=0,
            pool_type='avg',
            ceil_mode=True)

        conv = fluid.layers.conv2d(
            input=pool,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=1,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            param_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        return fluid.layers.batch_norm(
            input=conv,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def shortcut(self, input, ch_out, stride, name, if_first=False):
        """Residual shortcut: identity when shapes match, else a projection.

        The very first unit (`if_first`) uses a plain strided conv; later
        downsampling units use the pool+conv variant (conv_bn_layer_new).
        """
        ch_in = input.shape[1]
        if ch_in != ch_out or stride != 1:
            if if_first:
                return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
            else:
                return self.conv_bn_layer_new(
                    input, ch_out, 1, stride, name=name)
        elif if_first:
            return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
        else:
            return input

    def bottleneck_block(self, input, num_filters, stride, name, if_first):
        """1x1 -> 3x3 (carries the stride) -> 1x1 x4 unit; relu on the sum."""
        conv0 = self.conv_bn_layer(
            input=input,
            num_filters=num_filters,
            filter_size=1,
            act='relu',
            name=name + "_branch2a")
        conv1 = self.conv_bn_layer(
            input=conv0,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2b")
        conv2 = self.conv_bn_layer(
            input=conv1,
            num_filters=num_filters * 4,
            filter_size=1,
            act=None,
            name=name + "_branch2c")

        short = self.shortcut(
            input,
            num_filters * 4,
            stride,
            if_first=if_first,
            name=name + "_branch1")

        return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')

    def basic_block(self, input, num_filters, stride, name, if_first):
        """Two 3x3 convs (ResNet-18/34 unit); relu applied to the sum."""
        conv0 = self.conv_bn_layer(
            input=input,
            num_filters=num_filters,
            filter_size=3,
            act='relu',
            stride=stride,
            name=name + "_branch2a")
        conv1 = self.conv_bn_layer(
            input=conv0,
            num_filters=num_filters,
            filter_size=3,
            act=None,
            name=name + "_branch2b")
        short = self.shortcut(
            input,
            num_filters,
            stride,
            if_first=if_first,
            name=name + "_branch1")
        return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')
class MobileNetV3():
    """MobileNetV3 backbone for text recognition.

    Unlike the classification MobileNetV3, several stages use a (2, 1)
    stride so the feature map keeps horizontal resolution for long text
    lines, and the network ends with a 2x2 max-pool instead of a
    classifier head.

    Args:
        params (dict): must contain:
            scale (float): width multiplier; one of [0.35, 0.5, 0.75, 1.0, 1.25].
            model_name (str): "large" or "small".

    Raises:
        NotImplementedError: for an unknown `model_name`.
        AssertionError: for an unsupported `scale`.
    """

    def __init__(self, params):
        self.scale = params['scale']
        model_name = params['model_name']
        self.inplanes = 16
        if model_name == "large":
            self.cfg = [
                # k, exp, c, se, nl, s,
                [3, 16, 16, False, 'relu', 1],
                [3, 64, 24, False, 'relu', (2, 1)],
                [3, 72, 24, False, 'relu', 1],
                [5, 72, 40, True, 'relu', (2, 1)],
                [5, 120, 40, True, 'relu', 1],
                [5, 120, 40, True, 'relu', 1],
                [3, 240, 80, False, 'hard_swish', 1],
                [3, 200, 80, False, 'hard_swish', 1],
                [3, 184, 80, False, 'hard_swish', 1],
                [3, 184, 80, False, 'hard_swish', 1],
                [3, 480, 112, True, 'hard_swish', 1],
                [3, 672, 112, True, 'hard_swish', 1],
                [5, 672, 160, True, 'hard_swish', (2, 1)],
                [5, 960, 160, True, 'hard_swish', 1],
                [5, 960, 160, True, 'hard_swish', 1],
            ]
            self.cls_ch_squeeze = 960
            self.cls_ch_expand = 1280
        elif model_name == "small":
            self.cfg = [
                # k, exp, c, se, nl, s,
                [3, 16, 16, True, 'relu', (2, 1)],
                [3, 72, 24, False, 'relu', (2, 1)],
                [3, 88, 24, False, 'relu', 1],
                [5, 96, 40, True, 'hard_swish', (2, 1)],
                [5, 240, 40, True, 'hard_swish', 1],
                [5, 240, 40, True, 'hard_swish', 1],
                [5, 120, 48, True, 'hard_swish', 1],
                [5, 144, 48, True, 'hard_swish', 1],
                [5, 288, 96, True, 'hard_swish', (2, 1)],
                [5, 576, 96, True, 'hard_swish', 1],
                [5, 576, 96, True, 'hard_swish', 1],
            ]
            self.cls_ch_squeeze = 576
            self.cls_ch_expand = 1280
        else:
            raise NotImplementedError("mode[" + model_name +
                                      "_model] is not implemented!")

        supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
        # BUGFIX: the message previously referenced an undefined local
        # `scale`, so a failed assertion raised NameError instead of
        # AssertionError with a useful message.
        assert self.scale in supported_scale, \
            "supported scale are {} but input scale is {}".format(
                supported_scale, self.scale)

    def __call__(self, input):
        """Build the backbone graph and return the final feature map."""
        scale = self.scale
        inplanes = self.inplanes
        cfg = self.cfg
        cls_ch_squeeze = self.cls_ch_squeeze
        # NOTE: cls_ch_expand is kept for config parity with the
        # classification model but is not used by this backbone.
        #conv1
        conv = self.conv_bn_layer(
            input,
            filter_size=3,
            num_filters=self.make_divisible(inplanes * scale),
            stride=2,
            padding=1,
            num_groups=1,
            if_act=True,
            act='hard_swish',
            name='conv1')
        i = 0
        inplanes = self.make_divisible(inplanes * scale)
        for layer_cfg in cfg:
            conv = self.residual_unit(
                input=conv,
                num_in_filter=inplanes,
                num_mid_filter=self.make_divisible(scale * layer_cfg[1]),
                num_out_filter=self.make_divisible(scale * layer_cfg[2]),
                act=layer_cfg[4],
                stride=layer_cfg[5],
                filter_size=layer_cfg[0],
                use_se=layer_cfg[3],
                name='conv' + str(i + 2))
            inplanes = self.make_divisible(scale * layer_cfg[2])
            i += 1

        conv = self.conv_bn_layer(
            input=conv,
            filter_size=1,
            num_filters=self.make_divisible(scale * cls_ch_squeeze),
            stride=1,
            padding=0,
            num_groups=1,
            if_act=True,
            act='hard_swish',
            name='conv_last')

        conv = fluid.layers.pool2d(
            input=conv,
            pool_size=2,
            pool_stride=2,
            pool_padding=0,
            pool_type='max')
        return conv

    def conv_bn_layer(self,
                      input,
                      filter_size,
                      num_filters,
                      stride,
                      padding,
                      num_groups=1,
                      if_act=True,
                      act=None,
                      name=None,
                      use_cudnn=True,
                      res_last_bn_init=False):
        """Conv (bias-free) + BN, optionally followed by relu/hard_swish.

        NOTE(review): `res_last_bn_init` is accepted but unused; kept for
        interface compatibility with callers -- confirm.
        """
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            act=None,
            use_cudnn=use_cudnn,
            param_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)
        bn_name = name + '_bn'
        # BN scale/offset are excluded from weight decay (coeff 0.0)
        bn = fluid.layers.batch_norm(
            input=conv,
            param_attr=ParamAttr(
                name=bn_name + "_scale",
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=0.0)),
            bias_attr=ParamAttr(
                name=bn_name + "_offset",
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=0.0)),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')
        if if_act:
            # only relu / hard_swish are supported; other values of `act`
            # silently skip the activation
            if act == 'relu':
                bn = fluid.layers.relu(bn)
            elif act == 'hard_swish':
                bn = fluid.layers.hard_swish(bn)
        return bn

    def make_divisible(self, v, divisor=8, min_value=None):
        """Round `v` to the nearest multiple of `divisor`, never going
        below `min_value` nor below 90% of `v` (MobileNet channel rule)."""
        if min_value is None:
            min_value = divisor
        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
        if new_v < 0.9 * v:
            new_v += divisor
        return new_v

    def se_block(self, input, num_out_filter, ratio=4, name=None):
        """Squeeze-and-excitation: global avg-pool -> 1x1 reduce (relu)
        -> 1x1 expand (hard_sigmoid) -> channel-wise rescale."""
        num_mid_filter = num_out_filter // ratio
        pool = fluid.layers.pool2d(
            input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
        conv1 = fluid.layers.conv2d(
            input=pool,
            filter_size=1,
            num_filters=num_mid_filter,
            act='relu',
            param_attr=ParamAttr(name=name + '_1_weights'),
            bias_attr=ParamAttr(name=name + '_1_offset'))
        conv2 = fluid.layers.conv2d(
            input=conv1,
            filter_size=1,
            num_filters=num_out_filter,
            act='hard_sigmoid',
            param_attr=ParamAttr(name=name + '_2_weights'),
            bias_attr=ParamAttr(name=name + '_2_offset'))
        scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0)
        return scale

    def residual_unit(self,
                      input,
                      num_in_filter,
                      num_mid_filter,
                      num_out_filter,
                      stride,
                      filter_size,
                      act=None,
                      use_se=False,
                      name=None):
        """Inverted residual: 1x1 expand -> depthwise -> (SE) -> 1x1 linear.

        The identity skip is added only when input/output channel counts
        match and the stride is 1.
        """
        conv0 = self.conv_bn_layer(
            input=input,
            filter_size=1,
            num_filters=num_mid_filter,
            stride=1,
            padding=0,
            if_act=True,
            act=act,
            name=name + '_expand')

        conv1 = self.conv_bn_layer(
            input=conv0,
            filter_size=filter_size,
            num_filters=num_mid_filter,
            stride=stride,
            padding=int((filter_size - 1) // 2),
            if_act=True,
            act=act,
            num_groups=num_mid_filter,
            use_cudnn=False,
            name=name + '_depthwise')
        if use_se:
            conv1 = self.se_block(
                input=conv1, num_out_filter=num_mid_filter, name=name + '_se')

        conv2 = self.conv_bn_layer(
            input=conv1,
            filter_size=1,
            num_filters=num_out_filter,
            stride=1,
            padding=0,
            if_act=False,
            name=name + '_linear',
            res_last_bn_init=True)
        if num_in_filter != num_out_filter or stride != 1:
            return conv2
        else:
            return fluid.layers.elementwise_add(x=input, y=conv2, act=None)
class ResNet():
    """ResNet-vd backbone for text recognition.

    Downsampling inside the residual stages uses (2, 1) strides, so the
    horizontal resolution is preserved for long text lines; unlike the
    detection variant, a single feature map is returned.

    Args:
        params (dict): must contain `layers`, one of
            [18, 34, 50, 101, 152, 200].
    """

    def __init__(self, params):
        self.layers = params['layers']
        self.is_3x3 = True
        supported_layers = [18, 34, 50, 101, 152, 200]
        assert self.layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(supported_layers, self.layers)

    def __call__(self, input):
        """Build the backbone graph and return the final feature map."""
        # residual units per stage, keyed by network depth
        depth_cfg = {
            18: [2, 2, 2, 2],
            34: [3, 4, 6, 3],
            50: [3, 4, 6, 3],
            101: [3, 4, 23, 3],
            152: [3, 8, 36, 3],
            200: [3, 12, 48, 3],
        }
        depth = depth_cfg[self.layers]
        num_filters = [64, 128, 256, 512]

        if self.is_3x3:
            # "vd" deep stem: three 3x3 convs in place of a single 7x7
            conv = input
            for stem_filters, stem_name in ((32, 'conv1_1'), (32, 'conv1_2'),
                                            (64, 'conv1_3')):
                conv = self.conv_bn_layer(
                    input=conv,
                    num_filters=stem_filters,
                    filter_size=3,
                    stride=1,
                    act='relu',
                    name=stem_name)
        else:
            conv = self.conv_bn_layer(
                input=input,
                num_filters=64,
                filter_size=7,
                stride=1,
                act='relu')

        conv = fluid.layers.pool2d(
            input=conv,
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max')

        use_bottleneck = self.layers >= 50
        for stage, units in enumerate(depth):
            for unit in range(units):
                # halve only the height at the start of stages 1..3
                stride = (2, 1) if unit == 0 and stage != 0 else (1, 1)
                if use_bottleneck:
                    # deep nets name the third stage "res4a", "res4b1", ...
                    # to match the pretrained-weight naming convention
                    if self.layers in [101, 152, 200] and stage == 2:
                        suffix = "a" if unit == 0 else "b" + str(unit)
                    else:
                        suffix = chr(97 + unit)
                    conv = self.bottleneck_block(
                        input=conv,
                        num_filters=num_filters[stage],
                        stride=stride,
                        if_first=stage == unit == 0,
                        name="res" + str(stage + 2) + suffix)
                else:
                    conv = self.basic_block(
                        input=conv,
                        num_filters=num_filters[stage],
                        stride=stride,
                        if_first=stage == unit == 0,
                        name="res" + str(stage + 2) + chr(97 + unit))

        conv = fluid.layers.pool2d(
            input=conv,
            pool_size=2,
            pool_stride=2,
            pool_padding=0,
            pool_type='max')

        return conv

    def conv_bn_layer(self,
                      input,
                      num_filters,
                      filter_size,
                      stride=1,
                      groups=1,
                      act=None,
                      name=None):
        """Bias-free conv followed by batch norm; `act` applied by the BN."""
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            param_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        # BN parameter names follow the pretrained-checkpoint convention
        bn_name = "bn_" + name if name == "conv1" else "bn" + name[3:]
        return fluid.layers.batch_norm(
            input=conv,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def conv_bn_layer_new(self,
                          input,
                          num_filters,
                          filter_size,
                          stride=1,
                          groups=1,
                          act=None,
                          name=None):
        """ResNet-vd downsample path: avg-pool by `stride`, then a
        stride-1 conv + BN (avoids losing information in a strided 1x1)."""
        pool = fluid.layers.pool2d(
            input=input,
            pool_size=stride,
            pool_stride=stride,
            pool_padding=0,
            pool_type='avg',
            ceil_mode=True)

        conv = fluid.layers.conv2d(
            input=pool,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=1,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            param_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)

        bn_name = "bn_" + name if name == "conv1" else "bn" + name[3:]
        return fluid.layers.batch_norm(
            input=conv,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def shortcut(self, input, ch_out, stride, name, if_first=False):
        """Residual shortcut: identity when shapes match, else a projection."""
        if input.shape[1] == ch_out and stride[0] == 1:
            if not if_first:
                return input
            return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
        # channel or spatial mismatch: project with a 1x1 conv; the first
        # unit uses a plain strided conv, later ones the pool+conv variant
        if if_first:
            return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
        return self.conv_bn_layer_new(input, ch_out, 1, stride, name=name)

    def bottleneck_block(self, input, num_filters, stride, name, if_first):
        """1x1 -> 3x3 (carries the stride) -> 1x1 x4; relu on the sum."""
        branch = self.conv_bn_layer(
            input=input,
            num_filters=num_filters,
            filter_size=1,
            act='relu',
            name=name + "_branch2a")
        branch = self.conv_bn_layer(
            input=branch,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2b")
        branch = self.conv_bn_layer(
            input=branch,
            num_filters=num_filters * 4,
            filter_size=1,
            act=None,
            name=name + "_branch2c")

        short = self.shortcut(
            input,
            num_filters * 4,
            stride,
            if_first=if_first,
            name=name + "_branch1")

        return fluid.layers.elementwise_add(x=short, y=branch, act='relu')

    def basic_block(self, input, num_filters, stride, name, if_first):
        """Two 3x3 convs (ResNet-18/34 unit); relu applied to the sum."""
        branch = self.conv_bn_layer(
            input=input,
            num_filters=num_filters,
            filter_size=3,
            act='relu',
            stride=stride,
            name=name + "_branch2a")
        branch = self.conv_bn_layer(
            input=branch,
            num_filters=num_filters,
            filter_size=3,
            act=None,
            name=name + "_branch2b")
        short = self.shortcut(
            input,
            num_filters,
            stride,
            if_first=if_first,
            name=name + "_branch1")
        return fluid.layers.elementwise_add(x=short, y=branch, act='relu')
def get_para_bias_attr(l2_decay, k, name):
    """Build a [param_attr, bias_attr] pair for a fully-connected layer.

    Both attrs share an L2 regularizer with coefficient `l2_decay` and a
    uniform initializer on [-1/sqrt(k), 1/sqrt(k)] (fan-in style, where
    `k` is the input feature size).
    """
    regularizer = fluid.regularizer.L2Decay(l2_decay)
    stdv = 1.0 / math.sqrt(k * 1.0)
    initializer = fluid.initializer.Uniform(-stdv, stdv)
    para_attr = fluid.ParamAttr(
        regularizer=regularizer, initializer=initializer, name=name + "_w_attr")
    bias_attr = fluid.ParamAttr(
        regularizer=regularizer, initializer=initializer, name=name + "_b_attr")
    return [para_attr, bias_attr]


def conv_bn_layer(input,
                  num_filters,
                  filter_size,
                  stride=1,
                  groups=1,
                  act=None,
                  name=None):
    """Bias-free conv + batch norm; the activation `act` is applied by
    the BN layer.  Parameter names are derived from `name` ("bn_<name>")
    so pretrained weights can be matched by name."""
    conv = fluid.layers.conv2d(
        input=input,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding=(filter_size - 1) // 2,
        groups=groups,
        act=None,
        param_attr=ParamAttr(name=name + "_weights"),
        bias_attr=False,
        name=name + '.conv2d')

    bn_name = "bn_" + name
    return fluid.layers.batch_norm(
        input=conv,
        act=act,
        name=bn_name + '.output',
        param_attr=ParamAttr(name=bn_name + '_scale'),
        bias_attr=ParamAttr(bn_name + '_offset'),
        moving_mean_name=bn_name + '_mean',
        moving_variance_name=bn_name + '_variance')


def deconv_bn_layer(input,
                    num_filters,
                    filter_size=4,
                    stride=2,
                    act='relu',
                    name=None):
    """Transposed conv (defaults give 2x spatial upsampling) + batch norm."""
    deconv = fluid.layers.conv2d_transpose(
        input=input,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding=1,
        act=None,
        param_attr=ParamAttr(name=name + "_weights"),
        bias_attr=False,
        name=name + '.deconv2d')
    bn_name = "bn_" + name
    return fluid.layers.batch_norm(
        input=deconv,
        act=act,
        name=bn_name + '.output',
        param_attr=ParamAttr(name=bn_name + '_scale'),
        bias_attr=ParamAttr(bn_name + '_offset'),
        moving_mean_name=bn_name + '_mean',
        moving_variance_name=bn_name + '_variance')


def create_tmp_var(program, name, dtype, shape, lod_level=0):
    """Create an intermediate variable in `program`'s current block
    (e.g. to hold the output of a custom op)."""
    return program.current_block().create_var(
        name=name, dtype=dtype, shape=shape, lod_level=lod_level)
class DBHead(object):
    """
    Differentiable Binarization (DB) for text detection:
    see https://arxiv.org/abs/1911.08947
    args:
        params(dict): super parameters for build DB network
    """

    def __init__(self, params):
        self.k = params['k']
        self.inner_channels = params['inner_channels']
        # image_shape is (C, H, W); stored for callers' reference.
        # BUGFIX: removed a stray debug print of the image shape.
        self.C, self.H, self.W = params['image_shape']

    def _upsample_head(self, x):
        """Shared conv -> BN -> deconv(x2) -> BN -> deconv(x2) -> sigmoid
        tower used by both the shrink-map and the threshold-map branches
        (their original bodies were byte-identical).

        NOTE(review): both branches pass the same names ("conv2"/"conv3")
        to _get_bias_attr, so the deconv bias parameters are shared between
        binarize and thresh; this is preserved here -- confirm intentional.
        """
        conv1 = fluid.layers.conv2d(
            input=x,
            num_filters=self.inner_channels // 4,
            filter_size=3,
            padding=1,
            param_attr=fluid.initializer.MSRAInitializer(uniform=False),
            bias_attr=False)
        conv_bn1 = fluid.layers.batch_norm(
            input=conv1,
            param_attr=fluid.initializer.ConstantInitializer(value=1.0),
            bias_attr=fluid.initializer.ConstantInitializer(value=1e-4),
            act="relu")
        conv2 = fluid.layers.conv2d_transpose(
            input=conv_bn1,
            num_filters=self.inner_channels // 4,
            filter_size=2,
            stride=2,
            param_attr=fluid.initializer.MSRAInitializer(uniform=False),
            bias_attr=self._get_bias_attr(0.0004, conv_bn1.shape[1], "conv2"),
            act=None)
        conv_bn2 = fluid.layers.batch_norm(
            input=conv2,
            param_attr=fluid.initializer.ConstantInitializer(value=1.0),
            bias_attr=fluid.initializer.ConstantInitializer(value=1e-4),
            act="relu")
        conv3 = fluid.layers.conv2d_transpose(
            input=conv_bn2,
            num_filters=1,
            filter_size=2,
            stride=2,
            param_attr=fluid.initializer.MSRAInitializer(uniform=False),
            bias_attr=self._get_bias_attr(0.0004, conv_bn2.shape[1], "conv3"),
            act=None)
        out = fluid.layers.sigmoid(conv3)
        return out

    def binarize(self, x):
        """Shrink (probability) map branch."""
        return self._upsample_head(x)

    def thresh(self, x):
        """Threshold map branch."""
        return self._upsample_head(x)

    def _get_bias_attr(self, l2_decay, k, name, gradient_clip=None):
        """Bias attr with L2 decay and uniform init on [-1/sqrt(k), 1/sqrt(k)]."""
        regularizer = fluid.regularizer.L2Decay(l2_decay)
        stdv = 1.0 / math.sqrt(k * 1.0)
        initializer = fluid.initializer.Uniform(-stdv, stdv)
        bias_attr = fluid.ParamAttr(
            regularizer=regularizer,
            gradient_clip=gradient_clip,
            initializer=initializer,
            name=name + "_b_attr")
        return bias_attr

    def step_function(self, x, y):
        """Differentiable binarization: sigmoid(k * (x - y)) with steepness k."""
        return fluid.layers.reciprocal(1 + fluid.layers.exp(-self.k * (x - y)))

    def __call__(self, conv_features, mode="train"):
        """Fuse the FPN features and predict the DB maps.

        Returns the shrink map alone at inference, or a dict with 'maps'
        (shrink/threshold/binary concatenated on the channel axis) at train.
        """
        c2, c3, c4, c5 = conv_features
        param_attr = fluid.initializer.MSRAInitializer(uniform=False)
        # 1x1 lateral convs bring every level to inner_channels
        in5 = fluid.layers.conv2d(
            input=c5,
            num_filters=self.inner_channels,
            filter_size=1,
            param_attr=param_attr,
            bias_attr=False)
        in4 = fluid.layers.conv2d(
            input=c4,
            num_filters=self.inner_channels,
            filter_size=1,
            param_attr=param_attr,
            bias_attr=False)
        in3 = fluid.layers.conv2d(
            input=c3,
            num_filters=self.inner_channels,
            filter_size=1,
            param_attr=param_attr,
            bias_attr=False)
        in2 = fluid.layers.conv2d(
            input=c2,
            num_filters=self.inner_channels,
            filter_size=1,
            param_attr=param_attr,
            bias_attr=False)

        # top-down pathway: upsample and add
        out4 = fluid.layers.elementwise_add(
            x=fluid.layers.resize_nearest(
                input=in5, scale=2), y=in4)  # 1/16
        out3 = fluid.layers.elementwise_add(
            x=fluid.layers.resize_nearest(
                input=out4, scale=2), y=in3)  # 1/8
        out2 = fluid.layers.elementwise_add(
            x=fluid.layers.resize_nearest(
                input=out3, scale=2), y=in2)  # 1/4

        # smooth each level and bring all to 1/4 resolution before concat
        p5 = fluid.layers.conv2d(
            input=in5,
            num_filters=self.inner_channels // 4,
            filter_size=3,
            padding=1,
            param_attr=param_attr,
            bias_attr=False)
        p5 = fluid.layers.resize_nearest(input=p5, scale=8)
        p4 = fluid.layers.conv2d(
            input=out4,
            num_filters=self.inner_channels // 4,
            filter_size=3,
            padding=1,
            param_attr=param_attr,
            bias_attr=False)
        p4 = fluid.layers.resize_nearest(input=p4, scale=4)
        p3 = fluid.layers.conv2d(
            input=out3,
            num_filters=self.inner_channels // 4,
            filter_size=3,
            padding=1,
            param_attr=param_attr,
            bias_attr=False)
        p3 = fluid.layers.resize_nearest(input=p3, scale=2)
        p2 = fluid.layers.conv2d(
            input=out2,
            num_filters=self.inner_channels // 4,
            filter_size=3,
            padding=1,
            param_attr=param_attr,
            bias_attr=False)

        fuse = fluid.layers.concat(input=[p5, p4, p3, p2], axis=1)
        shrink_maps = self.binarize(fuse)
        if mode != "train":
            return shrink_maps
        threshold_maps = self.thresh(fuse)
        binary_maps = self.step_function(shrink_maps, threshold_maps)
        y = fluid.layers.concat(
            input=[shrink_maps, threshold_maps, binary_maps], axis=1)
        predicts = {}
        predicts['maps'] = y
        return predicts
class EASTHead(object):
    """
    EAST: An Efficient and Accurate Scene Text Detector
    see arxiv: https://arxiv.org/abs/1704.03155
    args:
        params(dict): the super parameters for network build
    """

    def __init__(self, params):

        self.model_name = params['model_name']

    def unet_fusion(self, inputs):
        """U-Net style top-down fusion of the backbone feature maps.

        `inputs` is ordered shallow-to-deep; it is reversed so fusion
        starts from the deepest level.  Returns the final fused map.
        """
        feats = inputs[::-1]
        if self.model_name == "large":
            channels = [128, 128, 128, 128]
        else:
            channels = [64, 64, 64, 64]
        merged_prev = None
        for i in range(4):
            if merged_prev is None:
                mixed = feats[i]
            else:
                mixed = fluid.layers.concat([merged_prev, feats[i]], axis=1)
            smoothed = conv_bn_layer(
                input=mixed,
                num_filters=channels[i],
                filter_size=3,
                stride=1,
                act='relu',
                name="unet_h_%d" % (i))
            if i <= 2:
                #can be replaced with unpool
                merged_prev = deconv_bn_layer(
                    input=smoothed,
                    num_filters=channels[i],
                    name="unet_g_%d" % (i))
            else:
                merged_prev = conv_bn_layer(
                    input=smoothed,
                    num_filters=channels[i],
                    filter_size=3,
                    stride=1,
                    act='relu',
                    name="unet_g_%d" % (i))
        return merged_prev

    def detector_header(self, f_common):
        """Predict the score map and the 8-channel geometry map from the
        fused feature map."""
        if self.model_name == "large":
            widths = [128, 64, 1, 8]
        else:
            widths = [64, 32, 1, 8]
        feat = f_common
        for idx in (0, 1):
            feat = conv_bn_layer(
                input=feat,
                num_filters=widths[idx],
                filter_size=3,
                stride=1,
                act='relu',
                name="det_head%d" % (idx + 1))
        #f_score
        f_score = conv_bn_layer(
            input=feat,
            num_filters=widths[2],
            filter_size=1,
            stride=1,
            act=None,
            name="f_score")
        f_score = fluid.layers.sigmoid(f_score)
        #f_geo
        f_geo = conv_bn_layer(
            input=feat,
            num_filters=widths[3],
            filter_size=1,
            stride=1,
            act=None,
            name="f_geo")
        # map the sigmoid output from (0, 1) to (-800, 800)
        f_geo = (fluid.layers.sigmoid(f_geo) - 0.5) * 2 * 800
        return f_score, f_geo

    def __call__(self, inputs):
        fused = self.unet_fusion(inputs)
        score, geo = self.detector_header(fused)
        return {'f_score': score, 'f_geo': geo}
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from .rec_seq_encoder import SequenceEncoder
import numpy as np


class AttentionPredict(object):
    """Attention-based decoder head for text recognition.

    Encodes backbone features with SequenceEncoder, then decodes one
    character per step with a GRU plus additive attention.  Training uses
    teacher forcing (``gru_decoder_with_attention``); inference runs a
    greedy step-by-step search (``gru_attention_infer`` with beam_size=1).
    """

    def __init__(self, params):
        super(AttentionPredict, self).__init__()
        # Number of character classes produced by the decoder.
        self.char_num = params['char_num']
        self.encoder = SequenceEncoder(params)
        self.decoder_size = params['Attention']['decoder_size']
        self.word_vector_dim = params['Attention']['word_vector_dim']
        self.encoder_type = params['encoder_type']
        # Maximum number of decoding steps at inference time.
        self.max_length = params['max_text_length']

    def simple_attention(self, encoder_vec, encoder_proj, decoder_state,
                         decoder_size):
        """Additive attention: score each encoder step against the decoder
        state and return the attention-weighted sum (context vector)."""
        decoder_state_proj = layers.fc(input=decoder_state,
                                       size=decoder_size,
                                       bias_attr=False,
                                       name="decoder_state_proj_fc")
        # Broadcast the per-sample decoder state over its encoder time steps.
        decoder_state_expand = layers.sequence_expand(
            x=decoder_state_proj, y=encoder_proj)
        concated = layers.elementwise_add(encoder_proj, decoder_state_expand)
        concated = layers.tanh(x=concated)
        attention_weights = layers.fc(input=concated,
                                      size=1,
                                      act=None,
                                      bias_attr=False,
                                      name="attention_weights_fc")
        # Softmax normalised within each sample's sequence (LoD-aware).
        attention_weights = layers.sequence_softmax(input=attention_weights)
        weigths_reshape = layers.reshape(x=attention_weights, shape=[-1])
        scaled = layers.elementwise_mul(
            x=encoder_vec, y=weigths_reshape, axis=0)
        context = layers.sequence_pool(input=scaled, pool_type='sum')
        return context

    def gru_decoder_with_attention(self, target_embedding, encoder_vec,
                                   encoder_proj, decoder_boot, decoder_size,
                                   char_num):
        """Teacher-forced training decoder: one GRU step per target token,
        attending over the encoder outputs at every step."""
        rnn = layers.DynamicRNN()
        with rnn.block():
            current_word = rnn.step_input(target_embedding)
            encoder_vec = rnn.static_input(encoder_vec)
            encoder_proj = rnn.static_input(encoder_proj)
            # GRU hidden state, initialised from decoder_boot.
            hidden_mem = rnn.memory(init=decoder_boot, need_reorder=True)
            context = self.simple_attention(encoder_vec, encoder_proj,
                                            hidden_mem, decoder_size)
            fc_1 = layers.fc(input=context,
                             size=decoder_size * 3,
                             bias_attr=False,
                             name="rnn_fc1")
            fc_2 = layers.fc(input=current_word,
                             size=decoder_size * 3,
                             bias_attr=False,
                             name="rnn_fc2")
            decoder_inputs = fc_1 + fc_2
            # gru_unit expects input of size 3 * hidden (update/reset/cand).
            h, _, _ = layers.gru_unit(
                input=decoder_inputs, hidden=hidden_mem, size=decoder_size * 3)
            rnn.update_memory(hidden_mem, h)
            out = layers.fc(input=h,
                            size=char_num,
                            bias_attr=True,
                            act='softmax',
                            name="rnn_out_fc")
            rnn.output(out)
        return rnn()

    def gru_attention_infer(self, decoder_boot, max_length, char_num,
                            word_vector_dim, encoded_vector, encoded_proj,
                            decoder_size):
        """Greedy inference decoder.

        Unrolls at most ``max_length`` steps inside a static While loop,
        feeding the previous prediction back in as the next input.  Returns
        ``full_ids``: the concatenated argmax id per step (first column is
        the constant start value 1).
        """
        init_state = decoder_boot
        beam_size = 1
        array_len = layers.fill_constant(
            shape=[1], dtype='int64', value=max_length)
        counter = layers.zeros(shape=[1], dtype='int64', force_cpu=True)

        # fill the first element with init_state
        state_array = layers.create_array('float32')
        layers.array_write(init_state, array=state_array, i=counter)

        # ids, scores as memory
        ids_array = layers.create_array('int64')
        scores_array = layers.create_array('float32')
        rois_shape = layers.shape(init_state)
        batch_size = layers.slice(
            rois_shape, axes=[0], starts=[0], ends=[1]) + 1
        lod_level = layers.range(
            start=0, end=batch_size, step=1, dtype=batch_size.dtype)

        # One id/score slot per sample; LoD set so each sample is length 1.
        init_ids = layers.fill_constant_batch_size_like(
            input=init_state, shape=[-1, 1], value=0, dtype='int64')
        init_ids = layers.lod_reset(init_ids, lod_level)
        init_ids = layers.lod_append(init_ids, lod_level)

        init_scores = layers.fill_constant_batch_size_like(
            input=init_state, shape=[-1, 1], value=1, dtype='float32')
        init_scores = layers.lod_reset(init_scores, init_ids)
        layers.array_write(init_ids, array=ids_array, i=counter)
        layers.array_write(init_scores, array=scores_array, i=counter)

        # Accumulates one predicted id column per decoding step.
        full_ids = fluid.layers.fill_constant_batch_size_like(
            input=init_state, shape=[-1, 1], dtype='int64', value=1)

        cond = layers.less_than(x=counter, y=array_len)
        while_op = layers.While(cond=cond)
        with while_op.block():
            pre_ids = layers.array_read(array=ids_array, i=counter)
            pre_state = layers.array_read(array=state_array, i=counter)
            pre_score = layers.array_read(array=scores_array, i=counter)
            pre_ids_emb = layers.embedding(
                input=pre_ids,
                size=[char_num, word_vector_dim],
                dtype='float32')

            context = self.simple_attention(encoded_vector, encoded_proj,
                                            pre_state, decoder_size)

            # expand the recursive_sequence_lengths of pre_state
            # to be the same with pre_score
            pre_state_expanded = layers.sequence_expand(pre_state, pre_score)
            context_expanded = layers.sequence_expand(context, pre_score)

            # Parameter names deliberately match the training decoder so
            # inference reuses the same weights (rnn_fc1/rnn_fc2/rnn_out_fc).
            fc_1 = layers.fc(input=context_expanded,
                             size=decoder_size * 3,
                             bias_attr=False,
                             name="rnn_fc1")

            fc_2 = layers.fc(input=pre_ids_emb,
                             size=decoder_size * 3,
                             bias_attr=False,
                             name="rnn_fc2")

            decoder_inputs = fc_1 + fc_2
            current_state, _, _ = layers.gru_unit(
                input=decoder_inputs,
                hidden=pre_state_expanded,
                size=decoder_size * 3)
            current_state_with_lod = layers.lod_reset(
                x=current_state, y=pre_score)
            # use score to do beam search
            current_score = layers.fc(input=current_state_with_lod,
                                      size=char_num,
                                      bias_attr=True,
                                      act='softmax',
                                      name="rnn_out_fc")
            topk_scores, topk_indices = layers.topk(current_score, k=beam_size)

            # Append this step's argmax ids to the running output.
            new_ids = fluid.layers.concat([full_ids, topk_indices], axis=1)
            fluid.layers.assign(new_ids, full_ids)

            layers.increment(x=counter, value=1, in_place=True)

            # update the memories
            layers.array_write(current_state, array=state_array, i=counter)
            layers.array_write(topk_indices, array=ids_array, i=counter)
            layers.array_write(topk_scores, array=scores_array, i=counter)

            # update the break condition:
            # up to the max length or all candidates of
            # source sentences have ended.
            length_cond = layers.less_than(x=counter, y=array_len)
            finish_cond = layers.logical_not(layers.is_empty(x=topk_indices))
            layers.logical_and(x=length_cond, y=finish_cond, out=cond)
        return full_ids

    def __call__(self, inputs, labels=None, mode=None):
        """Build the prediction graph.

        Returns a dict with 'predict' (per-step softmax, train only) and
        'decoded_out' (decoded character ids).
        """
        encoder_features = self.encoder(inputs)
        char_num = self.char_num
        word_vector_dim = self.word_vector_dim
        decoder_size = self.decoder_size

        if self.encoder_type == "reshape":
            encoder_input = encoder_features
            encoded_vector = encoder_features
        else:
            # RNN encoder returns [forward, backward]; use the backward pass
            # to bootstrap the decoder and the concatenation as memory.
            encoder_input = encoder_features[1]
            encoded_vector = layers.concat(encoder_features, axis=1)
        encoded_proj = layers.fc(input=encoded_vector,
                                 size=decoder_size,
                                 bias_attr=False,
                                 name="encoded_proj_fc")
        backward_first = layers.sequence_pool(
            input=encoder_input, pool_type='first')
        decoder_boot = layers.fc(input=backward_first,
                                 size=decoder_size,
                                 bias_attr=False,
                                 act="relu",
                                 name='decoder_boot')

        if mode == "train":
            label_in = labels['label_in']
            label_out = labels['label_out']
            label_in = layers.cast(x=label_in, dtype='int64')
            trg_embedding = layers.embedding(
                input=label_in,
                size=[char_num, word_vector_dim],
                dtype='float32')
            predict = self.gru_decoder_with_attention(
                trg_embedding, encoded_vector, encoded_proj, decoder_boot,
                decoder_size, char_num)
            _, decoded_out = layers.topk(input=predict, k=1)
            decoded_out = layers.lod_reset(decoded_out, y=label_out)
            predicts = {'predict': predict, 'decoded_out': decoded_out}
        else:
            ids = self.gru_attention_infer(
                decoder_boot, self.max_length, char_num, word_vector_dim,
                encoded_vector, encoded_proj, decoder_size)
            predicts = {'decoded_out': ids}
        return predicts
+# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr +from .rec_seq_encoder import SequenceEncoder +from ..common_functions import get_para_bias_attr +import numpy as np + + +class CTCPredict(object): + def __init__(self, params): + super(CTCPredict, self).__init__() + self.char_num = params['char_num'] + self.encoder = SequenceEncoder(params) + self.encoder_type = params['encoder_type'] + + def __call__(self, inputs, labels=None, mode=None): + encoder_features = self.encoder(inputs) + if self.encoder_type != "reshape": + encoder_features = fluid.layers.concat(encoder_features, axis=1) + name = "ctc_fc" + para_attr, bias_attr = get_para_bias_attr( + l2_decay=0.0004, k=encoder_features.shape[1], name=name) + predict = fluid.layers.fc(input=encoder_features, + size=self.char_num + 1, + param_attr=para_attr, + bias_attr=bias_attr, + name=name) + decoded_out = fluid.layers.ctc_greedy_decoder( + input=predict, blank=self.char_num) + predicts = {'predict': predict, 'decoded_out': decoded_out} + return predicts diff --git a/ppocr/modeling/heads/rec_seq_encoder.py b/ppocr/modeling/heads/rec_seq_encoder.py new file mode 100755 index 00000000..0c49667a --- /dev/null +++ b/ppocr/modeling/heads/rec_seq_encoder.py @@ -0,0 +1,100 @@ +#copyright (c) 2019 PaddlePaddle Authors. 
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import paddle.fluid as fluid
import paddle.fluid.layers as layers


class EncoderWithReshape(object):
    """Collapses a conv feature map into a sequence by slicing full-height,
    width-1 columns with im2sequence."""

    def __init__(self, params):
        super(EncoderWithReshape, self).__init__()

    def __call__(self, inputs):
        # filter covers the whole feature-map height, so each output step
        # corresponds to one horizontal position.
        sliced_feature = layers.im2sequence(
            input=inputs,
            stride=[1, 1],
            filter_size=[inputs.shape[2], 1],
            name="sliced_feature")
        return sliced_feature


class EncoderWithRNN(object):
    """Two stacked LSTM layers run in both directions.

    Returns [forward_lstm, backward_lstm]; callers concatenate or pick a
    direction as needed.  Parameter names (lstm_st*_fc*/out*) are part of
    the checkpoint format — do not change them.
    """

    def __init__(self, params):
        super(EncoderWithRNN, self).__init__()
        self.rnn_hidden_size = params['SeqRNN']['hidden_size']

    def __call__(self, inputs):
        lstm_list = []
        name_prefix = "lstm"
        rnn_hidden_size = self.rnn_hidden_size
        # no == 1: forward direction; no == 2: reverse direction.
        for no in range(1, 3):
            if no == 1:
                is_reverse = False
            else:
                is_reverse = True
            name = "%s_st1_fc%d" % (name_prefix, no)
            # dynamic_lstm requires a 4*hidden projection of its input.
            fc = layers.fc(input=inputs,
                           size=rnn_hidden_size * 4,
                           param_attr=fluid.ParamAttr(name=name + "_w"),
                           bias_attr=fluid.ParamAttr(name=name + "_b"),
                           name=name)
            name = "%s_st1_out%d" % (name_prefix, no)
            lstm, _ = layers.dynamic_lstm(
                input=fc,
                size=rnn_hidden_size * 4,
                is_reverse=is_reverse,
                param_attr=fluid.ParamAttr(name=name + "_w"),
                bias_attr=fluid.ParamAttr(name=name + "_b"),
                use_peepholes=False)
            # Second stacked LSTM layer in the same direction.
            name = "%s_st2_fc%d" % (name_prefix, no)
            fc = layers.fc(input=lstm,
                           size=rnn_hidden_size * 4,
                           param_attr=fluid.ParamAttr(name=name + "_w"),
                           bias_attr=fluid.ParamAttr(name=name + "_b"),
                           name=name)
            name = "%s_st2_out%d" % (name_prefix, no)
            lstm, _ = layers.dynamic_lstm(
                input=fc,
                size=rnn_hidden_size * 4,
                is_reverse=is_reverse,
                param_attr=fluid.ParamAttr(name=name + "_w"),
                bias_attr=fluid.ParamAttr(name=name + "_b"),
                use_peepholes=False)
            lstm_list.append(lstm)
        return lstm_list


class SequenceEncoder(object):
    """Dispatches to reshape-only or reshape+RNN encoding based on the
    'encoder_type' config key ("reshape" or "rnn")."""

    def __init__(self, params):
        super(SequenceEncoder, self).__init__()
        self.encoder_type = params['encoder_type']
        self.encoder_reshape = EncoderWithReshape(params)
        if self.encoder_type == "rnn":
            self.encoder_rnn = EncoderWithRNN(params)

    def __call__(self, inputs):
        # Note: "reshape" returns a single tensor, "rnn" returns a list of
        # two tensors — callers branch on encoder_type accordingly.
        if self.encoder_type == "reshape":
            encoder_features = self.encoder_reshape(inputs)
        elif self.encoder_type == "rnn":
            inputs = self.encoder_reshape(inputs)
            encoder_features = self.encoder_rnn(inputs)
        else:
            assert False, "Unsupport encoder_type:%s"\
                % self.encoder_type
        return encoder_features
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +import paddle.fluid as fluid + + +def BalanceLoss(pred, + gt, + mask, + balance_loss=True, + main_loss_type="DiceLoss", + negative_ratio=3, + return_origin=False, + eps=1e-6): + """ + The BalanceLoss for Differentiable Binarization text detection + args: + pred (variable): predicted feature maps. + gt (variable): ground truth feature maps. + mask (variable): masked maps. + balance_loss (bool): whether balance loss or not, default is True + main_loss_type (str): can only be one of ['CrossEntropy','DiceLoss', + 'Euclidean','BCELoss', 'MaskL1Loss'], default is 'DiceLoss'. + negative_ratio (int|float): float, default is 3. + return_origin (bool): whether return unbalanced loss or not, default is False. + eps (float): default is 1e-6. + return: (variable) balanced loss + """ + positive = gt * mask + negative = (1 - gt) * mask + + positive_count = fluid.layers.reduce_sum(positive) + positive_count_int = fluid.layers.cast(positive_count, dtype=np.int32) + negative_count = min( + fluid.layers.reduce_sum(negative), positive_count * negative_ratio) + negative_count_int = fluid.layers.cast(negative_count, dtype=np.int32) + + if main_loss_type == "CrossEntropy": + loss = fluid.layers.cross_entropy(input=pred, label=gt, soft_label=True) + loss = fluid.layers.reduce_mean(loss) + elif main_loss_type == "Euclidean": + loss = fluid.layers.square(pred - gt) + loss = fluid.layers.reduce_mean(loss) + elif main_loss_type == "DiceLoss": + loss = DiceLoss(pred, gt, mask) + elif main_loss_type == "BCELoss": + loss = fluid.layers.sigmoid_cross_entropy_with_logits(pred, label=gt) + elif main_loss_type == "MaskL1Loss": + loss = MaskL1Loss(pred, gt, mask) + else: + loss_type = [ + 'CrossEntropy', 'DiceLoss', 'Euclidean', 'BCELoss', 'MaskL1Loss' + ] + raise Exception("main_loss_type in BalanceLoss() can only be one of {}". 
+ format(loss_type)) + + if not balance_loss: + return loss + + positive_loss = positive * loss + negative_loss = negative * loss + negative_loss = fluid.layers.reshape(negative_loss, shape=[-1]) + negative_loss, _ = fluid.layers.topk(negative_loss, k=negative_count_int) + balance_loss = (fluid.layers.reduce_sum(positive_loss) + + fluid.layers.reduce_sum(negative_loss)) / ( + positive_count + negative_count + eps) + + if return_origin: + return balance_loss, loss + return balance_loss + + +def DiceLoss(pred, gt, mask, weights=None, eps=1e-6): + """ + DiceLoss function. + """ + + assert pred.shape == gt.shape + assert pred.shape == mask.shape + if weights is not None: + assert weights.shape == mask.shape + mask = weights * mask + intersection = fluid.layers.reduce_sum(pred * gt * mask) + + union = fluid.layers.reduce_sum(pred * mask) + fluid.layers.reduce_sum( + gt * mask) + eps + loss = 1 - 2.0 * intersection / union + assert loss <= 1 + return loss + + +def MaskL1Loss(pred, gt, mask, eps=1e-6): + """ + Mask L1 Loss + """ + loss = fluid.layers.reduce_sum((fluid.layers.abs(pred - gt) * mask)) / ( + fluid.layers.reduce_sum(mask) + eps) + loss = fluid.layers.reduce_mean(loss) + return loss diff --git a/ppocr/modeling/losses/det_db_loss.py b/ppocr/modeling/losses/det_db_loss.py new file mode 100755 index 00000000..c35e33ae --- /dev/null +++ b/ppocr/modeling/losses/det_db_loss.py @@ -0,0 +1,68 @@ +#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from .det_basic_loss import BalanceLoss, MaskL1Loss, DiceLoss


class DBLoss(object):
    """
    Differentiable Binarization (DB) Loss Function
    args:
        param (dict): the super paramter for DB Loss
    """

    def __init__(self, params):
        super(DBLoss, self).__init__()
        self.balance_loss = params['balance_loss']
        self.main_loss_type = params['main_loss_type']
        # alpha/beta weight the shrink-map and threshold-map terms.
        self.alpha = params['alpha']
        self.beta = params['beta']
        self.ohem_ratio = params['ohem_ratio']

    def __call__(self, predicts, labels):
        prediction = predicts['maps']
        # Channel layout: 0 = shrink (probability) map,
        # 1 = threshold map, 2 = binary map.
        shrink_pred = prediction[:, 0, :, :]
        threshold_pred = prediction[:, 1, :, :]
        binary_pred = prediction[:, 2, :, :]

        shrink_term = self.alpha * BalanceLoss(
            shrink_pred,
            labels['shrink_map'],
            labels['shrink_mask'],
            balance_loss=self.balance_loss,
            main_loss_type=self.main_loss_type,
            negative_ratio=self.ohem_ratio)
        threshold_term = self.beta * MaskL1Loss(
            threshold_pred, labels['threshold_map'], labels['threshold_mask'])
        binary_term = DiceLoss(binary_pred, labels['shrink_map'],
                               labels['shrink_mask'])

        return {
            'total_loss': shrink_term + threshold_term + binary_term,
            "loss_shrink_maps": shrink_term,
            "loss_threshold_maps": threshold_term,
            "loss_binary_maps": binary_term
        }
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.fluid as fluid


class EASTLoss(object):
    """
    EAST Loss function: a dice loss on the score map plus a per-channel
    smooth-L1 loss on the 8-channel geometry map (the 9th geometry label
    channel is used as a per-pixel normaliser).
    """

    def __init__(self, params=None):
        super(EASTLoss, self).__init__()

    def __call__(self, predicts, labels):
        score_pred = predicts['f_score']
        geo_pred = predicts['f_geo']
        score_gt = labels['score']
        geo_gt = labels['geo']
        train_mask = labels['mask']

        # Dice loss over the score map, restricted to the training mask.
        overlap = fluid.layers.reduce_sum(score_pred * score_gt * train_mask)
        denom = fluid.layers.reduce_sum(score_pred * train_mask)\
            + fluid.layers.reduce_sum(score_gt * train_mask)
        dice_loss = 1 - 2 * overlap / (denom + 1e-5)

        # Smooth-L1 over the 8 geometry channels; the extra label channel
        # (index -1) divided by `channels` acts as a per-pixel weight.
        channels = 8
        geo_gt_chunks = fluid.layers.split(
            geo_gt, num_or_sections=channels + 1, dim=1)
        geo_pred_chunks = fluid.layers.split(
            geo_pred, num_or_sections=channels, dim=1)
        smooth_l1 = 0
        for ch in range(0, channels):
            abs_geo_diff = fluid.layers.abs(geo_gt_chunks[ch] -
                                            geo_pred_chunks[ch])
            # 1.0 where |diff| < score_gt (quadratic region), else 0.0.
            inside = fluid.layers.cast(
                fluid.layers.less_than(abs_geo_diff, score_gt),
                dtype='float32')
            in_loss = abs_geo_diff * abs_geo_diff * inside + \
                (abs_geo_diff - 0.5) * (1.0 - inside)
            smooth_l1 += geo_gt_chunks[-1] / channels * in_loss * score_gt
        smooth_l1_loss = fluid.layers.reduce_mean(smooth_l1 * score_gt)

        # Down-weight the dice term as in the reference implementation.
        dice_loss = dice_loss * 0.01
        total_loss = dice_loss + smooth_l1_loss
        return {'total_loss': total_loss, "dice_loss": dice_loss,
                "smooth_l1_loss": smooth_l1_loss}
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr +import numpy as np + + +class AttentionLoss(object): + def __init__(self, params): + super(AttentionLoss, self).__init__() + self.char_num = params['char_num'] + + def __call__(self, predicts, labels): + predict = predicts['predict'] + label_out = labels['label_out'] + label_out = fluid.layers.cast(x=label_out, dtype='int64') + cost = fluid.layers.cross_entropy(input=predict, label=label_out) + sum_cost = fluid.layers.reduce_sum(cost) + return sum_cost diff --git a/ppocr/modeling/losses/rec_ctc_loss.py b/ppocr/modeling/losses/rec_ctc_loss.py new file mode 100755 index 00000000..3552d320 --- /dev/null +++ b/ppocr/modeling/losses/rec_ctc_loss.py @@ -0,0 +1,36 @@ +#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +import paddle +import paddle.fluid as fluid + + +class CTCLoss(object): + def __init__(self, params): + super(CTCLoss, self).__init__() + self.char_num = params['char_num'] + + def __call__(self, predicts, labels): + predict = predicts['predict'] + label = labels['label'] + cost = fluid.layers.warpctc( + input=predict, label=label, blank=self.char_num, norm_by_times=True) + sum_cost = fluid.layers.reduce_sum(cost) + return sum_cost diff --git a/ppocr/modeling/stns/tps.py b/ppocr/modeling/stns/tps.py new file mode 100755 index 00000000..24c6448d --- /dev/null +++ b/ppocr/modeling/stns/tps.py @@ -0,0 +1,261 @@ +#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid.param_attr import ParamAttr
import numpy as np


class LocalizationNetwork(object):
    """Small CNN that regresses F fiducial control points (batch_C_prime,
    shape [-1, F, 2]) from the input image for TPS rectification."""

    def __init__(self, params):
        super(LocalizationNetwork, self).__init__()
        self.F = params['num_fiducial']
        self.loc_lr = params['loc_lr']
        self.model_name = params['model_name']

    def conv_bn_layer(self,
                      input,
                      num_filters,
                      filter_size,
                      stride=1,
                      groups=1,
                      act=None,
                      name=None):
        """conv2d (no bias) followed by batch_norm carrying the activation."""
        conv = layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            param_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        bn_name = "bn_" + name
        return layers.batch_norm(
            input=conv,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def get_initial_fiducials(self):
        """ see RARE paper Fig. 6 (a): F/2 points along the top edge and
        F/2 along the bottom edge, used as the bias init of loc_fc2. """
        F = self.F
        ctrl_pts_x = np.linspace(-1.0, 1.0, int(F / 2))
        ctrl_pts_y_top = np.linspace(0.0, -1.0, num=int(F / 2))
        ctrl_pts_y_bottom = np.linspace(1.0, 0.0, num=int(F / 2))
        ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
        ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
        initial_bias = np.concatenate([ctrl_pts_top, ctrl_pts_bottom], axis=0)
        return initial_bias

    def __call__(self, image):
        F = self.F
        loc_lr = self.loc_lr
        if self.model_name == "large":
            num_filters_list = [64, 128, 256, 512]
            fc_dim = 256
        else:
            num_filters_list = [16, 32, 64, 128]
            fc_dim = 64
        # Conv/pool pyramid; the last stage global-average-pools to 1x1.
        for fno in range(len(num_filters_list)):
            num_filters = num_filters_list[fno]
            name = "loc_conv%d" % fno
            if fno == 0:
                conv = self.conv_bn_layer(
                    image, num_filters, 3, act='relu', name=name)
            else:
                conv = self.conv_bn_layer(
                    pool, num_filters, 3, act='relu', name=name)

            if fno == len(num_filters_list) - 1:
                pool = layers.adaptive_pool2d(
                    input=conv, pool_size=[1, 1], pool_type='avg')
            else:
                pool = layers.pool2d(
                    input=conv,
                    pool_size=2,
                    pool_stride=2,
                    pool_padding=0,
                    pool_type='max')
        name = "loc_fc1"
        stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
        fc1 = layers.fc(input=pool,
                        size=fc_dim,
                        param_attr=fluid.param_attr.ParamAttr(
                            learning_rate=loc_lr,
                            initializer=fluid.initializer.Uniform(-stdv, stdv),
                            name=name + "_w"),
                        act='relu',
                        name=name)

        # Weights start at zero and the bias at the canonical fiducials, so
        # the initial transform is (approximately) identity.
        initial_bias = self.get_initial_fiducials()
        initial_bias = initial_bias.reshape(-1)
        name = "loc_fc2"
        param_attr = fluid.param_attr.ParamAttr(
            learning_rate=loc_lr,
            initializer=fluid.initializer.NumpyArrayInitializer(
                np.zeros([fc_dim, F * 2])),
            name=name + "_w")
        bias_attr = fluid.param_attr.ParamAttr(
            learning_rate=loc_lr,
            initializer=fluid.initializer.NumpyArrayInitializer(initial_bias),
            name=name + "_b")
        fc2 = layers.fc(input=fc1,
                        size=F * 2,
                        param_attr=param_attr,
                        bias_attr=bias_attr,
                        name=name)
        batch_C_prime = layers.reshape(x=fc2, shape=[-1, F, 2], inplace=False)
        return batch_C_prime


class GridGenerator(object):
    """Computes the TPS sampling grid batch_P_prime from the predicted
    fiducials (thin-plate-spline interpolation, RARE-style)."""

    def __init__(self, params):
        super(GridGenerator, self).__init__()
        self.eps = 1e-6
        self.F = params['num_fiducial']

    def build_C(self):
        """ Return coordinates of fiducial points in I_r; C """
        F = self.F
        ctrl_pts_x = np.linspace(-1.0, 1.0, int(F / 2))
        ctrl_pts_y_top = -1 * np.ones(int(F / 2))
        ctrl_pts_y_bottom = np.ones(int(F / 2))
        ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
        ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
        C = np.concatenate([ctrl_pts_top, ctrl_pts_bottom], axis=0)
        return C  # F x 2

    def build_P(self, I_r_size):
        """Return the pixel-center sampling grid of the rectified image,
        normalised to (-1, 1), flattened to n x 2."""
        I_r_width, I_r_height = I_r_size
        I_r_grid_x = (np.arange(-I_r_width, I_r_width, 2) + 1.0)\
            / I_r_width  # self.I_r_width
        I_r_grid_y = (np.arange(-I_r_height, I_r_height, 2) + 1.0)\
            / I_r_height  # self.I_r_height
        # P: self.I_r_width x self.I_r_height x 2
        P = np.stack(np.meshgrid(I_r_grid_x, I_r_grid_y), axis=2)
        # n (= self.I_r_width x self.I_r_height) x 2
        return P.reshape([-1, 2])

    def build_inv_delta_C(self, C):
        """ Return inv_delta_C which is needed to calculate T """
        F = self.F
        hat_C = np.zeros((F, F), dtype=float)  # F x F
        # Pairwise distances between fiducials, then the TPS radial basis
        # r^2 * log(r) (diagonal set to 1 so log is defined).
        for i in range(0, F):
            for j in range(i, F):
                r = np.linalg.norm(C[i] - C[j])
                hat_C[i, j] = r
                hat_C[j, i] = r
        np.fill_diagonal(hat_C, 1)
        hat_C = (hat_C**2) * np.log(hat_C)
        delta_C = np.concatenate(  # F+3 x F+3
            [
                np.concatenate(
                    [np.ones((F, 1)), C, hat_C], axis=1),  # F x F+3
                np.concatenate(
                    [np.zeros((2, 3)), np.transpose(C)], axis=1),  # 2 x F+3
                np.concatenate(
                    [np.zeros((1, 3)), np.ones((1, F))], axis=1)  # 1 x F+3
            ],
            axis=0)
        inv_delta_C = np.linalg.inv(delta_C)
        return inv_delta_C  # F+3 x F+3

    def build_P_hat(self, C, P):
        """Lift each grid point P to the TPS basis [1, x, y, rbf_1..rbf_F]."""
        F = self.F
        eps = self.eps
        n = P.shape[0]  # n (= self.I_r_width x self.I_r_height)
        # P_tile: n x 2 -> n x 1 x 2 -> n x F x 2
        P_tile = np.tile(np.expand_dims(P, axis=1), (1, F, 1))
        C_tile = np.expand_dims(C, axis=0)  # 1 x F x 2
        P_diff = P_tile - C_tile  # n x F x 2
        # rbf_norm: n x F
        rbf_norm = np.linalg.norm(P_diff, ord=2, axis=2, keepdims=False)
        # rbf: n x F  (r^2 * log(r), eps guards log(0))
        rbf = np.multiply(np.square(rbf_norm), np.log(rbf_norm + eps))
        P_hat = np.concatenate([np.ones((n, 1)), P, rbf], axis=1)
        return P_hat  # n x F+3

    def get_expand_tensor(self, batch_C_prime):
        """Produce the 3x2 zero block appended to batch_C_prime (TPS affine
        constraints).  Implemented as a zero-initialised, zero-lr FC so it
        stays constant-zero inside the static graph."""
        name = "ex_fc"
        initializer = fluid.initializer.ConstantInitializer(value=0.0)
        param_attr = fluid.param_attr.ParamAttr(
            learning_rate=0.0, initializer=initializer, name=name + "_w")
        bias_attr = fluid.param_attr.ParamAttr(
            learning_rate=0.0, initializer=initializer, name=name + "_b")
        batch_C_ex_part_tensor = fluid.layers.fc(input=batch_C_prime,
                                                 size=6,
                                                 param_attr=param_attr,
                                                 bias_attr=bias_attr,
                                                 name=name)
        batch_C_ex_part_tensor = fluid.layers.reshape(
            x=batch_C_ex_part_tensor, shape=[-1, 3, 2])
        return batch_C_ex_part_tensor

    def __call__(self, batch_C_prime, I_r_size):
        """Solve the TPS system T = inv_delta_C @ [C'; 0] and map the
        rectified grid through it: batch_P_prime = P_hat @ T."""
        C = self.build_C()
        P = self.build_P(I_r_size)
        # Both matrices depend only on F / output size, so they are
        # computed in numpy and frozen into the graph as constants.
        inv_delta_C = self.build_inv_delta_C(C).astype('float32')
        P_hat = self.build_P_hat(C, P).astype('float32')

        inv_delta_C_tensor = layers.create_tensor(dtype='float32')
        layers.assign(inv_delta_C, inv_delta_C_tensor)
        inv_delta_C_tensor.stop_gradient = True
        P_hat_tensor = layers.create_tensor(dtype='float32')
        layers.assign(P_hat, P_hat_tensor)
        P_hat_tensor.stop_gradient = True

        batch_C_ex_part_tensor = self.get_expand_tensor(batch_C_prime)
        # batch_C_ex_part_tensor = create_tmp_var(
        #     fluid.default_main_program(),
        #     name='batch_C_ex_part_tensor',
        #     dtype='float32', shape=[-1, 3, 2])
        # layers.py_func(func=get_batch_C_expand,
        #     x=[batch_C_prime], out=[batch_C_ex_part_tensor])

        batch_C_ex_part_tensor.stop_gradient = True

        batch_C_prime_with_zeros = layers.concat(
            [batch_C_prime, batch_C_ex_part_tensor], axis=1)
        batch_T = layers.matmul(inv_delta_C_tensor, batch_C_prime_with_zeros)
        batch_P_prime = layers.matmul(P_hat_tensor, batch_T)
        return batch_P_prime


class TPS(object):
    """Thin-Plate-Spline rectification module: localization network +
    grid generator + grid_sampler over the input image."""

    def __init__(self, params):
        super(TPS, self).__init__()
        self.loc_net = LocalizationNetwork(params)
        self.grid_generator = GridGenerator(params)

    def __call__(self, image):
        batch_C_prime = self.loc_net(image)
        # image layout is NCHW; I_r_size is (width, height).
        I_r_size = [image.shape[3], image.shape[2]]
        batch_P_prime = self.grid_generator(batch_C_prime, I_r_size)
        batch_P_prime = layers.reshape(
            x=batch_P_prime, shape=[-1, image.shape[2], image.shape[3], 2])
        batch_I_r = layers.grid_sampler(x=image, grid=batch_P_prime)
        # NOTE(review): this re-enables gradient flow into the input image —
        # presumably required by grid_sampler's backward; confirm intent.
        image.stop_gradient = False
        return batch_I_r
def AdamDecay(params, parameter_list=None):
    """Build an Adam optimizer from a hyper-parameter dict.

    Args:
        params (dict): hyper-parameters; must contain the keys
            'base_lr', 'beta1' and 'beta2'.
        parameter_list (list): optional list of Variables to update
            when minimizing the loss.

    Returns:
        fluid.optimizer.Adam: the configured optimizer.
    """
    return fluid.optimizer.Adam(
        learning_rate=params['base_lr'],
        beta1=params['beta1'],
        beta2=params['beta2'],
        parameter_list=parameter_list)
+ """ + + def __init__(self, params): + self.thresh = params['thresh'] + self.box_thresh = params['box_thresh'] + self.max_candidates = params['max_candidates'] + self.min_size = 3 + + def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height): + ''' + _bitmap: single map with shape (1, H, W), + whose values are binarized as {0, 1} + ''' + + bitmap = _bitmap + height, width = bitmap.shape + + # img, contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE) + contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8), + cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE) + + num_contours = min(len(contours), self.max_candidates) + boxes = np.zeros((num_contours, 4, 2), dtype=np.int16) + scores = np.zeros((num_contours, ), dtype=np.float32) + + for index in range(num_contours): + contour = contours[index] + points, sside = self.get_mini_boxes(contour) + if sside < self.min_size: + continue + points = np.array(points) + score = self.box_score_fast(pred, points.reshape(-1, 2)) + if self.box_thresh > score: + continue + + box = self.unclip(points).reshape(-1, 1, 2) + box, sside = self.get_mini_boxes(box) + if sside < self.min_size + 2: + continue + box = np.array(box) + if not isinstance(dest_width, int): + dest_width = dest_width.item() + dest_height = dest_height.item() + + box[:, 0] = np.clip( + np.round(box[:, 0] / width * dest_width), 0, dest_width) + box[:, 1] = np.clip( + np.round(box[:, 1] / height * dest_height), 0, dest_height) + boxes[index, :, :] = box.astype(np.int16) + scores[index] = score + return boxes, scores + + def unclip(self, box, unclip_ratio=1.5): + poly = Polygon(box) + distance = poly.area * unclip_ratio / poly.length + offset = pyclipper.PyclipperOffset() + offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON) + expanded = np.array(offset.Execute(distance)) + return expanded + + def get_mini_boxes(self, contour): + bounding_box = cv2.minAreaRect(contour) + points = 
sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0]) + + index_1, index_2, index_3, index_4 = 0, 1, 2, 3 + if points[1][1] > points[0][1]: + index_1 = 0 + index_4 = 1 + else: + index_1 = 1 + index_4 = 0 + if points[3][1] > points[2][1]: + index_2 = 2 + index_3 = 3 + else: + index_2 = 3 + index_3 = 2 + + box = [ + points[index_1], points[index_2], points[index_3], points[index_4] + ] + return box, min(bounding_box[1]) + + def box_score_fast(self, bitmap, _box): + h, w = bitmap.shape[:2] + box = _box.copy() + xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int), 0, w - 1) + xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int), 0, w - 1) + ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int), 0, h - 1) + ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int), 0, h - 1) + + mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8) + box[:, 0] = box[:, 0] - xmin + box[:, 1] = box[:, 1] - ymin + cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1) + return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] + + def __call__(self, outs_dict, ratio_list): + pred = outs_dict['maps'] + pred = pred[:, 0, :, :] + segmentation = pred > self.thresh + + boxes_batch = [] + for batch_index in range(pred.shape[0]): + height, width = pred.shape[-2:] + tmp_boxes, tmp_scores = self.boxes_from_bitmap( + pred[batch_index], segmentation[batch_index], width, height) + + boxes = [] + for k in range(len(tmp_boxes)): + if tmp_scores[k] > self.box_thresh: + boxes.append(tmp_boxes[k]) + if len(boxes) > 0: + boxes = np.array(boxes) + + ratio_h, ratio_w = ratio_list[batch_index] + boxes[:, :, 0] = boxes[:, :, 0] / ratio_w + boxes[:, :, 1] = boxes[:, :, 1] / ratio_h + + boxes_batch.append(boxes) + return boxes_batch diff --git a/ppocr/postprocess/east_postprocess.py b/ppocr/postprocess/east_postprocess.py new file mode 100755 index 00000000..b41f2ae6 --- /dev/null +++ b/ppocr/postprocess/east_postprocess.py @@ -0,0 +1,121 @@ +# Copyright (c) 2020 
class EASTPostPocess(object):
    """
    The post process for EAST: decodes score/geometry maps into quads.
    """

    def __init__(self, params):
        self.score_thresh = params['score_thresh']
        self.cover_thresh = params['cover_thresh']
        self.nms_thresh = params['nms_thresh']

    def restore_rectangle_quad(self, origin, geometry):
        """
        Restore rectangle from quadrangle.

        origin: (n, 2) pixel coordinates; geometry: (n, 8) corner offsets.
        Returns an (n, 4, 2) array of quadrangle corners.
        """
        # Repeat each pixel coordinate once per corner -> (n, 8).
        anchors = np.tile(origin, (1, 4))
        return (anchors - geometry).reshape((-1, 4, 2))

    def detect(self,
               score_map,
               geo_map,
               score_thresh=0.8,
               cover_thresh=0.1,
               nms_thresh=0.2):
        """
        Restore text boxes from the score map and the geometry map.
        """
        score_map = score_map[0]
        # (8, H, W) -> (H, W, 8): index geometry offsets per pixel.
        geo_map = np.transpose(geo_map, (1, 2, 0))
        # Keep only pixels confident enough to seed a proposal.
        xy_text = np.argwhere(score_map > score_thresh)
        if len(xy_text) == 0:
            return []
        # Process proposals top-to-bottom (sorted along the y axis).
        xy_text = xy_text[np.argsort(xy_text[:, 0])]
        # Restore quad proposals; map coords are upscaled by the stride (4).
        text_box_restored = self.restore_rectangle_quad(
            xy_text[:, ::-1] * 4, geo_map[xy_text[:, 0], xy_text[:, 1], :])
        boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
        boxes[:, :8] = text_box_restored.reshape((-1, 8))
        boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
        boxes = nms_locality(boxes.astype(np.float64), nms_thresh)
        if boxes.shape[0] == 0:
            return []
        # Re-score each surviving quad by the mean score inside it and drop
        # low-coverage boxes; this differs from the original paper.
        for i, box in enumerate(boxes):
            mask = np.zeros_like(score_map, dtype=np.uint8)
            cv2.fillPoly(mask, box[:8].reshape(
                (-1, 4, 2)).astype(np.int32) // 4, 1)
            boxes[i, 8] = cv2.mean(score_map, mask)[0]
        return boxes[boxes[:, 8] > cover_thresh]

    def sort_poly(self, p):
        """
        Rotate the 4 points so the one with the minimal x+y sum comes
        first, keeping a consistent orientation for the remaining corners.
        """
        start = np.argmin(np.sum(p, axis=1))
        p = np.roll(p, -start, axis=0)
        if abs(p[0, 0] - p[1, 0]) > abs(p[0, 1] - p[1, 1]):
            return p
        return p[[0, 3, 2, 1]]

    def __call__(self, outs_dict, ratio_list):
        """Decode a batch of score/geo maps into per-image box arrays."""
        score_list = outs_dict['f_score']
        geo_list = outs_dict['f_geo']
        dt_boxes_list = []
        for ino in range(len(ratio_list)):
            boxes = self.detect(
                score_map=score_list[ino],
                geo_map=geo_list[ino],
                score_thresh=self.score_thresh,
                cover_thresh=self.cover_thresh,
                nms_thresh=self.nms_thresh)
            boxes_norm = []
            if len(boxes) > 0:
                ratio_h, ratio_w = ratio_list[ino]
                boxes = boxes[:, :8].reshape((-1, 4, 2))
                # Map back to the original image scale.
                boxes[:, :, 0] /= ratio_w
                boxes[:, :, 1] /= ratio_h
                for box in boxes:
                    box = self.sort_poly(box.astype(np.int32))
                    # Drop degenerate quads (either side shorter than 5 px).
                    if np.linalg.norm(box[0] - box[1]) < 5 \
                            or np.linalg.norm(box[3] - box[0]) < 5:
                        continue
                    boxes_norm.append(box)
            dt_boxes_list.append(np.array(boxes_norm))
        return dt_boxes_list
+ """ + g = Polygon(g[:8].reshape((4, 2))) + p = Polygon(p[:8].reshape((4, 2))) + if not g.is_valid or not p.is_valid: + return 0 + inter = Polygon(g).intersection(Polygon(p)).area + #union = g.area + p.area - inter + union = p.area + if union == 0: + print("p_area is very small") + return 0 + else: + return inter / union + + +def weighted_merge(g, p): + """ + Weighted merge. + """ + g[:8] = (g[8] * g[:8] + p[8] * p[:8]) / (g[8] + p[8]) + g[8] = (g[8] + p[8]) + return g + + +def standard_nms(S, thres): + """ + Standard nms. + """ + order = np.argsort(S[:, 8])[::-1] + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + ovr = np.array([intersection(S[i], S[t]) for t in order[1:]]) + + inds = np.where(ovr <= thres)[0] + order = order[inds + 1] + + return S[keep] + + +def standard_nms_inds(S, thres): + """ + Standard nms, retun inds. + """ + order = np.argsort(S[:, 8])[::-1] + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + ovr = np.array([intersection(S[i], S[t]) for t in order[1:]]) + + inds = np.where(ovr <= thres)[0] + order = order[inds + 1] + + return keep + + +def nms(S, thres): + """ + nms. 
+ """ + order = np.argsort(S[:, 8])[::-1] + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + ovr = np.array([intersection(S[i], S[t]) for t in order[1:]]) + + inds = np.where(ovr <= thres)[0] + order = order[inds + 1] + + return keep + + +def soft_nms(boxes_in, Nt_thres=0.3, threshold=0.8, sigma=0.5, method=2): + """ + soft_nms + :para boxes_in, N x 9 (coords + score) + :para threshould, eliminate cases min score(0.001) + :para Nt_thres, iou_threshi + :para sigma, gaussian weght + :method, linear or gaussian + """ + boxes = boxes_in.copy() + N = boxes.shape[0] + if N is None or N < 1: + return np.array([]) + pos, maxpos = 0, 0 + weight = 0.0 + inds = np.arange(N) + tbox, sbox = boxes[0].copy(), boxes[0].copy() + for i in range(N): + maxscore = boxes[i, 8] + maxpos = i + tbox = boxes[i].copy() + ti = inds[i] + pos = i + 1 + #get max box + while pos < N: + if maxscore < boxes[pos, 8]: + maxscore = boxes[pos, 8] + maxpos = pos + pos = pos + 1 + #add max box as a detection + boxes[i, :] = boxes[maxpos, :] + inds[i] = inds[maxpos] + #swap + boxes[maxpos, :] = tbox + inds[maxpos] = ti + tbox = boxes[i].copy() + pos = i + 1 + #NMS iteration + while pos < N: + sbox = boxes[pos].copy() + ts_iou_val = intersection(tbox, sbox) + if ts_iou_val > 0: + if method == 1: + if ts_iou_val > Nt_thres: + weight = 1 - ts_iou_val + else: + weight = 1 + elif method == 2: + weight = np.exp(-1.0 * ts_iou_val**2 / sigma) + else: + if ts_iou_val > Nt_thres: + weight = 0 + else: + weight = 1 + boxes[pos, 8] = weight * boxes[pos, 8] + #if box score falls below thresold, discard the box by + #swaping last box update N + if boxes[pos, 8] < threshold: + boxes[pos, :] = boxes[N - 1, :] + inds[pos] = inds[N - 1] + N = N - 1 + pos = pos - 1 + pos = pos + 1 + + return boxes[:N] + + +def nms_locality(polys, thres=0.3): + """ + locality aware nms of EAST + :param polys: a N*9 numpy array. 
first 8 coordinates, then prob + :return: boxes after nms + """ + S = [] + p = None + for g in polys: + if p is not None and intersection(g, p) > thres: + p = weighted_merge(g, p) + else: + if p is not None: + S.append(p) + p = g + if p is not None: + S.append(p) + + if len(S) == 0: + return np.array([]) + return standard_nms(np.array(S), thres) + + +if __name__ == '__main__': + # 343,350,448,135,474,143,369,359 + print( + Polygon(np.array([[343, 350], [448, 135], [474, 143], [369, 359]])) + .area) diff --git a/ppocr/utils/__init__.py b/ppocr/utils/__init__.py new file mode 100755 index 00000000..abf198b9 --- /dev/null +++ b/ppocr/utils/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/ppocr/utils/character.py b/ppocr/utils/character.py new file mode 100755 index 00000000..b4075039 --- /dev/null +++ b/ppocr/utils/character.py @@ -0,0 +1,171 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
class CharacterOps(object):
    """Convert between text-label and text-index."""

    def __init__(self, config):
        """Build the character dictionary from `config`.

        config keys:
            character_type: "en" | "ch" | "en_sensitive"
            loss_type: e.g. "ctc" or "attention"
            character_dict_path: required when character_type == "ch"
        """
        self.character_type = config['character_type']
        self.loss_type = config['loss_type']
        if self.character_type == "en":
            self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
            dict_character = list(self.character_str)
        elif self.character_type == "ch":
            character_dict_path = config['character_dict_path']
            self.character_str = ""
            with open(character_dict_path, "rb") as fin:
                lines = fin.readlines()
                for line in lines:
                    line = line.decode('utf-8').strip("\n")
                    self.character_str += line
            dict_character = list(self.character_str)
        elif self.character_type == "en_sensitive":
            # same with ASTER setting (use 94 char).
            self.character_str = string.printable[:-6]
            dict_character = list(self.character_str)
        else:
            self.character_str = None
        # BUG FIX: the original message formatted self.character_str, which
        # is always None on this path; report the offending type instead.
        assert self.character_str is not None, \
            "Nonsupport type of the character: {}".format(self.character_type)
        self.beg_str = "sos"
        self.end_str = "eos"
        if self.loss_type == "attention":
            # Attention decoding needs explicit start/end-of-sequence tokens.
            dict_character = [self.beg_str, self.end_str] + dict_character
        self.dict = {}
        for i, char in enumerate(dict_character):
            self.dict[char] = i
        self.character = dict_character

    def encode(self, text):
        """Convert a text label into an index array.

        Characters not present in the dictionary are silently dropped;
        for character_type "en" the text is lower-cased first.

        Args:
            text (str): one text label.

        Returns:
            np.ndarray: index of each kept character, in order.
        """
        if self.character_type == "en":
            text = text.lower()

        text_list = []
        for char in text:
            if char not in self.dict:
                continue
            text_list.append(self.dict[char])
        text = np.array(text_list)
        return text

    def decode(self, text_index, is_remove_duplicate=False):
        """Convert an index sequence back into a text label.

        Ignores beg/end tokens (attention) or the blank index (CTC); with
        is_remove_duplicate, collapses consecutive repeats (CTC decoding).
        """
        char_list = []
        char_num = self.get_char_num()

        if self.loss_type == "attention":
            beg_idx = self.get_beg_end_flag_idx("beg")
            end_idx = self.get_beg_end_flag_idx("end")
            ignored_tokens = [beg_idx, end_idx]
        else:
            # For CTC the blank label is the index one past the dictionary.
            ignored_tokens = [char_num]

        for idx in range(len(text_index)):
            if text_index[idx] in ignored_tokens:
                continue
            if is_remove_duplicate:
                if idx > 0 and text_index[idx - 1] == text_index[idx]:
                    continue
            char_list.append(self.character[text_index[idx]])
        text = ''.join(char_list)
        return text

    def get_char_num(self):
        """Return the size of the character dictionary."""
        return len(self.character)

    def get_beg_end_flag_idx(self, beg_or_end):
        """Return the index of the "sos"/"eos" token (attention loss only)."""
        if self.loss_type == "attention":
            if beg_or_end == "beg":
                idx = np.array(self.dict[self.beg_str])
            elif beg_or_end == "end":
                idx = np.array(self.dict[self.end_str])
            else:
                assert False, "Unsupport type %s in get_beg_end_flag_idx"\
                    % beg_or_end
            return idx
        else:
            err = "error in get_beg_end_flag_idx when using the loss %s"\
                % (self.loss_type)
            assert False, err


def cal_predicts_accuracy(char_ops,
                          preds,
                          preds_lod,
                          labels,
                          labels_lod,
                          is_remove_duplicate=False):
    """Exact-match accuracy of decoded predictions against labels.

    Args:
        char_ops (CharacterOps): decoder.
        preds / labels: flattened index arrays.
        preds_lod / labels_lod: LoD offsets delimiting each sample.
        is_remove_duplicate (bool): collapse repeats while decoding (CTC).

    Returns:
        (acc, acc_num, img_num): accuracy, correct count, sample count.
    """
    acc_num = 0
    img_num = 0
    for ino in range(len(labels_lod) - 1):
        beg_no = preds_lod[ino]
        end_no = preds_lod[ino + 1]
        preds_text = preds[beg_no:end_no].reshape(-1)
        preds_text = char_ops.decode(preds_text, is_remove_duplicate)

        beg_no = labels_lod[ino]
        end_no = labels_lod[ino + 1]
        labels_text = labels[beg_no:end_no].reshape(-1)
        labels_text = char_ops.decode(labels_text, is_remove_duplicate)
        img_num += 1

        if preds_text == labels_text:
            acc_num += 1
    acc = acc_num * 1.0 / img_num
    return acc, acc_num, img_num


def convert_rec_attention_infer_res(preds):
    """Convert attention inference output into (flat ids, LoD offsets).

    Each row of `preds` starts with the "sos" token; reading stops at the
    second occurrence of token 1 ("eos") if present — assumed encoding,
    TODO confirm against the attention head.
    """
    img_num = preds.shape[0]
    target_lod = [0]
    convert_ids = []
    for ino in range(img_num):
        end_pos = np.where(preds[ino, :] == 1)[0]
        if len(end_pos) <= 1:
            text_list = preds[ino, 1:]
        else:
            text_list = preds[ino, 1:end_pos[1]]
        target_lod.append(target_lod[ino] + len(text_list))
        convert_ids = convert_ids + list(text_list)
    convert_ids = np.array(convert_ids)
    convert_ids = convert_ids.reshape((-1, 1))
    return convert_ids, target_lod


def convert_rec_label_to_lod(ori_labels):
    """Flatten a list of per-image label lists into (ids, LoD offsets)."""
    img_num = len(ori_labels)
    target_lod = [0]
    convert_ids = []
    for ino in range(img_num):
        target_lod.append(target_lod[ino] + len(ori_labels[ino]))
        convert_ids = convert_ids + list(ori_labels[ino])
    convert_ids = np.array(convert_ids)
    convert_ids = convert_ids.reshape((-1, 1))
    return convert_ids, target_lod
def check_config_params(config, config_name, params):
    """Assert that every name in `params` is a key of `config`.

    Raises AssertionError naming the first missing parameter and the
    config section (`config_name`) it was expected in.
    """
    for required in params:
        assert required in config, \
            "param %s didn't find in %s!" % (required, config_name)
+姥 +异 +橹 +钇 +向 +下 +跄 +的 +椴 +沫 +国 +绥 +獠 +报 +开 +民 +蜇 +何 +分 +凇 +长 +讥 +藏 +掏 +施 +羽 +中 +讲 +派 +嘟 +人 +提 +浼 +间 +世 +而 +古 +多 +倪 +唇 +饯 +控 +庚 +首 +赛 +蜓 +味 +断 +制 +觉 +技 +替 +艰 +溢 +潮 +夕 +钺 +外 +摘 +枋 +动 +双 +单 +啮 +户 +枇 +确 +锦 +曜 +杜 +或 +能 +效 +霜 +盒 +然 +侗 +电 +晁 +放 +步 +鹃 +新 +杖 +蜂 +吒 +濂 +瞬 +评 +总 +隍 +对 +独 +合 +也 +是 +府 +青 +天 +诲 +墙 +组 +滴 +级 +邀 +帘 +示 +已 +时 +骸 +仄 +泅 +和 +遨 +店 +雇 +疫 +持 +巍 +踮 +境 +只 +亨 +目 +鉴 +崤 +闲 +体 +泄 +杂 +作 +般 +轰 +化 +解 +迂 +诿 +蛭 +璀 +腾 +告 +版 +服 +省 +师 +小 +规 +程 +线 +海 +办 +引 +二 +桧 +牌 +砺 +洄 +裴 +修 +图 +痫 +胡 +许 +犊 +事 +郛 +基 +柴 +呼 +食 +研 +奶 +律 +蛋 +因 +葆 +察 +戏 +褒 +戒 +再 +李 +骁 +工 +貂 +油 +鹅 +章 +啄 +休 +场 +给 +睡 +纷 +豆 +器 +捎 +说 +敏 +学 +会 +浒 +设 +诊 +格 +廓 +查 +来 +霓 +室 +溆 +¢ +诡 +寥 +焕 +舜 +柒 +狐 +回 +戟 +砾 +厄 +实 +翩 +尿 +五 +入 +径 +惭 +喹 +股 +宇 +篝 +| +; +美 +期 +云 +九 +祺 +扮 +靠 +锝 +槌 +系 +企 +酰 +阊 +暂 +蚕 +忻 +豁 +本 +羹 +执 +条 +钦 +H +獒 +限 +进 +季 +楦 +于 +芘 +玖 +铋 +茯 +未 +答 +粘 +括 +样 +精 +欠 +矢 +甥 +帷 +嵩 +扣 +令 +仔 +风 +皈 +行 +支 +部 +蓉 +刮 +站 +蜡 +救 +钊 +汗 +松 +嫌 +成 +可 +. +鹤 +院 +从 +交 +政 +怕 +活 +调 +球 +局 +验 +髌 +第 +韫 +谗 +串 +到 +圆 +年 +米 +/ +* +友 +忿 +检 +区 +看 +自 +敢 +刃 +个 +兹 +弄 +流 +留 +同 +没 +齿 +星 +聆 +轼 +湖 +什 +三 +建 +蛔 +儿 +椋 +汕 +震 +颧 +鲤 +跟 +力 +情 +璺 +铨 +陪 +务 +指 +族 +训 +滦 +鄣 +濮 +扒 +商 +箱 +十 +召 +慷 +辗 +所 +莞 +管 +护 +臭 +横 +硒 +嗓 +接 +侦 +六 +露 +党 +馋 +驾 +剖 +高 +侬 +妪 +幂 +猗 +绺 +骐 +央 +酐 +孝 +筝 +课 +徇 +缰 +门 +男 +西 +项 +句 +谙 +瞒 +秃 +篇 +教 +碲 +罚 +声 +呐 +景 +前 +富 +嘴 +鳌 +稀 +免 +朋 +啬 +睐 +去 +赈 +鱼 +住 +肩 +愕 +速 +旁 +波 +厅 +健 +茼 +厥 +鲟 +谅 +投 +攸 +炔 +数 +方 +击 +呋 +谈 +绩 +别 +愫 +僚 +躬 +鹧 +胪 +炳 +招 +喇 +膨 +泵 +蹦 +毛 +结 +5 +4 +谱 +识 +陕 +粽 +婚 +拟 +构 +且 +搜 +任 +潘 +比 +郢 +妨 +醪 +陀 +桔 +碘 +扎 +选 +哈 +骷 +楷 +亿 +明 +缆 +脯 +监 +睫 +逻 +婵 +共 +赴 +淝 +凡 +惦 +及 +达 +揖 +谩 +澹 +减 +焰 +蛹 +番 +祁 +柏 +员 +禄 +怡 +峤 +龙 +白 +叽 +生 +闯 +起 +细 +装 +谕 +竟 +聚 +钙 +上 +导 +渊 +按 +艾 +辘 +挡 +耒 +盹 +饪 +臀 +记 +邮 +蕙 +受 +各 +医 +搂 +普 +滇 +朗 +茸 +带 +翻 +酚 +( +光 +堤 +墟 +蔷 +万 +幻 +〓 +瑙 +辈 +昧 +盏 +亘 +蛀 +吉 +铰 +请 +子 +假 +闻 +税 +井 +诩 +哨 +嫂 +好 +面 +琐 +校 +馊 +鬣 +缂 +营 +访 +炖 +占 +农 +缀 +否 +经 +钚 +棵 +趟 +张 +亟 +吏 +茶 +谨 +捻 +论 +迸 +堂 +玉 +信 +吧 +瞠 +乡 +姬 +寺 +咬 +溏 +苄 +皿 +意 +赉 +宝 +尔 +钰 +艺 +特 +唳 +踉 +都 +荣 +倚 +登 +荐 +丧 +奇 +涵 +批 +炭 +近 +符 +傩 +感 +道 +着 +菊 +虹 +仲 +众 +懈 +濯 +颞 +眺 +南 +释 +北 +缝 +标 +既 +茗 +整 +撼 +迤 +贲 +挎 +耱 +拒 +某 +妍 +卫 
+哇 +英 +矶 +藩 +治 +他 +元 +领 +膜 +遮 +穗 +蛾 +飞 +荒 +棺 +劫 +么 +市 +火 +温 +拈 +棚 +洼 +转 +果 +奕 +卸 +迪 +伸 +泳 +斗 +邡 +侄 +涨 +屯 +萋 +胭 +氡 +崮 +枞 +惧 +冒 +彩 +斜 +手 +豚 +随 +旭 +淑 +妞 +形 +菌 +吲 +沱 +争 +驯 +歹 +挟 +兆 +柱 +传 +至 +包 +内 +响 +临 +红 +功 +弩 +衡 +寂 +禁 +老 +棍 +耆 +渍 +织 +害 +氵 +渑 +布 +载 +靥 +嗬 +虽 +苹 +咨 +娄 +库 +雉 +榜 +帜 +嘲 +套 +瑚 +亲 +簸 +欧 +边 +6 +腿 +旮 +抛 +吹 +瞳 +得 +镓 +梗 +厨 +继 +漾 +愣 +憨 +士 +策 +窑 +抑 +躯 +襟 +脏 +参 +贸 +言 +干 +绸 +鳄 +穷 +藜 +音 +折 +详 +) +举 +悍 +甸 +癌 +黎 +谴 +死 +罩 +迁 +寒 +驷 +袖 +媒 +蒋 +掘 +模 +纠 +恣 +观 +祖 +蛆 +碍 +位 +稿 +主 +澧 +跌 +筏 +京 +锏 +帝 +贴 +证 +糠 +才 +黄 +鲸 +略 +炯 +饱 +四 +出 +园 +犀 +牧 +容 +汉 +杆 +浈 +汰 +瑷 +造 +虫 +瘩 +怪 +驴 +济 +应 +花 +沣 +谔 +夙 +旅 +价 +矿 +以 +考 +s +u +呦 +晒 +巡 +茅 +准 +肟 +瓴 +詹 +仟 +褂 +译 +桌 +混 +宁 +怦 +郑 +抿 +些 +余 +鄂 +饴 +攒 +珑 +群 +阖 +岔 +琨 +藓 +预 +环 +洮 +岌 +宀 +杲 +瀵 +最 +常 +囡 +周 +踊 +女 +鼓 +袭 +喉 +简 +范 +薯 +遐 +疏 +粱 +黜 +禧 +法 +箔 +斤 +遥 +汝 +奥 +直 +贞 +撑 +置 +绱 +集 +她 +馅 +逗 +钧 +橱 +魉 +[ +恙 +躁 +唤 +9 +旺 +膘 +待 +脾 +惫 +购 +吗 +依 +盲 +度 +瘿 +蠖 +俾 +之 +镗 +拇 +鲵 +厝 +簧 +续 +款 +展 +啃 +表 +剔 +品 +钻 +腭 +损 +清 +锶 +统 +涌 +寸 +滨 +贪 +链 +吠 +冈 +伎 +迥 +咏 +吁 +览 +防 +迅 +失 +汾 +阔 +逵 +绀 +蔑 +列 +川 +凭 +努 +熨 +揪 +利 +俱 +绉 +抢 +鸨 +我 +即 +责 +膦 +易 +毓 +鹊 +刹 +玷 +岿 +空 +嘞 +绊 +排 +术 +估 +锷 +违 +们 +苟 +铜 +播 +肘 +件 +烫 +审 +鲂 +广 +像 +铌 +惰 +铟 +巳 +胍 +鲍 +康 +憧 +色 +恢 +想 +拷 +尤 +疳 +知 +S +Y +F +D +A +峄 +裕 +帮 +握 +搔 +氐 +氘 +难 +墒 +沮 +雨 +叁 +缥 +悴 +藐 +湫 +娟 +苑 +稠 +颛 +簇 +后 +阕 +闭 +蕤 +缚 +怎 +佞 +码 +嘤 +蔡 +痊 +舱 +螯 +帕 +赫 +昵 +升 +烬 +岫 +、 +疵 +蜻 +髁 +蕨 +隶 +烛 +械 +丑 +盂 +梁 +强 +鲛 +由 +拘 +揉 +劭 +龟 +撤 +钩 +呕 +孛 +费 +妻 +漂 +求 +阑 +崖 +秤 +甘 +通 +深 +补 +赃 +坎 +床 +啪 +承 +吼 +量 +暇 +钼 +烨 +阂 +擎 +脱 +逮 +称 +P +神 +属 +矗 +华 +届 +狍 +葑 +汹 +育 +患 +窒 +蛰 +佼 +静 +槎 +运 +鳗 +庆 +逝 +曼 +疱 +克 +代 +官 +此 +麸 +耧 +蚌 +晟 +例 +础 +榛 +副 +测 +唰 +缢 +迹 +灬 +霁 +身 +岁 +赭 +扛 +又 +菡 +乜 +雾 +板 +读 +陷 +徉 +贯 +郁 +虑 +变 +钓 +菜 +圾 +现 +琢 +式 +乐 +维 +渔 +浜 +左 +吾 +脑 +钡 +警 +T +啵 +拴 +偌 +漱 +湿 +硕 +止 +骼 +魄 +积 +燥 +联 +踢 +玛 +则 +窿 +见 +振 +畿 +送 +班 +钽 +您 +赵 +刨 +印 +讨 +踝 +籍 +谡 +舌 +崧 +汽 +蔽 +沪 +酥 +绒 +怖 +财 +帖 +肱 +私 +莎 +勋 +羔 +霸 +励 +哼 +帐 +将 +帅 +渠 +纪 +婴 +娩 +岭 +厘 +滕 +吻 +伤 +坝 +冠 +戊 +隆 +瘁 +介 +涧 +物 +黍 +并 +姗 +奢 +蹑 +掣 +垸 +锴 +命 +箍 +捉 +病 +辖 +琰 +眭 +迩 +艘 +绌 +繁 +寅 +若 +毋 +思 +诉 +类 +诈 +燮 +轲 +酮 +狂 +重 +反 +职 +筱 +县 +委 +磕 +绣 +奖 +晋 +濉 +志 +徽 +肠 +呈 +獐 +坻 +口 +片 +碰 
+几 +村 +柿 +劳 +料 +获 +亩 +惕 +晕 +厌 +号 +罢 +池 +正 +鏖 +煨 +家 +棕 +复 +尝 +懋 +蜥 +锅 +岛 +扰 +队 +坠 +瘾 +钬 +@ +卧 +疣 +镇 +譬 +冰 +彷 +频 +黯 +据 +垄 +采 +八 +缪 +瘫 +型 +熹 +砰 +楠 +襁 +箐 +但 +嘶 +绳 +啤 +拍 +盥 +穆 +傲 +洗 +盯 +塘 +怔 +筛 +丿 +台 +恒 +喂 +葛 +永 +¥ +烟 +酒 +桦 +书 +砂 +蚝 +缉 +态 +瀚 +袄 +圳 +轻 +蛛 +超 +榧 +遛 +姒 +奘 +铮 +右 +荽 +望 +偻 +卡 +丶 +氰 +附 +做 +革 +索 +戚 +坨 +桷 +唁 +垅 +榻 +岐 +偎 +坛 +莨 +山 +殊 +微 +骇 +陈 +爨 +推 +嗝 +驹 +澡 +藁 +呤 +卤 +嘻 +糅 +逛 +侵 +郓 +酌 +德 +摇 +※ +鬃 +被 +慨 +殡 +羸 +昌 +泡 +戛 +鞋 +河 +宪 +沿 +玲 +鲨 +翅 +哽 +源 +铅 +语 +照 +邯 +址 +荃 +佬 +顺 +鸳 +町 +霭 +睾 +瓢 +夸 +椁 +晓 +酿 +痈 +咔 +侏 +券 +噎 +湍 +签 +嚷 +离 +午 +尚 +社 +锤 +背 +孟 +使 +浪 +缦 +潍 +鞅 +军 +姹 +驶 +笑 +鳟 +鲁 +》 +孽 +钜 +绿 +洱 +礴 +焯 +椰 +颖 +囔 +乌 +孔 +巴 +互 +性 +椽 +哞 +聘 +昨 +早 +暮 +胶 +炀 +隧 +低 +彗 +昝 +铁 +呓 +氽 +藉 +喔 +癖 +瑗 +姨 +权 +胱 +韦 +堑 +蜜 +酋 +楝 +砝 +毁 +靓 +歙 +锲 +究 +屋 +喳 +骨 +辨 +碑 +武 +鸠 +宫 +辜 +烊 +适 +坡 +殃 +培 +佩 +供 +走 +蜈 +迟 +翼 +况 +姣 +凛 +浔 +吃 +飘 +债 +犟 +金 +促 +苛 +崇 +坂 +莳 +畔 +绂 +兵 +蠕 +斋 +根 +砍 +亢 +欢 +恬 +崔 +剁 +餐 +榫 +快 +扶 +‖ +濒 +缠 +鳜 +当 +彭 +驭 +浦 +篮 +昀 +锆 +秸 +钳 +弋 +娣 +瞑 +夷 +龛 +苫 +拱 +致 +% +嵊 +障 +隐 +弑 +初 +娓 +抉 +汩 +累 +蓖 +" +唬 +助 +苓 +昙 +押 +毙 +破 +城 +郧 +逢 +嚏 +獭 +瞻 +溱 +婿 +赊 +跨 +恼 +璧 +萃 +姻 +貉 +灵 +炉 +密 +氛 +陶 +砸 +谬 +衔 +点 +琛 +沛 +枳 +层 +岱 +诺 +脍 +榈 +埂 +征 +冷 +裁 +打 +蹴 +素 +瘘 +逞 +蛐 +聊 +激 +腱 +萘 +踵 +飒 +蓟 +吆 +取 +咙 +簋 +涓 +矩 +曝 +挺 +揣 +座 +你 +史 +舵 +焱 +尘 +苏 +笈 +脚 +溉 +榨 +诵 +樊 +邓 +焊 +义 +庶 +儋 +蟋 +蒲 +赦 +呷 +杞 +诠 +豪 +还 +试 +颓 +茉 +太 +除 +紫 +逃 +痴 +草 +充 +鳕 +珉 +祗 +墨 +渭 +烩 +蘸 +慕 +璇 +镶 +穴 +嵘 +恶 +骂 +险 +绋 +幕 +碉 +肺 +戳 +刘 +潞 +秣 +纾 +潜 +銮 +洛 +须 +罘 +销 +瘪 +汞 +兮 +屉 +r +林 +厕 +质 +探 +划 +狸 +殚 +善 +煊 +烹 +〒 +锈 +逯 +宸 +辍 +泱 +柚 +袍 +远 +蹋 +嶙 +绝 +峥 +娥 +缍 +雀 +徵 +认 +镱 +谷 += +贩 +勉 +撩 +鄯 +斐 +洋 +非 +祚 +泾 +诒 +饿 +撬 +威 +晷 +搭 +芍 +锥 +笺 +蓦 +候 +琊 +档 +礁 +沼 +卵 +荠 +忑 +朝 +凹 +瑞 +头 +仪 +弧 +孵 +畏 +铆 +突 +衲 +车 +浩 +气 +茂 +悖 +厢 +枕 +酝 +戴 +湾 +邹 +飚 +攘 +锂 +写 +宵 +翁 +岷 +无 +喜 +丈 +挑 +嗟 +绛 +殉 +议 +槽 +具 +醇 +淞 +笃 +郴 +阅 +饼 +底 +壕 +砚 +弈 +询 +缕 +庹 +翟 +零 +筷 +暨 +舟 +闺 +甯 +撞 +麂 +茌 +蔼 +很 +珲 +捕 +棠 +角 +阉 +媛 +娲 +诽 +剿 +尉 +爵 +睬 +韩 +诰 +匣 +危 +糍 +镯 +立 +浏 +阳 +少 +盆 +舔 +擘 +匪 +申 +尬 +铣 +旯 +抖 +赘 +瓯 +居 +ˇ +哮 +游 +锭 +茏 +歌 +坏 +甚 +秒 +舞 +沙 +仗 +劲 +潺 +阿 +燧 +郭 +嗖 +霏 +忠 +材 +奂 +耐 +跺 +砀 +输 +岖 +媳 +氟 +极 +摆 +灿 +今 +扔 +腻 +枝 +奎 +药 +熄 +吨 +话 +q +额 +慑 +嘌 +协 +喀 +壳 +埭 +视 +著 
+於 +愧 +陲 +翌 +峁 +颅 +佛 +腹 +聋 +侯 +咎 +叟 +秀 +颇 +存 +较 +罪 +哄 +岗 +扫 +栏 +钾 +羌 +己 +璨 +枭 +霉 +煌 +涸 +衿 +键 +镝 +益 +岢 +奏 +连 +夯 +睿 +冥 +均 +糖 +狞 +蹊 +稻 +爸 +刿 +胥 +煜 +丽 +肿 +璃 +掸 +跚 +灾 +垂 +樾 +濑 +乎 +莲 +窄 +犹 +撮 +战 +馄 +软 +络 +显 +鸢 +胸 +宾 +妲 +恕 +埔 +蝌 +份 +遇 +巧 +瞟 +粒 +恰 +剥 +桡 +博 +讯 +凯 +堇 +阶 +滤 +卖 +斌 +骚 +彬 +兑 +磺 +樱 +舷 +两 +娱 +福 +仃 +差 +找 +桁 +÷ +净 +把 +阴 +污 +戬 +雷 +碓 +蕲 +楚 +罡 +焖 +抽 +妫 +咒 +仑 +闱 +尽 +邑 +菁 +爱 +贷 +沥 +鞑 +牡 +嗉 +崴 +骤 +塌 +嗦 +订 +拮 +滓 +捡 +锻 +次 +坪 +杩 +臃 +箬 +融 +珂 +鹗 +宗 +枚 +降 +鸬 +妯 +阄 +堰 +盐 +毅 +必 +杨 +崃 +俺 +甬 +状 +莘 +货 +耸 +菱 +腼 +铸 +唏 +痤 +孚 +澳 +懒 +溅 +翘 +疙 +杷 +淼 +缙 +骰 +喊 +悉 +砻 +坷 +艇 +赁 +界 +谤 +纣 +宴 +晃 +茹 +归 +饭 +梢 +铡 +街 +抄 +肼 +鬟 +苯 +颂 +撷 +戈 +炒 +咆 +茭 +瘙 +负 +仰 +客 +琉 +铢 +封 +卑 +珥 +椿 +镧 +窨 +鬲 +寿 +御 +袤 +铃 +萎 +砖 +餮 +脒 +裳 +肪 +孕 +嫣 +馗 +嵇 +恳 +氯 +江 +石 +褶 +冢 +祸 +阻 +狈 +羞 +银 +靳 +透 +咳 +叼 +敷 +芷 +啥 +它 +瓤 +兰 +痘 +懊 +逑 +肌 +往 +捺 +坊 +甩 +呻 +〃 +沦 +忘 +膻 +祟 +菅 +剧 +崆 +智 +坯 +臧 +霍 +墅 +攻 +眯 +倘 +拢 +骠 +铐 +庭 +岙 +瓠 +′ +缺 +泥 +迢 +捶 +? +? +郏 +喙 +掷 +沌 +纯 +秘 +种 +听 +绘 +固 +螨 +团 +香 +盗 +妒 +埚 +蓝 +拖 +旱 +荞 +铀 +血 +遏 +汲 +辰 +叩 +拽 +幅 +硬 +惶 +桀 +漠 +措 +泼 +唑 +齐 +肾 +念 +酱 +虚 +屁 +耶 +旗 +砦 +闵 +婉 +馆 +拭 +绅 +韧 +忏 +窝 +醋 +葺 +顾 +辞 +倜 +堆 +辋 +逆 +玟 +贱 +疾 +董 +惘 +倌 +锕 +淘 +嘀 +莽 +俭 +笏 +绑 +鲷 +杈 +择 +蟀 +粥 +嗯 +驰 +逾 +案 +谪 +褓 +胫 +哩 +昕 +颚 +鲢 +绠 +躺 +鹄 +崂 +儒 +俨 +丝 +尕 +泌 +啊 +萸 +彰 +幺 +吟 +骄 +苣 +弦 +脊 +瑰 +〈 +诛 +镁 +析 +闪 +剪 +侧 +哟 +框 +螃 +守 +嬗 +燕 +狭 +铈 +缮 +概 +迳 +痧 +鲲 +俯 +售 +笼 +痣 +扉 +挖 +满 +咋 +援 +邱 +扇 +歪 +便 +玑 +绦 +峡 +蛇 +叨 +〖 +泽 +胃 +斓 +喋 +怂 +坟 +猪 +该 +蚬 +炕 +弥 +赞 +棣 +晔 +娠 +挲 +狡 +创 +疖 +铕 +镭 +稷 +挫 +弭 +啾 +翔 +粉 +履 +苘 +哦 +楼 +秕 +铂 +土 +锣 +瘟 +挣 +栉 +习 +享 +桢 +袅 +磨 +桂 +谦 +延 +坚 +蔚 +噗 +署 +谟 +猬 +钎 +恐 +嬉 +雒 +倦 +衅 +亏 +璩 +睹 +刻 +殿 +王 +算 +雕 +麻 +丘 +柯 +骆 +丸 +塍 +谚 +添 +鲈 +垓 +桎 +蚯 +芥 +予 +飕 +镦 +谌 +窗 +醚 +菀 +亮 +搪 +莺 +蒿 +羁 +足 +J +真 +轶 +悬 +衷 +靛 +翊 +掩 +哒 +炅 +掐 +冼 +妮 +l +谐 +稚 +荆 +擒 +犯 +陵 +虏 +浓 +崽 +刍 +陌 +傻 +孜 +千 +靖 +演 +矜 +钕 +煽 +杰 +酗 +渗 +伞 +栋 +俗 +泫 +戍 +罕 +沾 +疽 +灏 +煦 +芬 +磴 +叱 +阱 +榉 +湃 +蜀 +叉 +醒 +彪 +租 +郡 +篷 +屎 +良 +垢 +隗 +弱 +陨 +峪 +砷 +掴 +颁 +胎 +雯 +绵 +贬 +沐 +撵 +隘 +篙 +暖 +曹 +陡 +栓 +填 +臼 +彦 +瓶 +琪 +潼 +哪 +鸡 +摩 +啦 +俟 +锋 +域 +耻 +蔫 +疯 +纹 +撇 +毒 +绶 +痛 +酯 +忍 +爪 +赳 +歆 +嘹 +辕 +烈 +册 +朴 +钱 +吮 +毯 +癜 +娃 +谀 +邵 +厮 +炽 +璞 +邃 +丐 +追 +词 +瓒 +忆 +轧 +芫 +谯 +喷 +弟 +半 +冕 
+裙 +掖 +墉 +绮 +寝 +苔 +势 +顷 +褥 +切 +衮 +君 +佳 +嫒 +蚩 +霞 +佚 +洙 +逊 +镖 +暹 +唛 +& +殒 +顶 +碗 +獗 +轭 +铺 +蛊 +废 +恹 +汨 +崩 +珍 +那 +杵 +曲 +纺 +夏 +薰 +傀 +闳 +淬 +姘 +舀 +拧 +卷 +楂 +恍 +讪 +厩 +寮 +篪 +赓 +乘 +灭 +盅 +鞣 +沟 +慎 +挂 +饺 +鼾 +杳 +树 +缨 +丛 +絮 +娌 +臻 +嗳 +篡 +侩 +述 +衰 +矛 +圈 +蚜 +匕 +筹 +匿 +濞 +晨 +叶 +骋 +郝 +挚 +蚴 +滞 +增 +侍 +描 +瓣 +吖 +嫦 +蟒 +匾 +圣 +赌 +毡 +癞 +恺 +百 +曳 +需 +篓 +肮 +庖 +帏 +卿 +驿 +遗 +蹬 +鬓 +骡 +歉 +芎 +胳 +屐 +禽 +烦 +晌 +寄 +媾 +狄 +翡 +苒 +船 +廉 +终 +痞 +殇 +々 +畦 +饶 +改 +拆 +悻 +萄 +£ +瓿 +乃 +訾 +桅 +匮 +溧 +拥 +纱 +铍 +骗 +蕃 +龋 +缬 +父 +佐 +疚 +栎 +醍 +掳 +蓄 +x +惆 +颜 +鲆 +榆 +〔 +猎 +敌 +暴 +谥 +鲫 +贾 +罗 +玻 +缄 +扦 +芪 +癣 +落 +徒 +臾 +恿 +猩 +托 +邴 +肄 +牵 +春 +陛 +耀 +刊 +拓 +蓓 +邳 +堕 +寇 +枉 +淌 +啡 +湄 +兽 +酷 +萼 +碚 +濠 +萤 +夹 +旬 +戮 +梭 +琥 +椭 +昔 +勺 +蜊 +绐 +晚 +孺 +僵 +宣 +摄 +冽 +旨 +萌 +忙 +蚤 +眉 +噼 +蟑 +付 +契 +瓜 +悼 +颡 +壁 +曾 +窕 +颢 +澎 +仿 +俑 +浑 +嵌 +浣 +乍 +碌 +褪 +乱 +蔟 +隙 +玩 +剐 +葫 +箫 +纲 +围 +伐 +决 +伙 +漩 +瑟 +刑 +肓 +镳 +缓 +蹭 +氨 +皓 +典 +畲 +坍 +铑 +檐 +塑 +洞 +倬 +储 +胴 +淳 +戾 +吐 +灼 +惺 +妙 +毕 +珐 +缈 +虱 +盖 +羰 +鸿 +磅 +谓 +髅 +娴 +苴 +唷 +蚣 +霹 +抨 +贤 +唠 +犬 +誓 +逍 +庠 +逼 +麓 +籼 +釉 +呜 +碧 +秧 +氩 +摔 +霄 +穸 +纨 +辟 +妈 +映 +完 +牛 +缴 +嗷 +炊 +恩 +荔 +茆 +掉 +紊 +慌 +莓 +羟 +阙 +萁 +磐 +另 +蕹 +辱 +鳐 +湮 +吡 +吩 +唐 +睦 +垠 +舒 +圜 +冗 +瞿 +溺 +芾 +囱 +匠 +僳 +汐 +菩 +饬 +漓 +黑 +霰 +浸 +濡 +窥 +毂 +蒡 +兢 +驻 +鹉 +芮 +诙 +迫 +雳 +厂 +忐 +臆 +猴 +鸣 +蚪 +栈 +箕 +羡 +渐 +莆 +捍 +眈 +哓 +趴 +蹼 +埕 +嚣 +骛 +宏 +淄 +斑 +噜 +严 +瑛 +垃 +椎 +诱 +压 +庾 +绞 +焘 +廿 +抡 +迄 +棘 +夫 +纬 +锹 +眨 +瞌 +侠 +脐 +竞 +瀑 +孳 +骧 +遁 +姜 +颦 +荪 +滚 +萦 +伪 +逸 +粳 +爬 +锁 +矣 +役 +趣 +洒 +颔 +诏 +逐 +奸 +甭 +惠 +攀 +蹄 +泛 +尼 +拼 +阮 +鹰 +亚 +颈 +惑 +勒 +〉 +际 +肛 +爷 +刚 +钨 +丰 +养 +冶 +鲽 +辉 +蔻 +画 +覆 +皴 +妊 +麦 +返 +醉 +皂 +擀 +〗 +酶 +凑 +粹 +悟 +诀 +硖 +港 +卜 +z +杀 +涕 +± +舍 +铠 +抵 +弛 +段 +敝 +镐 +奠 +拂 +轴 +跛 +袱 +e +t +沉 +菇 +俎 +薪 +峦 +秭 +蟹 +历 +盟 +菠 +寡 +液 +肢 +喻 +染 +裱 +悱 +抱 +氙 +赤 +捅 +猛 +跑 +氮 +谣 +仁 +尺 +辊 +窍 +烙 +衍 +架 +擦 +倏 +璐 +瑁 +币 +楞 +胖 +夔 +趸 +邛 +惴 +饕 +虔 +蝎 +§ +哉 +贝 +宽 +辫 +炮 +扩 +饲 +籽 +魏 +菟 +锰 +伍 +猝 +末 +琳 +哚 +蛎 +邂 +呀 +姿 +鄞 +却 +歧 +仙 +恸 +椐 +森 +牒 +寤 +袒 +婆 +虢 +雅 +钉 +朵 +贼 +欲 +苞 +寰 +故 +龚 +坭 +嘘 +咫 +礼 +硷 +兀 +睢 +汶 +’ +铲 +烧 +绕 +诃 +浃 +钿 +哺 +柜 +讼 +颊 +璁 +腔 +洽 +咐 +脲 +簌 +筠 +镣 +玮 +鞠 +谁 +兼 +姆 +挥 +梯 +蝴 +谘 +漕 +刷 +躏 +宦 +弼 +b +垌 +劈 +麟 +莉 +揭 +笙 +渎 +仕 +嗤 +仓 +配 +怏 +抬 +错 +泯 +镊 +孰 +猿 +邪 +仍 +秋 +鼬 +壹 +歇 +吵 +炼 +< +尧 +射 +柬 +廷 +胧 +霾 +凳 
+隋 +肚 +浮 +梦 +祥 +株 +堵 +退 +L +鹫 +跎 +凶 +毽 +荟 +炫 +栩 +玳 +甜 +沂 +鹿 +顽 +伯 +爹 +赔 +蛴 +徐 +匡 +欣 +狰 +缸 +雹 +蟆 +疤 +默 +沤 +啜 +痂 +衣 +禅 +w +i +h +辽 +葳 +黝 +钗 +停 +沽 +棒 +馨 +颌 +肉 +吴 +硫 +悯 +劾 +娈 +马 +啧 +吊 +悌 +镑 +峭 +帆 +瀣 +涉 +咸 +疸 +滋 +泣 +翦 +拙 +癸 +钥 +蜒 ++ +尾 +庄 +凝 +泉 +婢 +渴 +谊 +乞 +陆 +锉 +糊 +鸦 +淮 +I +B +N +晦 +弗 +乔 +庥 +葡 +尻 +席 +橡 +傣 +渣 +拿 +惩 +麋 +斛 +缃 +矮 +蛏 +岘 +鸽 +姐 +膏 +催 +奔 +镒 +喱 +蠡 +摧 +钯 +胤 +柠 +拐 +璋 +鸥 +卢 +荡 +倾 +^ +_ +珀 +逄 +萧 +塾 +掇 +贮 +笆 +聂 +圃 +冲 +嵬 +M +滔 +笕 +值 +炙 +偶 +蜱 +搐 +梆 +汪 +蔬 +腑 +鸯 +蹇 +敞 +绯 +仨 +祯 +谆 +梧 +糗 +鑫 +啸 +豺 +囹 +猾 +巢 +柄 +瀛 +筑 +踌 +沭 +暗 +苁 +鱿 +蹉 +脂 +蘖 +牢 +热 +木 +吸 +溃 +宠 +序 +泞 +偿 +拜 +檩 +厚 +朐 +毗 +螳 +吞 +媚 +朽 +担 +蝗 +橘 +畴 +祈 +糟 +盱 +隼 +郜 +惜 +珠 +裨 +铵 +焙 +琚 +唯 +咚 +噪 +骊 +丫 +滢 +勤 +棉 +呸 +咣 +淀 +隔 +蕾 +窈 +饨 +挨 +煅 +短 +匙 +粕 +镜 +赣 +撕 +墩 +酬 +馁 +豌 +颐 +抗 +酣 +氓 +佑 +搁 +哭 +递 +耷 +涡 +桃 +贻 +碣 +截 +瘦 +昭 +镌 +蔓 +氚 +甲 +猕 +蕴 +蓬 +散 +拾 +纛 +狼 +猷 +铎 +埋 +旖 +矾 +讳 +囊 +糜 +迈 +粟 +蚂 +紧 +鲳 +瘢 +栽 +稼 +羊 +锄 +斟 +睁 +桥 +瓮 +蹙 +祉 +醺 +鼻 +昱 +剃 +跳 +篱 +跷 +蒜 +翎 +宅 +晖 +嗑 +壑 +峻 +癫 +屏 +狠 +陋 +袜 +途 +憎 +祀 +莹 +滟 +佶 +溥 +臣 +约 +盛 +峰 +磁 +慵 +婪 +拦 +莅 +朕 +鹦 +粲 +裤 +哎 +疡 +嫖 +琵 +窟 +堪 +谛 +嘉 +儡 +鳝 +斩 +郾 +驸 +酊 +妄 +胜 +贺 +徙 +傅 +噌 +钢 +栅 +庇 +恋 +匝 +巯 +邈 +尸 +锚 +粗 +佟 +蛟 +薹 +纵 +蚊 +郅 +绢 +锐 +苗 +俞 +篆 +淆 +膀 +鲜 +煎 +诶 +秽 +寻 +涮 +刺 +怀 +噶 +巨 +褰 +魅 +灶 +灌 +桉 +藕 +谜 +舸 +薄 +搀 +恽 +借 +牯 +痉 +渥 +愿 +亓 +耘 +杠 +柩 +锔 +蚶 +钣 +珈 +喘 +蹒 +幽 +赐 +稗 +晤 +莱 +泔 +扯 +肯 +菪 +裆 +腩 +豉 +疆 +骜 +腐 +倭 +珏 +唔 +粮 +亡 +润 +慰 +伽 +橄 +玄 +誉 +醐 +胆 +龊 +粼 +塬 +陇 +彼 +削 +嗣 +绾 +芽 +妗 +垭 +瘴 +爽 +薏 +寨 +龈 +泠 +弹 +赢 +漪 +猫 +嘧 +涂 +恤 +圭 +茧 +烽 +屑 +痕 +巾 +赖 +荸 +凰 +腮 +畈 +亵 +蹲 +偃 +苇 +澜 +艮 +换 +骺 +烘 +苕 +梓 +颉 +肇 +哗 +悄 +氤 +涠 +葬 +屠 +鹭 +植 +竺 +佯 +诣 +鲇 +瘀 +鲅 +邦 +移 +滁 +冯 +耕 +癔 +戌 +茬 +沁 +巩 +悠 +湘 +洪 +痹 +锟 +循 +谋 +腕 +鳃 +钠 +捞 +焉 +迎 +碱 +伫 +急 +榷 +奈 +邝 +卯 +辄 +皲 +卟 +醛 +畹 +忧 +稳 +雄 +昼 +缩 +阈 +睑 +扌 +耗 +曦 +涅 +捏 +瞧 +邕 +淖 +漉 +铝 +耦 +禹 +湛 +喽 +莼 +琅 +诸 +苎 +纂 +硅 +始 +嗨 +傥 +燃 +臂 +赅 +嘈 +呆 +贵 +屹 +壮 +肋 +亍 +蚀 +卅 +豹 +腆 +邬 +迭 +浊 +} +童 +螂 +捐 +圩 +勐 +触 +寞 +汊 +壤 +荫 +膺 +渌 +芳 +懿 +遴 +螈 +泰 +蓼 +蛤 +茜 +舅 +枫 +朔 +膝 +眙 +避 +梅 +判 +鹜 +璜 +牍 +缅 +垫 +藻 +黔 +侥 +惚 +懂 +踩 +腰 +腈 +札 +丞 +唾 +慈 +顿 +摹 +荻 +琬 +~ +斧 +沈 +滂 +胁 +胀 +幄 +莜 +Z +匀 +鄄 +掌 +绰 +茎 +焚 +赋 +萱 +谑 +汁 +铒 +瞎 +夺 +蜗 +野 +娆 +冀 +弯 +篁 +懵 +灞 +隽 +芡 +脘 +俐 +辩 +芯 
+掺 +喏 +膈 +蝈 +觐 +悚 +踹 +蔗 +熠 +鼠 +呵 +抓 +橼 +峨 +畜 +缔 +禾 +崭 +弃 +熊 +摒 +凸 +拗 +穹 +蒙 +抒 +祛 +劝 +闫 +扳 +阵 +醌 +踪 +喵 +侣 +搬 +仅 +荧 +赎 +蝾 +琦 +买 +婧 +瞄 +寓 +皎 +冻 +赝 +箩 +莫 +瞰 +郊 +笫 +姝 +筒 +枪 +遣 +煸 +袋 +舆 +痱 +涛 +母 +〇 +启 +践 +耙 +绲 +盘 +遂 +昊 +搞 +槿 +诬 +纰 +泓 +惨 +檬 +亻 +越 +C +o +憩 +熵 +祷 +钒 +暧 +塔 +阗 +胰 +咄 +娶 +魔 +琶 +钞 +邻 +扬 +杉 +殴 +咽 +弓 +〆 +髻 +】 +吭 +揽 +霆 +拄 +殖 +脆 +彻 +岩 +芝 +勃 +辣 +剌 +钝 +嘎 +甄 +佘 +皖 +伦 +授 +徕 +憔 +挪 +皇 +庞 +稔 +芜 +踏 +溴 +兖 +卒 +擢 +饥 +鳞 +煲 +‰ +账 +颗 +叻 +斯 +捧 +鳍 +琮 +讹 +蛙 +纽 +谭 +酸 +兔 +莒 +睇 +伟 +觑 +羲 +嗜 +宜 +褐 +旎 +辛 +卦 +诘 +筋 +鎏 +溪 +挛 +熔 +阜 +晰 +鳅 +丢 +奚 +灸 +呱 +献 +陉 +黛 +鸪 +甾 +萨 +疮 +拯 +洲 +疹 +辑 +叙 +恻 +谒 +允 +柔 +烂 +氏 +逅 +漆 +拎 +惋 +扈 +湟 +纭 +啕 +掬 +擞 +哥 +忽 +涤 +鸵 +靡 +郗 +瓷 +扁 +廊 +怨 +雏 +钮 +敦 +E +懦 +憋 +汀 +拚 +啉 +腌 +岸 +f +痼 +瞅 +尊 +咀 +眩 +飙 +忌 +仝 +迦 +熬 +毫 +胯 +篑 +茄 +腺 +凄 +舛 +碴 +锵 +诧 +羯 +後 +漏 +汤 +宓 +仞 +蚁 +壶 +谰 +皑 +铄 +棰 +罔 +辅 +晶 +苦 +牟 +闽 +\ +烃 +饮 +聿 +丙 +蛳 +朱 +煤 +涔 +鳖 +犁 +罐 +荼 +砒 +淦 +妤 +黏 +戎 +孑 +婕 +瑾 +戢 +钵 +枣 +捋 +砥 +衩 +狙 +桠 +稣 +阎 +肃 +梏 +诫 +孪 +昶 +婊 +衫 +嗔 +侃 +塞 +蜃 +樵 +峒 +貌 +屿 +欺 +缫 +阐 +栖 +诟 +珞 +荭 +吝 +萍 +嗽 +恂 +啻 +蜴 +磬 +峋 +俸 +豫 +谎 +徊 +镍 +韬 +魇 +晴 +U +囟 +猜 +蛮 +坐 +囿 +伴 +亭 +肝 +佗 +蝠 +妃 +胞 +滩 +榴 +氖 +垩 +苋 +砣 +扪 +馏 +姓 +轩 +厉 +夥 +侈 +禀 +垒 +岑 +赏 +钛 +辐 +痔 +披 +纸 +碳 +“ +坞 +蠓 +挤 +荥 +沅 +悔 +铧 +帼 +蒌 +蝇 +a +p +y +n +g +哀 +浆 +瑶 +凿 +桶 +馈 +皮 +奴 +苜 +佤 +伶 +晗 +铱 +炬 +优 +弊 +氢 +恃 +甫 +攥 +端 +锌 +灰 +稹 +炝 +曙 +邋 +亥 +眶 +碾 +拉 +萝 +绔 +捷 +浍 +腋 +姑 +菖 +凌 +涞 +麽 +锢 +桨 +潢 +绎 +镰 +殆 +锑 +渝 +铬 +困 +绽 +觎 +匈 +糙 +暑 +裹 +鸟 +盔 +肽 +迷 +綦 +『 +亳 +佝 +俘 +钴 +觇 +骥 +仆 +疝 +跪 +婶 +郯 +瀹 +唉 +脖 +踞 +针 +晾 +忒 +扼 +瞩 +叛 +椒 +疟 +嗡 +邗 +肆 +跆 +玫 +忡 +捣 +咧 +唆 +艄 +蘑 +潦 +笛 +阚 +沸 +泻 +掊 +菽 +贫 +斥 +髂 +孢 +镂 +赂 +麝 +鸾 +屡 +衬 +苷 +恪 +叠 +希 +粤 +爻 +喝 +茫 +惬 +郸 +绻 +庸 +撅 +碟 +宄 +妹 +膛 +叮 +饵 +崛 +嗲 +椅 +冤 +搅 +咕 +敛 +尹 +垦 +闷 +蝉 +霎 +勰 +败 +蓑 +泸 +肤 +鹌 +幌 +焦 +浠 +鞍 +刁 +舰 +乙 +竿 +裔 +。 +茵 +函 +伊 +兄 +丨 +娜 +匍 +謇 +莪 +宥 +似 +蝽 +翳 +酪 +翠 +粑 +薇 +祢 +骏 +赠 +叫 +Q +噤 +噻 +竖 +芗 +莠 +潭 +俊 +羿 +耜 +O +郫 +趁 +嗪 +囚 +蹶 +芒 +洁 +笋 +鹑 +敲 +硝 +啶 +堡 +渲 +揩 +』 +携 +宿 +遒 +颍 +扭 +棱 +割 +萜 +蔸 +葵 +琴 +捂 +饰 +衙 +耿 +掠 +募 +岂 +窖 +涟 +蔺 +瘤 +柞 +瞪 +怜 +匹 +距 +楔 +炜 +哆 +秦 +缎 +幼 +茁 +绪 +痨 +恨 +楸 +娅 +瓦 +桩 +雪 +嬴 +伏 +榔 +妥 +铿 +拌 +眠 +雍 +缇 +‘ +卓 +搓 +哌 +觞 +噩 +屈 +哧 +髓 +咦 +巅 +娑 +侑 +淫 +膳 +祝 +勾 +姊 +莴 +胄 +疃 
+薛 +蜷 +胛 +巷 +芙 +芋 +熙 +闰 +勿 +窃 +狱 +剩 +钏 +幢 +陟 +铛 +慧 +靴 +耍 +k +浙 +浇 +飨 +惟 +绗 +祜 +澈 +啼 +咪 +磷 +摞 +诅 +郦 +抹 +跃 +壬 +吕 +肖 +琏 +颤 +尴 +剡 +抠 +凋 +赚 +泊 +津 +宕 +殷 +倔 +氲 +漫 +邺 +涎 +怠 +$ +垮 +荬 +遵 +俏 +叹 +噢 +饽 +蜘 +孙 +筵 +疼 +鞭 +羧 +牦 +箭 +潴 +c +眸 +祭 +髯 +啖 +坳 +愁 +芩 +驮 +倡 +巽 +穰 +沃 +胚 +怒 +凤 +槛 +剂 +趵 +嫁 +v +邢 +灯 +鄢 +桐 +睽 +檗 +锯 +槟 +婷 +嵋 +圻 +诗 +蕈 +颠 +遭 +痢 +芸 +怯 +馥 +竭 +锗 +徜 +恭 +遍 +籁 +剑 +嘱 +苡 +龄 +僧 +桑 +潸 +弘 +澶 +楹 +悲 +讫 +愤 +腥 +悸 +谍 +椹 +呢 +桓 +葭 +攫 +阀 +翰 +躲 +敖 +柑 +郎 +笨 +橇 +呃 +魁 +燎 +脓 +葩 +磋 +垛 +玺 +狮 +沓 +砜 +蕊 +锺 +罹 +蕉 +翱 +虐 +闾 +巫 +旦 +茱 +嬷 +枯 +鹏 +贡 +芹 +汛 +矫 +绁 +拣 +禺 +佃 +讣 +舫 +惯 +乳 +趋 +疲 +挽 +岚 +虾 +衾 +蠹 +蹂 +飓 +氦 +铖 +孩 +稞 +瑜 +壅 +掀 +勘 +妓 +畅 +髋 +W +庐 +牲 +蓿 +榕 +练 +垣 +唱 +邸 +菲 +昆 +婺 +穿 +绡 +麒 +蚱 +掂 +愚 +泷 +涪 +漳 +妩 +娉 +榄 +讷 +觅 +旧 +藤 +煮 +呛 +柳 +腓 +叭 +庵 +烷 +阡 +罂 +蜕 +擂 +猖 +咿 +媲 +脉 +【 +沏 +貅 +黠 +熏 +哲 +烁 +坦 +酵 +兜 +× +潇 +撒 +剽 +珩 +圹 +乾 +摸 +樟 +帽 +嗒 +襄 +魂 +轿 +憬 +锡 +〕 +喃 +皆 +咖 +隅 +脸 +残 +泮 +袂 +鹂 +珊 +囤 +捆 +咤 +误 +徨 +闹 +淙 +芊 +淋 +怆 +囗 +拨 +梳 +渤 +R +G +绨 +蚓 +婀 +幡 +狩 +麾 +谢 +唢 +裸 +旌 +伉 +纶 +裂 +驳 +砼 +咛 +澄 +樨 +蹈 +宙 +澍 +倍 +貔 +操 +勇 +蟠 +摈 +砧 +虬 +够 +缁 +悦 +藿 +撸 +艹 +摁 +淹 +豇 +虎 +榭 +ˉ +吱 +d +° +喧 +荀 +踱 +侮 +奋 +偕 +饷 +犍 +惮 +坑 +璎 +徘 +宛 +妆 +袈 +倩 +窦 +昂 +荏 +乖 +K +怅 +撰 +鳙 +牙 +袁 +酞 +X +痿 +琼 +闸 +雁 +趾 +荚 +虻 +涝 +《 +杏 +韭 +偈 +烤 +绫 +鞘 +卉 +症 +遢 +蓥 +诋 +杭 +荨 +匆 +竣 +簪 +辙 +敕 +虞 +丹 +缭 +咩 +黟 +m +淤 +瑕 +咂 +铉 +硼 +茨 +嶂 +痒 +畸 +敬 +涿 +粪 +窘 +熟 +叔 +嫔 +盾 +忱 +裘 +憾 +梵 +赡 +珙 +咯 +娘 +庙 +溯 +胺 +葱 +痪 +摊 +荷 +卞 +乒 +髦 +寐 +铭 +坩 +胗 +枷 +爆 +溟 +嚼 +羚 +砬 +轨 +惊 +挠 +罄 +竽 +菏 +氧 +浅 +楣 +盼 +枢 +炸 +阆 +杯 +谏 +噬 +淇 +渺 +俪 +秆 +墓 +泪 +跻 +砌 +痰 +垡 +渡 +耽 +釜 +讶 +鳎 +煞 +呗 +韶 +舶 +绷 +鹳 +缜 +旷 +铊 +皱 +龌 +檀 +霖 +奄 +槐 +艳 +蝶 +旋 +哝 +赶 +骞 +蚧 +腊 +盈 +丁 +` +蜚 +矸 +蝙 +睨 +嚓 +僻 +鬼 +醴 +夜 +彝 +磊 +笔 +拔 +栀 +糕 +厦 +邰 +纫 +逭 +纤 +眦 +膊 +馍 +躇 +烯 +蘼 +冬 +诤 +暄 +骶 +哑 +瘠 +」 +臊 +丕 +愈 +咱 +螺 +擅 +跋 +搏 +硪 +谄 +笠 +淡 +嘿 +骅 +谧 +鼎 +皋 +姚 +歼 +蠢 +驼 +耳 +胬 +挝 +涯 +狗 +蒽 +孓 +犷 +凉 +芦 +箴 +铤 +孤 +嘛 +坤 +V +茴 +朦 +挞 +尖 +橙 +诞 +搴 +碇 +洵 +浚 +帚 +蜍 +漯 +柘 +嚎 +讽 +芭 +荤 +咻 +祠 +秉 +跖 +埃 +吓 +糯 +眷 +馒 +惹 +娼 +鲑 +嫩 +讴 +轮 +瞥 +靶 +褚 +乏 +缤 +宋 +帧 +删 +驱 +碎 +扑 +俩 +俄 +偏 +涣 +竹 +噱 +皙 +佰 +渚 +唧 +斡 +# +镉 +刀 +崎 +筐 +佣 +夭 +贰 +肴 +峙 +哔 +艿 +匐 +牺 +镛 +缘 +仡 +嫡 +劣 +枸 +堀 +梨 +簿 +鸭 +蒸 +亦 +稽 +浴 +{ +衢 +束 +槲 +j +阁 +揍 
+疥 +棋 +潋 +聪 +窜 +乓 +睛 +插 +冉 +阪 +苍 +搽 +「 +蟾 +螟 +幸 +仇 +樽 +撂 +慢 +跤 +幔 +俚 +淅 +覃 +觊 +溶 +妖 +帛 +侨 +曰 +妾 +泗 +· +: +瀘 +風 +Ë +( +) +∶ +紅 +紗 +瑭 +雲 +頭 +鶏 +財 +許 +• +¥ +樂 +焗 +麗 +— +; +滙 +東 +榮 +繪 +興 +… +門 +業 +π +楊 +國 +顧 +é +盤 +寳 +Λ +龍 +鳳 +島 +誌 +緣 +結 +銭 +萬 +勝 +祎 +璟 +優 +歡 +臨 +時 +購 += +★ +藍 +昇 +鐵 +觀 +勅 +農 +聲 +畫 +兿 +術 +發 +劉 +記 +專 +耑 +園 +書 +壴 +種 +Ο +● +褀 +號 +銀 +匯 +敟 +锘 +葉 +橪 +廣 +進 +蒄 +鑽 +阝 +祙 +貢 +鍋 +豊 +夬 +喆 +團 +閣 +開 +燁 +賓 +館 +酡 +沔 +順 ++ +硚 +劵 +饸 +陽 +車 +湓 +復 +萊 +氣 +軒 +華 +堃 +迮 +纟 +戶 +馬 +學 +裡 +電 +嶽 +獨 +マ +シ +サ +ジ +燘 +袪 +環 +❤ +臺 +灣 +専 +賣 +孖 +聖 +攝 +線 +▪ +α +傢 +俬 +夢 +達 +莊 +喬 +貝 +薩 +劍 +羅 +壓 +棛 +饦 +尃 +璈 +囍 +醫 +G +I +A +# +N +鷄 +髙 +嬰 +啓 +約 +隹 +潔 +賴 +藝 +~ +寶 +籣 +麺 +  +嶺 +√ +義 +網 +峩 +長 +∧ +魚 +機 +構 +② +鳯 +偉 +L +B +㙟 +畵 +鴿 +' +詩 +溝 +嚞 +屌 +藔 +佧 +玥 +蘭 +織 +1 +3 +9 +0 +7 +點 +砭 +鴨 +鋪 +銘 +廳 +弍 +‧ +創 +湯 +坶 +℃ +卩 +骝 +& +烜 +荘 +當 +潤 +扞 +係 +懷 +碶 +钅 +蚨 +讠 +☆ +叢 +爲 +埗 +涫 +塗 +→ +楽 +現 +鯨 +愛 +瑪 +鈺 +忄 +悶 +藥 +飾 +樓 +視 +孬 +ㆍ +燚 +苪 +師 +① +丼 +锽 +│ +韓 +標 +è +兒 +閏 +匋 +張 +漢 +Ü +髪 +會 +閑 +檔 +習 +裝 +の +峯 +菘 +輝 +И +雞 +釣 +億 +浐 +K +O +R +8 +H +E +P +T +W +D +S +C +M +F +姌 +饹 +» +晞 +廰 +ä +嵯 +鷹 +負 +飲 +絲 +冚 +楗 +澤 +綫 +區 +❋ +← +質 +靑 +揚 +③ +滬 +統 +産 +協 +﹑ +乸 +畐 +經 +運 +際 +洺 +岽 +為 +粵 +諾 +崋 +豐 +碁 +ɔ +V +2 +6 +齋 +誠 +訂 +´ +勑 +雙 +陳 +無 +í +泩 +媄 +夌 +刂 +i +c +t +o +r +a +嘢 +耄 +燴 +暃 +壽 +媽 +靈 +抻 +體 +唻 +É +冮 +甹 +鎮 +錦 +ʌ +蜛 +蠄 +尓 +駕 +戀 +飬 +逹 +倫 +貴 +極 +Я +Й +寬 +磚 +嶪 +郎 +職 +| +間 +n +d +剎 +伈 +課 +飛 +橋 +瘊 +№ +譜 +骓 +圗 +滘 +縣 +粿 +咅 +養 +濤 +彳 +® +% +Ⅱ +啰 +㴪 +見 +矞 +薬 +糁 +邨 +鲮 +顔 +罱 +З +選 +話 +贏 +氪 +俵 +競 +瑩 +繡 +枱 +β +綉 +á +獅 +爾 +™ +麵 +戋 +淩 +徳 +個 +劇 +場 +務 +簡 +寵 +h +實 +膠 +轱 +圖 +築 +嘣 +樹 +㸃 +營 +耵 +孫 +饃 +鄺 +飯 +麯 +遠 +輸 +坫 +孃 +乚 +閃 +鏢 +㎡ +題 +廠 +關 +↑ +爺 +將 +軍 +連 +篦 +覌 +參 +箸 +- +窠 +棽 +寕 +夀 +爰 +歐 +呙 +閥 +頡 +熱 +雎 +垟 +裟 +凬 +勁 +帑 +馕 +夆 +疌 +枼 +馮 +貨 +蒤 +樸 +彧 +旸 +靜 +龢 +暢 +㐱 +鳥 +珺 +鏡 +灡 +爭 +堷 +廚 +Ó +騰 +診 +┅ +蘇 +褔 +凱 +頂 +豕 +亞 +帥 +嘬 +⊥ +仺 +桖 +複 +饣 +絡 +穂 +顏 +棟 +納 +▏ +濟 +親 +設 +計 +攵 +埌 +烺 +ò +頤 +燦 +蓮 +撻 +節 +講 +濱 +濃 +娽 +洳 +朿 +燈 +鈴 +護 +膚 +铔 +過 +補 +Z +U +5 +4 +坋 +闿 +䖝 +餘 +缐 +铞 +貿 +铪 +桼 +趙 +鍊 +[ +㐂 +垚 +菓 +揸 +捲 +鐘 +滏 +𣇉 +爍 +輪 +燜 +鴻 +鮮 +動 +鹞 +鷗 +丄 +慶 +鉌 +翥 +飮 +腸 +⇋ +漁 +覺 +來 +熘 +昴 +翏 +鲱 +圧 
+鄉 +萭 +頔 +爐 +嫚 +г +貭 +類 +聯 +幛 +輕 +訓 +鑒 +夋 +锨 +芃 +珣 +䝉 +扙 +嵐 +銷 +處 +ㄱ +語 +誘 +苝 +歸 +儀 +燒 +楿 +內 +粢 +葒 +奧 +麥 +礻 +滿 +蠔 +穵 +瞭 +態 +鱬 +榞 +硂 +鄭 +黃 +煙 +祐 +奓 +逺 +* +瑄 +獲 +聞 +薦 +讀 +這 +樣 +決 +問 +啟 +們 +執 +説 +轉 +單 +隨 +唘 +帶 +倉 +庫 +還 +贈 +尙 +皺 +■ +餅 +產 +○ +∈ +報 +狀 +楓 +賠 +琯 +嗮 +禮 +` +傳 +> +≤ +嗞 +Φ +≥ +換 +咭 +∣ +↓ +曬 +ε +応 +寫 +″ +終 +様 +純 +費 +療 +聨 +凍 +壐 +郵 +ü +黒 +∫ +製 +塊 +調 +軽 +確 +撃 +級 +馴 +Ⅲ +涇 +繹 +數 +碼 +證 +狒 +処 +劑 +< +晧 +賀 +衆 +] +櫥 +兩 +陰 +絶 +對 +鯉 +憶 +◎ +p +e +Y +蕒 +煖 +頓 +測 +試 +鼽 +僑 +碩 +妝 +帯 +≈ +鐡 +舖 +權 +喫 +倆 +ˋ +該 +悅 +ā +俫 +. +f +s +b +m +k +g +u +j +貼 +淨 +濕 +針 +適 +備 +l +/ +給 +謢 +強 +觸 +衛 +與 +⊙ +$ +緯 +變 +⑴ +⑵ +⑶ +㎏ +殺 +∩ +幚 +─ +價 +▲ +離 +ú +ó +飄 +烏 +関 +閟 +﹝ +﹞ +邏 +輯 +鍵 +驗 +訣 +導 +歷 +屆 +層 +▼ +儱 +錄 +熳 +ē +艦 +吋 +錶 +辧 +飼 +顯 +④ +禦 +販 +気 +対 +枰 +閩 +紀 +幹 +瞓 +貊 +淚 +△ +眞 +墊 +Ω +獻 +褲 +縫 +緑 +亜 +鉅 +餠 +{ +} +◆ +蘆 +薈 +█ +◇ +溫 +彈 +晳 +粧 +犸 +穩 +訊 +崬 +凖 +熥 +П +舊 +條 +紋 +圍 +Ⅳ +筆 +尷 +難 +雜 +錯 +綁 +識 +頰 +鎖 +艶 +□ +殁 +殼 +⑧ +├ +▕ +鵬 +ǐ +ō +ǒ +糝 +綱 +▎ +μ +盜 +饅 +醬 +籤 +蓋 +釀 +鹽 +據 +à +ɡ +辦 +◥ +彐 +┌ +婦 +獸 +鲩 +伱 +ī +蒟 +蒻 +齊 +袆 +腦 +寧 +凈 +妳 +煥 +詢 +偽 +謹 +啫 +鯽 +騷 +鱸 +損 +傷 +鎻 +髮 +買 +冏 +儥 +両 +﹢ +∞ +載 +喰 +z +羙 +悵 +燙 +曉 +員 +組 +徹 +艷 +痠 +鋼 +鼙 +縮 +細 +嚒 +爯 +≠ +維 +" +鱻 +壇 +厍 +帰 +浥 +犇 +薡 +軎 +² +應 +醜 +刪 +緻 +鶴 +賜 +噁 +軌 +尨 +镔 +鷺 +槗 +彌 +葚 +濛 +請 +溇 +緹 +賢 +訪 +獴 +瑅 +資 +縤 +陣 +蕟 +栢 +韻 +祼 +恁 +伢 +謝 +劃 +涑 +總 +衖 +踺 +砋 +凉 +籃 +駿 +苼 +瘋 +昽 +紡 +驊 +腎 +﹗ +響 +杋 +剛 +嚴 +禪 +歓 +槍 +傘 +檸 +檫 +炣 +勢 +鏜 +鎢 +銑 +尐 +減 +奪 +惡 +θ +僮 +婭 +臘 +ū +ì +殻 +鉄 +∑ +蛲 +焼 +緖 +續 +紹 +懮 \ No newline at end of file diff --git a/ppocr/utils/save_load.py b/ppocr/utils/save_load.py new file mode 100755 index 00000000..4d23f626 --- /dev/null +++ b/ppocr/utils/save_load.py @@ -0,0 +1,131 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. 
+#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import errno +import os +import shutil +import tempfile + +import paddle +import paddle.fluid as fluid + +from .utility import initial_logger +import re +logger = initial_logger() + + +def _mkdir_if_not_exist(path): + """ + mkdir if not exists, ignore the exception when multiprocess mkdir together + """ + if not os.path.exists(path): + try: + os.makedirs(path) + except OSError as e: + if e.errno == errno.EEXIST and os.path.isdir(path): + logger.warning( + 'be happy if some process has already created {}'.format( + path)) + else: + raise OSError('Failed to mkdir {}'.format(path)) + + +def _load_state(path): + if os.path.exists(path + '.pdopt'): + # XXX another hack to ignore the optimizer state + tmp = tempfile.mkdtemp() + dst = os.path.join(tmp, os.path.basename(os.path.normpath(path))) + shutil.copy(path + '.pdparams', dst + '.pdparams') + state = fluid.io.load_program_state(dst) + shutil.rmtree(tmp) + else: + state = fluid.io.load_program_state(path) + return state + + +def load_params(exe, prog, path, ignore_params=[]): + """ + Load model from the given path. + Args: + exe (fluid.Executor): The fluid.Executor object. + prog (fluid.Program): load weight to which Program object. + path (string): URL string or loca model path. + ignore_params (list): ignore variable to load when finetuning. 
+ It can be specified by finetune_exclude_pretrained_params + and the usage can refer to docs/advanced_tutorials/TRANSFER_LEARNING.md + """ + if not (os.path.isdir(path) or os.path.exists(path + '.pdparams')): + raise ValueError("Model pretrain path {} does not " + "exists.".format(path)) + + logger.info('Loading parameters from {}...'.format(path)) + + ignore_set = set() + state = _load_state(path) + + # ignore the parameter which mismatch the shape + # between the model and pretrain weight. + all_var_shape = {} + for block in prog.blocks: + for param in block.all_parameters(): + all_var_shape[param.name] = param.shape + ignore_set.update([ + name for name, shape in all_var_shape.items() + if name in state and shape != state[name].shape + ]) + + if ignore_params: + all_var_names = [var.name for var in prog.list_vars()] + ignore_list = filter( + lambda var: any([re.match(name, var) for name in ignore_params]), + all_var_names) + ignore_set.update(list(ignore_list)) + + if len(ignore_set) > 0: + for k in ignore_set: + if k in state: + logger.warning('variable {} not used'.format(k)) + del state[k] + fluid.io.set_program_state(prog, state) + + +def init_model(config, program, exe): + """ + load model from checkpoint or pretrained_model + """ + checkpoints = config['Global'].get('checkpoints') + if checkpoints: + path = checkpoints + fluid.load(program, path, exe) + logger.info("Finish initing model from {}".format(path)) + return + + pretrain_weights = config['Global'].get('pretrain_weights') + if pretrain_weights: + path = pretrain_weights + load_params(exe, program, path) + logger.info("Finish initing model from {}".format(path)) + return + + +def save_model(program, model_path): + """ + save model to the target path + """ + fluid.save(program, model_path) + logger.info("Already save model in {}".format(model_path)) diff --git a/ppocr/utils/stats.py b/ppocr/utils/stats.py new file mode 100755 index 00000000..826c1c82 --- /dev/null +++ b/ppocr/utils/stats.py @@ -0,0 
+1,65 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import collections +import numpy as np +import datetime + +__all__ = ['TrainingStats', 'Time'] + + +class SmoothedValue(object): + """Track a series of values and provide access to smoothed values over a + window or the global series average. + """ + + def __init__(self, window_size): + self.deque = collections.deque(maxlen=window_size) + + def add_value(self, value): + self.deque.append(value) + + def get_median_value(self): + return np.median(self.deque) + + +def Time(): + return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f') + + +class TrainingStats(object): + def __init__(self, window_size, stats_keys): + self.smoothed_losses_and_metrics = { + key: SmoothedValue(window_size) + for key in stats_keys + } + + def update(self, stats): + for k, v in self.smoothed_losses_and_metrics.items(): + v.add_value(stats[k]) + + def get(self, extras=None): + stats = collections.OrderedDict() + if extras: + for k, v in extras.items(): + stats[k] = v + for k, v in self.smoothed_losses_and_metrics.items(): + stats[k] = round(v.get_median_value(), 6) + + return stats + + def log(self, extras=None): + d = self.get(extras) + strs = ', '.join(str(dict({x: y})).strip('{}') for x, y in d.items()) + return strs diff --git a/ppocr/utils/utility.py b/ppocr/utils/utility.py new file mode 100755 index 00000000..6a81465c --- /dev/null +++ b/ppocr/utils/utility.py 
@@ -0,0 +1,71 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + + +def initial_logger(): + FORMAT = '%(asctime)s-%(levelname)s: %(message)s' + logging.basicConfig(level=logging.INFO, format=FORMAT) + logger = logging.getLogger(__name__) + return logger + + +import importlib + + +def create_module(module_str): + tmpss = module_str.split(",") + assert len(tmpss) == 2, "Error formate\ + of the module path: {}".format(module_str) + module_name, function_name = tmpss[0], tmpss[1] + somemodule = importlib.import_module(module_name, __package__) + function = getattr(somemodule, function_name) + return function + + +def get_check_global_params(mode): + check_params = ['use_gpu', 'max_text_length', 'image_shape',\ + 'image_shape', 'character_type', 'loss_type'] + if mode == "train_eval": + check_params = check_params + [\ + 'train_batch_size_per_card', 'test_batch_size_per_card'] + elif mode == "test": + check_params = check_params + ['test_batch_size_per_card'] + return check_params + + +def get_check_reader_params(mode): + check_params = [] + if mode == "train_eval": + check_params = ['TrainReader', 'EvalReader'] + elif mode == "test": + check_params = ['TestReader'] + return check_params + + +from paddle import fluid + + +def create_multi_devices_program(program, loss_var_name): + build_strategy = fluid.BuildStrategy() + build_strategy.memory_optimize = False + build_strategy.enable_inplace 
= True + exec_strategy = fluid.ExecutionStrategy() + exec_strategy.num_iteration_per_drop_scope = 1 + compile_program = fluid.CompiledProgram(program).with_data_parallel( + loss_name=loss_var_name, + build_strategy=build_strategy, + exec_strategy=exec_strategy) + return compile_program diff --git a/tools/eval.py b/tools/eval.py new file mode 100755 index 00000000..14840f72 --- /dev/null +++ b/tools/eval.py @@ -0,0 +1,102 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + + +def set_paddle_flags(**kwargs): + for key, value in kwargs.items(): + if os.environ.get(key, None) is None: + os.environ[key] = str(value) + + +# NOTE(paddle-dev): All of these flags should be +# set before `import paddle`. Otherwise, it would +# not take any effect. 
+set_paddle_flags( + FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory +) + +import program +from paddle import fluid +from ppocr.utils.utility import initial_logger +logger = initial_logger() +from ppocr.data.reader_main import reader_main +from ppocr.utils.save_load import init_model +from eval_utils.eval_det_utils import eval_det_run +from eval_utils.eval_rec_utils import test_rec_benchmark +from eval_utils.eval_rec_utils import eval_rec_run +from ppocr.utils.character import CharacterOps + + +def main(): + config = program.load_config(FLAGS.config) + program.merge_config(FLAGS.opt) + logger.info(config) + + # check if set use_gpu=True in paddlepaddle cpu version + use_gpu = config['Global']['use_gpu'] + program.check_gpu(True) + + alg = config['Global']['algorithm'] + assert alg in ['EAST', 'DB', 'Rosetta', 'CRNN', 'STARNet', 'RARE'] + if alg in ['Rosetta', 'CRNN', 'STARNet', 'RARE']: + config['Global']['char_ops'] = CharacterOps(config['Global']) + + place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() + startup_prog = fluid.Program() + eval_program = fluid.Program() + eval_build_outputs = program.build( + config, eval_program, startup_prog, mode='test') + eval_fetch_name_list = eval_build_outputs[1] + eval_fetch_varname_list = eval_build_outputs[2] + eval_program = eval_program.clone(for_test=True) + exe = fluid.Executor(place) + exe.run(startup_prog) + + init_model(config, eval_program, exe) + + if alg in ['EAST', 'DB']: + eval_reader = reader_main(config=config, mode="test") + eval_info_dict = {'program':eval_program,\ + 'reader':eval_reader,\ + 'fetch_name_list':eval_fetch_name_list,\ + 'fetch_varname_list':eval_fetch_varname_list} + metrics = eval_det_run(exe, config, eval_info_dict, "test") + else: + dataset = config['Global']['dataset'] + assert dataset in ['lmdb', 'common'] + if dataset == 'common': + eval_reader = reader_main(config=config, mode="eval") + eval_info_dict = {'program': eval_program, \ + 'reader': eval_reader, \ + 
'fetch_name_list': eval_fetch_name_list, \ + 'fetch_varname_list': eval_fetch_varname_list} + metrics = eval_rec_run(exe, config, eval_info_dict, "eval") + print("Eval result:", metrics) + else: + eval_info_dict = {'program':eval_program,\ + 'fetch_name_list':eval_fetch_name_list,\ + 'fetch_varname_list':eval_fetch_varname_list} + test_rec_benchmark(exe, config, eval_info_dict) + + +if __name__ == '__main__': + parser = program.ArgsParser() + FLAGS = parser.parse_args() + main() diff --git a/tools/eval_utils/__init__.py b/tools/eval_utils/__init__.py new file mode 100644 index 00000000..abf198b9 --- /dev/null +++ b/tools/eval_utils/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/tools/eval_utils/eval_det_iou.py b/tools/eval_utils/eval_det_iou.py new file mode 100644 index 00000000..c6dacb3e --- /dev/null +++ b/tools/eval_utils/eval_det_iou.py @@ -0,0 +1,231 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +from collections import namedtuple +import numpy as np +from shapely.geometry import Polygon + + +class DetectionIoUEvaluator(object): + def __init__(self, iou_constraint=0.5, area_precision_constraint=0.5): + self.iou_constraint = iou_constraint + self.area_precision_constraint = area_precision_constraint + + def evaluate_image(self, gt, pred): + def get_union(pD, pG): + return Polygon(pD).union(Polygon(pG)).area + + def get_intersection_over_union(pD, pG): + return get_intersection(pD, pG) / get_union(pD, pG) + + def get_intersection(pD, pG): + return Polygon(pD).intersection(Polygon(pG)).area + + def compute_ap(confList, matchList, numGtCare): + correct = 0 + AP = 0 + if len(confList) > 0: + confList = np.array(confList) + matchList = np.array(matchList) + sorted_ind = np.argsort(-confList) + confList = confList[sorted_ind] + matchList = matchList[sorted_ind] + for n in range(len(confList)): + match = matchList[n] + if match: + correct += 1 + AP += float(correct) / (n + 1) + + if numGtCare > 0: + AP /= numGtCare + + return AP + + perSampleMetrics = {} + + matchedSum = 0 + + Rectangle = namedtuple('Rectangle', 'xmin ymin xmax ymax') + + numGlobalCareGt = 0 + numGlobalCareDet = 0 + + arrGlobalConfidences = [] + arrGlobalMatches = [] + + recall = 0 + precision = 0 + hmean = 0 + + detMatched = 0 + + iouMat = np.empty([1, 1]) + + gtPols = [] + detPols = [] + + gtPolPoints = [] + detPolPoints = [] + + # Array of Ground Truth Polygons' keys marked as don't Care + gtDontCarePolsNum = [] + # Array of Detected Polygons' matched with a don't Care GT + detDontCarePolsNum = [] + + pairs = [] + detMatchedNums = [] + + arrSampleConfidences = [] + arrSampleMatch = [] + + evaluationLog = "" + + # print(len(gt)) + for n in 
range(len(gt)): + points = gt[n]['points'] + # transcription = gt[n]['text'] + dontCare = gt[n]['ignore'] + points = Polygon(points) + points = points.buffer(0) + if not Polygon(points).is_valid or not Polygon(points).is_simple: + continue + + gtPol = points + gtPols.append(gtPol) + gtPolPoints.append(points) + if dontCare: + gtDontCarePolsNum.append(len(gtPols) - 1) + + evaluationLog += "GT polygons: " + str(len(gtPols)) + ( + " (" + str(len(gtDontCarePolsNum)) + " don't care)\n" + if len(gtDontCarePolsNum) > 0 else "\n") + + for n in range(len(pred)): + points = pred[n]['points'] + points = Polygon(points) + points = points.buffer(0) + if not Polygon(points).is_valid or not Polygon(points).is_simple: + continue + + detPol = points + detPols.append(detPol) + detPolPoints.append(points) + if len(gtDontCarePolsNum) > 0: + for dontCarePol in gtDontCarePolsNum: + dontCarePol = gtPols[dontCarePol] + intersected_area = get_intersection(dontCarePol, detPol) + pdDimensions = Polygon(detPol).area + precision = 0 if pdDimensions == 0 else intersected_area / pdDimensions + if (precision > self.area_precision_constraint): + detDontCarePolsNum.append(len(detPols) - 1) + break + + evaluationLog += "DET polygons: " + str(len(detPols)) + ( + " (" + str(len(detDontCarePolsNum)) + " don't care)\n" + if len(detDontCarePolsNum) > 0 else "\n") + + if len(gtPols) > 0 and len(detPols) > 0: + # Calculate IoU and precision matrixs + outputShape = [len(gtPols), len(detPols)] + iouMat = np.empty(outputShape) + gtRectMat = np.zeros(len(gtPols), np.int8) + detRectMat = np.zeros(len(detPols), np.int8) + for gtNum in range(len(gtPols)): + for detNum in range(len(detPols)): + pG = gtPols[gtNum] + pD = detPols[detNum] + iouMat[gtNum, detNum] = get_intersection_over_union(pD, pG) + + for gtNum in range(len(gtPols)): + for detNum in range(len(detPols)): + if gtRectMat[gtNum] == 0 and detRectMat[ + detNum] == 0 and gtNum not in gtDontCarePolsNum and detNum not in detDontCarePolsNum: + if 
iouMat[gtNum, detNum] > self.iou_constraint: + gtRectMat[gtNum] = 1 + detRectMat[detNum] = 1 + detMatched += 1 + pairs.append({'gt': gtNum, 'det': detNum}) + detMatchedNums.append(detNum) + evaluationLog += "Match GT #" + \ + str(gtNum) + " with Det #" + str(detNum) + "\n" + + numGtCare = (len(gtPols) - len(gtDontCarePolsNum)) + numDetCare = (len(detPols) - len(detDontCarePolsNum)) + if numGtCare == 0: + recall = float(1) + precision = float(0) if numDetCare > 0 else float(1) + else: + recall = float(detMatched) / numGtCare + precision = 0 if numDetCare == 0 else float(detMatched) / numDetCare + + hmean = 0 if (precision + recall) == 0 else 2.0 * \ + precision * recall / (precision + recall) + + matchedSum += detMatched + numGlobalCareGt += numGtCare + numGlobalCareDet += numDetCare + + perSampleMetrics = { + 'precision': precision, + 'recall': recall, + 'hmean': hmean, + 'pairs': pairs, + 'iouMat': [] if len(detPols) > 100 else iouMat.tolist(), + 'gtPolPoints': gtPolPoints, + 'detPolPoints': detPolPoints, + 'gtCare': numGtCare, + 'detCare': numDetCare, + 'gtDontCare': gtDontCarePolsNum, + 'detDontCare': detDontCarePolsNum, + 'detMatched': detMatched, + 'evaluationLog': evaluationLog + } + + return perSampleMetrics + + def combine_results(self, results): + numGlobalCareGt = 0 + numGlobalCareDet = 0 + matchedSum = 0 + for result in results: + numGlobalCareGt += result['gtCare'] + numGlobalCareDet += result['detCare'] + matchedSum += result['detMatched'] + + methodRecall = 0 if numGlobalCareGt == 0 else float( + matchedSum) / numGlobalCareGt + methodPrecision = 0 if numGlobalCareDet == 0 else float( + matchedSum) / numGlobalCareDet + methodHmean = 0 if methodRecall + methodPrecision == 0 else 2 * \ + methodRecall * methodPrecision / (methodRecall + methodPrecision) + # print(methodRecall, methodPrecision, methodHmean) + # sys.exit(-1) + methodMetrics = { + 'precision': methodPrecision, + 'recall': methodRecall, + 'hmean': methodHmean + } + + return methodMetrics + + 
+if __name__ == '__main__': + evaluator = DetectionIoUEvaluator() + gts = [[{ + 'points': [(0, 0), (1, 0), (1, 1), (0, 1)], + 'text': 1234, + 'ignore': False, + }, { + 'points': [(2, 2), (3, 2), (3, 3), (2, 3)], + 'text': 5678, + 'ignore': False, + }]] + preds = [[{ + 'points': [(0.1, 0.1), (1, 0), (1, 1), (0, 1)], + 'text': 123, + 'ignore': False, + }]] + results = [] + for gt, pred in zip(gts, preds): + results.append(evaluator.evaluate_image(gt, pred)) + metrics = evaluator.combine_results(results) + print(metrics) diff --git a/tools/eval_utils/eval_det_utils.py b/tools/eval_utils/eval_det_utils.py new file mode 100644 index 00000000..015cba99 --- /dev/null +++ b/tools/eval_utils/eval_det_utils.py @@ -0,0 +1,131 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import logging +import numpy as np + +import paddle.fluid as fluid + +__all__ = ['eval_det_run'] + +import logging +FORMAT = '%(asctime)s-%(levelname)s: %(message)s' +logging.basicConfig(level=logging.INFO, format=FORMAT) +logger = logging.getLogger(__name__) + +from ppocr.utils.utility import create_module +from .eval_det_iou import DetectionIoUEvaluator +import json +from copy import deepcopy +import cv2 +from ppocr.data.reader_main import reader_main + + +def cal_det_res(exe, config, eval_info_dict): + global_params = config['Global'] + save_res_path = global_params['save_res_path'] + postprocess_params = deepcopy(config["PostProcess"]) + postprocess_params.update(global_params) + postprocess = create_module(postprocess_params['function']) \ + (params=postprocess_params) + with open(save_res_path, "wb") as fout: + tackling_num = 0 + for data in eval_info_dict['reader'](): + img_num = len(data) + tackling_num = tackling_num + img_num + logger.info("test tackling num:%d", tackling_num) + img_list = [] + ratio_list = [] + img_name_list = [] + for ino in range(img_num): + img_list.append(data[ino][0]) + ratio_list.append(data[ino][1]) + img_name_list.append(data[ino][2]) + img_list = np.concatenate(img_list, axis=0) + outs = exe.run(eval_info_dict['program'], \ + feed={'image': img_list}, \ + fetch_list=eval_info_dict['fetch_varname_list']) + outs_dict = {} + for tno in range(len(outs)): + fetch_name = eval_info_dict['fetch_name_list'][tno] + fetch_value = np.array(outs[tno]) + outs_dict[fetch_name] = fetch_value + dt_boxes_list = postprocess(outs_dict, ratio_list) + for ino in range(img_num): + dt_boxes = dt_boxes_list[ino] + img_name = img_name_list[ino] + dt_boxes_json = [] + for box in dt_boxes: + tmp_json = {"transcription": ""} + tmp_json['points'] = box.tolist() + dt_boxes_json.append(tmp_json) + otstr = img_name + "\t" + 
json.dumps(dt_boxes_json) + "\n" + fout.write(otstr.encode()) + return + + +def load_label_infor(label_file_path, do_ignore=False): + img_name_label_dict = {} + with open(label_file_path, "rb") as fin: + lines = fin.readlines() + for line in lines: + substr = line.decode().strip("\n").split("\t") + bbox_infor = json.loads(substr[1]) + bbox_num = len(bbox_infor) + for bno in range(bbox_num): + text = bbox_infor[bno]['transcription'] + ignore = False + if text == "###" and do_ignore: + ignore = True + bbox_infor[bno]['ignore'] = ignore + img_name_label_dict[substr[0]] = bbox_infor + return img_name_label_dict + + +def cal_det_metrics(gt_label_path, save_res_path): + evaluator = DetectionIoUEvaluator() + gt_label_infor = load_label_infor(gt_label_path, do_ignore=True) + dt_label_infor = load_label_infor(save_res_path) + results = [] + for img_name in gt_label_infor: + gt_label = gt_label_infor[img_name] + if img_name not in dt_label_infor: + dt_label = [] + else: + dt_label = dt_label_infor[img_name] + result = evaluator.evaluate_image(gt_label, dt_label) + results.append(result) + methodMetrics = evaluator.combine_results(results) + return methodMetrics + + +def eval_det_run(exe, config, eval_info_dict, mode): + cal_det_res(exe, config, eval_info_dict) + + save_res_path = config['Global']['save_res_path'] + if mode == "eval": + gt_label_path = config['EvalReader']['label_file_path'] + metrics = cal_det_metrics(gt_label_path, save_res_path) + else: + gt_label_path = config['TestReader']['label_file_path'] + do_eval = config['TestReader']['do_eval'] + if do_eval: + metrics = cal_det_metrics(gt_label_path, save_res_path) + else: + metrics = {} + return metrics diff --git a/tools/eval_utils/eval_rec_utils.py b/tools/eval_utils/eval_rec_utils.py new file mode 100644 index 00000000..2d7d7e1d --- /dev/null +++ b/tools/eval_utils/eval_rec_utils.py @@ -0,0 +1,111 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import logging +import numpy as np + +import paddle.fluid as fluid + +__all__ = ['eval_rec_run', 'test_rec_benchmark'] + +import logging + +FORMAT = '%(asctime)s-%(levelname)s: %(message)s' +logging.basicConfig(level=logging.INFO, format=FORMAT) +logger = logging.getLogger(__name__) + +from ppocr.utils.character import cal_predicts_accuracy +from ppocr.utils.character import convert_rec_label_to_lod +from ppocr.utils.character import convert_rec_attention_infer_res +from ppocr.utils.utility import create_module +import json +from copy import deepcopy +import cv2 +from ppocr.data.reader_main import reader_main + + +def eval_rec_run(exe, config, eval_info_dict, mode): + """ + Run evaluation program, return program outputs. 
+ """ + char_ops = config['Global']['char_ops'] + total_loss = 0 + total_sample_num = 0 + total_acc_num = 0 + total_batch_num = 0 + if mode == "eval": + is_remove_duplicate = False + else: + is_remove_duplicate = True + + for data in eval_info_dict['reader'](): + img_num = len(data) + img_list = [] + label_list = [] + for ino in range(img_num): + img_list.append(data[ino][0]) + label_list.append(data[ino][1]) + img_list = np.concatenate(img_list, axis=0) + outs = exe.run(eval_info_dict['program'], \ + feed={'image': img_list}, \ + fetch_list=eval_info_dict['fetch_varname_list'], \ + return_numpy=False) + preds = np.array(outs[0]) + if preds.shape[1] != 1: + preds, preds_lod = convert_rec_attention_infer_res(preds) + else: + preds_lod = outs[0].lod()[0] + labels, labels_lod = convert_rec_label_to_lod(label_list) + acc, acc_num, sample_num = cal_predicts_accuracy( + char_ops, preds, preds_lod, labels, labels_lod, is_remove_duplicate) + total_acc_num += acc_num + total_sample_num += sample_num + total_batch_num += 1 + avg_acc = total_acc_num * 1.0 / total_sample_num + metrics = {'avg_acc': avg_acc, "total_acc_num": total_acc_num, \ + "total_sample_num": total_sample_num} + return metrics + + +def test_rec_benchmark(exe, config, eval_info_dict): + " 评估lmdb 数据" + eval_data_list = ['IIIT5k_3000', 'SVT', 'IC03_860', 'IC03_867', \ + 'IC13_857', 'IC13_1015', 'IC15_1811', 'IC15_2077', 'SVTP', 'CUTE80'] + eval_data_dir = config['TestReader']['lmdb_sets_dir'] + total_evaluation_data_number = 0 + total_correct_number = 0 + eval_data_acc_info = {} + for eval_data in eval_data_list: + config['TestReader']['lmdb_sets_dir'] = \ + eval_data_dir + "/" + eval_data + eval_reader = reader_main(config=config, mode="test") + eval_info_dict['reader'] = eval_reader + metrics = eval_rec_run(exe, config, eval_info_dict, "test") + total_evaluation_data_number += metrics['total_sample_num'] + total_correct_number += metrics['total_acc_num'] + eval_data_acc_info[eval_data] = metrics + + avg_acc 
= total_correct_number * 1.0 / total_evaluation_data_number + logger.info('-' * 50) + strs = "" + for eval_data in eval_data_list: + eval_acc = eval_data_acc_info[eval_data]['avg_acc'] + strs += "\n {}, accuracy:{:.6f}".format(eval_data, eval_acc) + strs += "\n average, accuracy:{:.6f}".format(avg_acc) + logger.info(strs) + logger.info('-' * 50) diff --git a/tools/export_model.py b/tools/export_model.py new file mode 100644 index 00000000..6c924f3f --- /dev/null +++ b/tools/export_model.py @@ -0,0 +1,88 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys +import time +import multiprocessing +import numpy as np + + +def set_paddle_flags(**kwargs): + for key, value in kwargs.items(): + if os.environ.get(key, None) is None: + os.environ[key] = str(value) + + +# NOTE(paddle-dev): All of these flags should be +# set before `import paddle`. Otherwise, it would +# not take any effect. 
+set_paddle_flags( + FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory +) + +import program +from paddle import fluid +from ppocr.utils.utility import initial_logger +logger = initial_logger() +from ppocr.utils.save_load import init_model +from ppocr.utils.character import CharacterOps +from ppocr.utils.utility import create_module + + +def main(): + config = program.load_config(FLAGS.config) + program.merge_config(FLAGS.opt) + logger.info(config) + + # check if set use_gpu=True in paddlepaddle cpu version + use_gpu = config['Global']['use_gpu'] + program.check_gpu(True) + + alg = config['Global']['algorithm'] + assert alg in ['EAST', 'DB', 'Rosetta', 'CRNN', 'STARNet', 'RARE'] + if alg in ['Rosetta', 'CRNN', 'STARNet', 'RARE']: + config['Global']['char_ops'] = CharacterOps(config['Global']) + + place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() + startup_prog = fluid.Program() + eval_program = fluid.Program() + + feeded_var_names, target_vars, fetches_var_name = program.build_export( + config, eval_program, startup_prog) + eval_program = eval_program.clone(for_test=True) + exe = fluid.Executor(place) + exe.run(startup_prog) + + init_model(config, eval_program, exe) + + fluid.io.save_inference_model( + dirname="./output/", + feeded_var_names=feeded_var_names, + main_program=eval_program, + target_vars=target_vars, + executor=exe, + model_filename='model', + params_filename='params') + print("save success, output_name_list:", fetches_var_name) + + +if __name__ == '__main__': + parser = program.ArgsParser() + FLAGS = parser.parse_args() + main() diff --git a/tools/infer/det_program.txt b/tools/infer/det_program.txt new file mode 100644 index 00000000..0da7070f --- /dev/null +++ b/tools/infer/det_program.txt @@ -0,0 +1 @@ + diff --git a/tools/infer/predict_det.py b/tools/infer/predict_det.py new file mode 100755 index 00000000..d830306e --- /dev/null +++ b/tools/infer/predict_det.py @@ -0,0 +1,169 @@ +# Copyright (c) 2020 PaddlePaddle Authors. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import utility +from ppocr.utils.utility import initial_logger +logger = initial_logger() +import cv2 +from ppocr.data.det.east_process import EASTProcessTest +from ppocr.data.det.db_process import DBProcessTest +from ppocr.postprocess.db_postprocess import DBPostProcess +from ppocr.postprocess.east_postprocess import EASTPostPocess +import copy +import numpy as np +import math +import time + + +class TextDetector(object): + def __init__(self, args): + max_side_len = args.det_max_side_len + self.det_algorithm = args.det_algorithm + preprocess_params = {'max_side_len': max_side_len} + postprocess_params = {} + if self.det_algorithm == "DB": + self.preprocess_op = DBProcessTest(preprocess_params) + postprocess_params["thresh"] = args.det_db_thresh + postprocess_params["box_thresh"] = args.det_db_box_thresh + postprocess_params["max_candidates"] = 1000 + self.postprocess_op = DBPostProcess(postprocess_params) + elif self.det_algorithm == "EAST": + self.preprocess_op = EASTProcessTest(preprocess_params) + postprocess_params["score_thresh"] = args.det_east_score_thresh + postprocess_params["cover_thresh"] = args.det_east_cover_thresh + postprocess_params["nms_thresh"] = args.det_east_nms_thresh + self.postprocess_op = EASTPostPocess(postprocess_params) + else: + logger.info("unknown det_algorithm:{}".format(self.det_algorithm)) + sys.exit(0) + + self.predictor, self.input_tensor, self.output_tensors =\ 
+ utility.create_predictor(args, mode="det") + + def order_points_clockwise(self, pts): + ####### + ## https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py + ######## + # sort the points based on their x-coordinates + xSorted = pts[np.argsort(pts[:, 0]), :] + + # grab the left-most and right-most points from the sorted + # x-roodinate points + leftMost = xSorted[:2, :] + rightMost = xSorted[2:, :] + + # now, sort the left-most coordinates according to their + # y-coordinates so we can grab the top-left and bottom-left + # points, respectively + leftMost = leftMost[np.argsort(leftMost[:, 1]), :] + (tl, bl) = leftMost + + rightMost = rightMost[np.argsort(rightMost[:, 1]), :] + (tr, br) = rightMost + + rect = np.array([tl, tr, br, bl], dtype="float32") + return rect + + def expand_det_res(self, points, bbox_height, bbox_width, img_height, + img_width): + if bbox_height * 1.0 / bbox_width >= 2.0: + expand_w = bbox_width * 0.20 + expand_h = bbox_width * 0.20 + elif bbox_width * 1.0 / bbox_height >= 3.0: + expand_w = bbox_height * 0.20 + expand_h = bbox_height * 0.20 + else: + expand_w = bbox_height * 0.1 + expand_h = bbox_height * 0.1 + + points[0, 0] = int(max((points[0, 0] - expand_w), 0)) + points[1, 0] = int(min((points[1, 0] + expand_w), img_width)) + points[3, 0] = int(max((points[3, 0] - expand_w), 0)) + points[2, 0] = int(min((points[2, 0] + expand_w), img_width)) + + points[0, 1] = int(max((points[0, 1] - expand_h), 0)) + points[1, 1] = int(max((points[1, 1] - expand_h), 0)) + points[3, 1] = int(min((points[3, 1] + expand_h), img_height)) + points[2, 1] = int(min((points[2, 1] + expand_h), img_height)) + return points + + def filter_tag_det_res(self, dt_boxes, image_shape): + img_height, img_width = image_shape[0:2] + dt_boxes_new = [] + for box in dt_boxes: + box = self.order_points_clockwise(box) + left = int(np.min(box[:, 0])) + right = int(np.max(box[:, 0])) + top = int(np.min(box[:, 1])) + bottom = int(np.max(box[:, 1])) + bbox_height = 
bottom - top + bbox_width = right - left + diffh = math.fabs(box[0, 1] - box[1, 1]) + diffw = math.fabs(box[0, 0] - box[3, 0]) + rect_width = int(np.linalg.norm(box[0] - box[1])) + rect_height = int(np.linalg.norm(box[0] - box[3])) + if rect_width <= 10 or rect_height <= 10: + continue + if diffh <= 10 and diffw <= 10: + box = self.expand_det_res( + copy.deepcopy(box), bbox_height, bbox_width, img_height, + img_width) + dt_boxes_new.append(box) + dt_boxes = np.array(dt_boxes_new) + return dt_boxes + + def __call__(self, img): + ori_im = img.copy() + im, ratio_list = self.preprocess_op(img) + if im is None: + return None, 0 + im = im.copy() + starttime = time.time() + self.input_tensor.copy_from_cpu(im) + self.predictor.zero_copy_run() + outputs = [] + for output_tensor in self.output_tensors: + output = output_tensor.copy_to_cpu() + outputs.append(output) + outs_dict = {} + if self.det_algorithm == "EAST": + outs_dict['f_score'] = outputs[0] + outs_dict['f_geo'] = outputs[1] + else: + outs_dict['maps'] = [outputs[0]] + dt_boxes_list = self.postprocess_op(outs_dict, [ratio_list]) + dt_boxes = dt_boxes_list[0] + dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape) + elapse = time.time() - starttime + return dt_boxes, elapse + + +if __name__ == "__main__": + args = utility.parse_args() + image_file_list = utility.get_image_file_list(args.image_dir) + text_detector = TextDetector(args) + count = 0 + total_time = 0 + for image_file in image_file_list: + img = cv2.imread(image_file) + if img is None: + logger.info("error in loading image:{}".format(image_file)) + continue + dt_boxes, elapse = text_detector(img) + if count > 0: + total_time += elapse + count += 1 + print("Predict time of %s:" % image_file, elapse) + utility.draw_text_det_res(dt_boxes, image_file) + print("Avg Time:", total_time / (count - 1)) diff --git a/tools/infer/predict_eval.py b/tools/infer/predict_eval.py new file mode 100755 index 00000000..53ee7e2d --- /dev/null +++ 
b/tools/infer/predict_eval.py @@ -0,0 +1,76 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import utility +from ppocr.utils.utility import initial_logger +logger = initial_logger() +import cv2 +import predict_system +import copy +import numpy as np +import math +import time +import json + +if __name__ == "__main__": + args = utility.parse_args() + text_sys = predict_system.TextSystem(args) + + image_file_list = [] + label_file_path = "./eval_perform/gt_res/test_chinese_ic15_500_4pts.txt" + img_set_path = "./eval_perform/" + with open(label_file_path, "rb") as fin: + lines = fin.readlines() + for line in lines: + substr = line.decode('utf-8').strip("\n").split("\t") + if "lsvt" in substr[0]: + continue + image_file_list.append(substr[0]) + + total_time_all = 0 + count = 0 + save_path = "./output/predict.txt" + fout = open(save_path, "wb") + for image_name in image_file_list: + image_file = img_set_path + image_name + img = cv2.imread(image_file) + if img is None: + logger.info("error in loading image:{}".format(image_file)) + continue + count += 1 + total_time = 0 + starttime = time.time() + dt_boxes, rec_res = text_sys(img) + elapse = time.time() - starttime + total_time_all += elapse + print("Predict time of %s(%d): %.3fs" % (image_file, count, elapse)) + dt_num = len(dt_boxes) + bbox_list = [] + for dno in range(dt_num): + box = dt_boxes[dno] + text, score = rec_res[dno] + points = [] + for tno 
in range(len(box)): + points.append([box[tno][0] * 1.0, box[tno][1] * 1.0]) + bbox_list.append({ + "transcription": text, + "points": points, + "scores": score * 1.0 + }) + otstr = image_name + "\t" + json.dumps(bbox_list) + "\n" + fout.write(otstr.encode('utf-8')) + avg_time = total_time_all / count + logger.info("avg_time: {0}".format(avg_time)) + logger.info("avg_fps: {0}".format(1.0 / avg_time)) + fout.close() diff --git a/tools/infer/predict_eval_new.py b/tools/infer/predict_eval_new.py new file mode 100755 index 00000000..514b1ad4 --- /dev/null +++ b/tools/infer/predict_eval_new.py @@ -0,0 +1,72 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import utility +from ppocr.utils.utility import initial_logger +logger = initial_logger() +import cv2 +import predict_system +import copy +import numpy as np +import math +import time +import json +import os + +if __name__ == "__main__": + args = utility.parse_args() + text_sys = predict_system.TextSystem(args) + + image_file_list = [] + img_set_path = "/paddle/code/dyn/test_imgs/rctw_samples/" + image_file_list = os.listdir(img_set_path) + + total_time_all = 0 + count = 0 + save_path = "./output/predict.txt" + fout = open(save_path, "wb") + for image_name in image_file_list: + image_file = img_set_path + image_name + img = cv2.imread(image_file) + if img is None: + logger.info("error in loading image:{}".format(image_file)) + continue + count += 1 + starttime = time.time() + dt_boxes, rec_res = text_sys(img) + if dt_boxes is None: + count -= 1 + continue + elapse = time.time() - starttime + total_time_all += elapse + print("Predict time of %s(%d): %.3fs" % (image_file, count, elapse)) + dt_num = len(dt_boxes) + bbox_list = [] + for dno in range(dt_num): + box = dt_boxes[dno] + text, score = rec_res[dno] + points = [] + for tno in range(len(box)): + points.append([box[tno][0] * 1.0, box[tno][1] * 1.0]) + bbox_list.append({ + "transcription": text, + "points": points, + "scores": score * 1.0 + }) + otstr = image_name + "\t" + json.dumps(bbox_list) + "\n" + fout.write(otstr.encode('utf-8')) + avg_time = total_time_all / count + logger.info("avg_time: {0}".format(avg_time)) + logger.info("avg_fps: {0}".format(1.0 / avg_time)) + fout.close() diff --git a/tools/infer/predict_rec.py b/tools/infer/predict_rec.py new file mode 100755 index 00000000..42d4059c --- /dev/null +++ b/tools/infer/predict_rec.py @@ -0,0 +1,115 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import utility +from ppocr.utils.utility import initial_logger +logger = initial_logger() +import cv2 + +import copy +import numpy as np +import math +import time +from ppocr.utils.character import CharacterOps + + +class TextRecognizer(object): + def __init__(self, args): + self.predictor, self.input_tensor, self.output_tensors =\ + utility.create_predictor(args, mode="rec") + image_shape = [int(v) for v in args.rec_image_shape.split(",")] + self.rec_image_shape = image_shape + char_ops_params = {} + char_ops_params["character_type"] = args.rec_char_type + char_ops_params["character_dict_path"] = args.rec_char_dict_path + char_ops_params['loss_type'] = 'ctc' + self.char_ops = CharacterOps(char_ops_params) + + def resize_norm_img(self, img): + imgC, imgH, imgW = self.rec_image_shape + h = img.shape[0] + w = img.shape[1] + ratio = w / float(h) + if math.ceil(imgH * ratio) > imgW: + resized_w = imgW + else: + resized_w = int(math.ceil(imgH * ratio)) + resized_image = cv2.resize(img, (resized_w, imgH)) + resized_image = resized_image.astype('float32') + resized_image = resized_image.transpose((2, 0, 1)) / 255 + resized_image -= 0.5 + resized_image /= 0.5 + padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) + padding_im[:, :, 0:resized_w] = resized_image + return padding_im + + def __call__(self, img_list): + img_num = len(img_list) + batch_num = 15 + rec_res = [] + predict_time = 0 + for beg_img_no in range(0, img_num, batch_num): + end_img_no = min(img_num, beg_img_no + batch_num) + norm_img_batch = [] + for ino in range(beg_img_no, end_img_no): + 
norm_img = self.resize_norm_img(img_list[ino]) + norm_img = norm_img[np.newaxis, :] + norm_img_batch.append(norm_img) + norm_img_batch = np.concatenate(norm_img_batch) + norm_img_batch = norm_img_batch.copy() + starttime = time.time() + self.input_tensor.copy_from_cpu(norm_img_batch) + self.predictor.zero_copy_run() + rec_idx_batch = self.output_tensors[0].copy_to_cpu() + rec_idx_lod = self.output_tensors[0].lod()[0] + predict_batch = self.output_tensors[1].copy_to_cpu() + predict_lod = self.output_tensors[1].lod()[0] + elapse = time.time() - starttime + predict_time += elapse + starttime = time.time() + for rno in range(len(rec_idx_lod) - 1): + beg = rec_idx_lod[rno] + end = rec_idx_lod[rno + 1] + rec_idx_tmp = rec_idx_batch[beg:end, 0] + preds_text = self.char_ops.decode(rec_idx_tmp) + beg = predict_lod[rno] + end = predict_lod[rno + 1] + probs = predict_batch[beg:end, :] + ind = np.argmax(probs, axis=1) + blank = probs.shape[1] + valid_ind = np.where(ind != (blank - 1))[0] + score = np.mean(probs[valid_ind, ind[valid_ind]]) + rec_res.append([preds_text, score]) + return rec_res, predict_time + + +if __name__ == "__main__": + args = utility.parse_args() + image_file_list = utility.get_image_file_list(args.image_dir) + text_recognizer = TextRecognizer(args) + valid_image_file_list = [] + img_list = [] + for image_file in image_file_list: + img = cv2.imread(image_file) + if img is None: + logger.info("error in loading image:{}".format(image_file)) + continue + valid_image_file_list.append(image_file) + img_list.append(img) + rec_res, predict_time = text_recognizer(img_list) + rec_res, predict_time = text_recognizer(img_list) + for ino in range(len(img_list)): + print("Predicts of %s:%s" % (valid_image_file_list[ino], rec_res[ino])) + print("Total predict time for %d images:%.3f" % + (len(img_list), predict_time)) diff --git a/tools/infer/predict_system.py b/tools/infer/predict_system.py new file mode 100755 index 00000000..defa0615 --- /dev/null +++ 
b/tools/infer/predict_system.py @@ -0,0 +1,97 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import utility +from ppocr.utils.utility import initial_logger +logger = initial_logger() +import cv2 +import predict_det +import predict_rec +import copy +import numpy as np +import math +import time + + +class TextSystem(object): + def __init__(self, args): + self.text_detector = predict_det.TextDetector(args) + self.text_recognizer = predict_rec.TextRecognizer(args) + + def get_rotate_crop_image(self, img, points): + img_height, img_width = img.shape[0:2] + left = int(np.min(points[:, 0])) + right = int(np.max(points[:, 0])) + top = int(np.min(points[:, 1])) + bottom = int(np.max(points[:, 1])) + img_crop = img[top:bottom, left:right, :].copy() + points[:, 0] = points[:, 0] - left + points[:, 1] = points[:, 1] - top + img_crop_width = int(np.linalg.norm(points[0] - points[1])) + img_crop_height = int(np.linalg.norm(points[0] - points[3])) + pts_std = np.float32([[0, 0], [img_crop_width, 0],\ + [img_crop_width, img_crop_height], [0, img_crop_height]]) + M = cv2.getPerspectiveTransform(points, pts_std) + dst_img = cv2.warpPerspective( + img_crop, + M, (img_crop_width, img_crop_height), + borderMode=cv2.BORDER_REPLICATE) + dst_img_height, dst_img_width = dst_img.shape[0:2] + if dst_img_height * 1.0 / dst_img_width >= 1.5: + dst_img = np.rot90(dst_img) + return dst_img + + def print_draw_crop_rec_res(self, 
img_crop_list, rec_res): + bbox_num = len(img_crop_list) + for bno in range(bbox_num): + cv2.imwrite("./output/img_crop_%d.jpg" % bno, img_crop_list[bno]) + print(bno, rec_res[bno]) + + def __call__(self, img): + ori_im = img.copy() + dt_boxes, elapse = self.text_detector(img) + if dt_boxes is None: + return None, None + img_crop_list = [] + for bno in range(len(dt_boxes)): + tmp_box = copy.deepcopy(dt_boxes[bno]) + img_crop = self.get_rotate_crop_image(ori_im, tmp_box) + img_crop_list.append(img_crop) + rec_res, elapse = self.text_recognizer(img_crop_list) + # self.print_draw_crop_rec_res(img_crop_list, rec_res) + return dt_boxes, rec_res + + +if __name__ == "__main__": + args = utility.parse_args() + image_file_list = utility.get_image_file_list(args.image_dir) + text_sys = TextSystem(args) + for image_file in image_file_list: + img = cv2.imread(image_file) + if img is None: + logger.info("error in loading image:{}".format(image_file)) + continue + starttime = time.time() + dt_boxes, rec_res = text_sys(img) + elapse = time.time() - starttime + print("Predict time of %s: %.3fs" % (image_file, elapse)) + dt_num = len(dt_boxes) + dt_boxes_final = [] + for dno in range(dt_num): + text, score = rec_res[dno] + if score >= 0: + text_str = "%s, %.3f" % (text, score) + print(text_str) + dt_boxes_final.append(dt_boxes[dno]) + utility.draw_text_det_res(dt_boxes_final, image_file) diff --git a/tools/infer/utility.py b/tools/infer/utility.py new file mode 100755 index 00000000..a4f9f03d --- /dev/null +++ b/tools/infer/utility.py @@ -0,0 +1,147 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os, sys +from ppocr.utils.utility import initial_logger +logger = initial_logger() +from paddle.fluid.core import PaddleTensor +from paddle.fluid.core import AnalysisConfig +from paddle.fluid.core import create_paddle_predictor +import cv2 +import numpy as np + + +def parse_args(): + def str2bool(v): + return v.lower() in ("true", "t", "1") + + parser = argparse.ArgumentParser() + #params for prediction engine + parser.add_argument("--use_gpu", type=str2bool, default=True) + parser.add_argument("--ir_optim", type=str2bool, default=True) + parser.add_argument("--use_tensorrt", type=str2bool, default=False) + parser.add_argument("--gpu_mem", type=int, default=8000) + + #params for text detector + parser.add_argument("--image_dir", type=str) + parser.add_argument("--det_algorithm", type=str, default='DB') + parser.add_argument("--det_model_dir", type=str) + parser.add_argument("--det_max_side_len", type=float, default=960) + + #DB parmas + parser.add_argument("--det_db_thresh", type=float, default=0.3) + parser.add_argument("--det_db_box_thresh", type=float, default=0.5) + + #EAST parmas + parser.add_argument("--det_east_score_thresh", type=float, default=0.8) + parser.add_argument("--det_east_cover_thresh", type=float, default=0.1) + parser.add_argument("--det_east_nms_thresh", type=float, default=0.2) + + #params for text recognizer + parser.add_argument("--rec_algorithm", type=str, default='CRNN') + parser.add_argument("--rec_model_dir", type=str) + parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320") + 
parser.add_argument("--rec_char_type", type=str, default='ch') + parser.add_argument( + "--rec_char_dict_path", + type=str, + default="./ppocr/utils/ppocr_keys_v1.txt") + return parser.parse_args() + + +def get_image_file_list(image_dir): + image_file_list = [] + if image_dir is None: + return image_file_list + if os.path.isfile(image_dir): + image_file_list = [image_dir] + elif os.path.isdir(image_dir): + for single_file in os.listdir(image_dir): + image_file_list.append(os.path.join(image_dir, single_file)) + return image_file_list + + +def create_predictor(args, mode): + if mode == "det": + model_dir = args.det_model_dir + else: + model_dir = args.rec_model_dir + + if model_dir is None: + logger.info("not find {} model file path {}".format(mode, model_dir)) + sys.exit(0) + model_file_path = model_dir + "/model" + params_file_path = model_dir + "/params" + if not os.path.exists(model_file_path): + logger.info("not find model file path {}".format(model_file_path)) + sys.exit(0) + if not os.path.exists(params_file_path): + logger.info("not find params file path {}".format(params_file_path)) + sys.exit(0) + + config = AnalysisConfig(model_file_path, params_file_path) + + if args.use_gpu: + config.enable_use_gpu(args.gpu_mem, 0) + else: + config.disable_gpu() + + config.disable_glog_info() + config.switch_ir_optim(args.ir_optim) + # if args.use_tensorrt: + # config.enable_tensorrt_engine( + # precision_mode=AnalysisConfig.Precision.Half + # if args.use_fp16 else AnalysisConfig.Precision.Float32, + # max_batch_size=args.batch_size) + + config.enable_memory_optim() + # use zero copy + config.switch_use_feed_fetch_ops(False) + predictor = create_paddle_predictor(config) + input_names = predictor.get_input_names() + input_tensor = predictor.get_input_tensor(input_names[0]) + output_names = predictor.get_output_names() + output_tensors = [] + for output_name in output_names: + output_tensor = predictor.get_output_tensor(output_name) + output_tensors.append(output_tensor) 
+ return predictor, input_tensor, output_tensors + + +def draw_text_det_res(dt_boxes, img_path): + src_im = cv2.imread(img_path) + for box in dt_boxes: + box = np.array(box).astype(np.int32).reshape(-1, 2) + cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2) + img_name_pure = img_path.split("/")[-1] + cv2.imwrite("./output/%s" % img_name_pure, src_im) + + +if __name__ == '__main__': + args = parse_args() + args.use_gpu = False + root_path = "/Users/liuweiwei06/Desktop/TEST_CODES/icode/baidu/personal-code/PaddleOCR/" + args.det_model_dir = root_path + "test_models/public_v1/ch_det_mv3_db" + + predictor, input_tensor, output_tensors = create_predictor(args, mode='det') + print(predictor.get_input_names()) + print(predictor.get_output_names()) + print(predictor.program(), file=open("det_program.txt", 'w')) + + args.rec_model_dir = root_path + "test_models/public_v1/ch_rec_mv3_crnn/" + rec_predictor, input_tensor, output_tensors = create_predictor( + args, mode='rec') + print(rec_predictor.get_input_names()) + print(rec_predictor.get_output_names()) diff --git a/tools/infer_rec.py b/tools/infer_rec.py new file mode 100755 index 00000000..71977391 --- /dev/null +++ b/tools/infer_rec.py @@ -0,0 +1,125 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import time +import multiprocessing +import numpy as np + + +def set_paddle_flags(**kwargs): + for key, value in kwargs.items(): + if os.environ.get(key, None) is None: + os.environ[key] = str(value) + + +# NOTE(paddle-dev): All of these flags should be +# set before `import paddle`. Otherwise, it would +# not take any effect. +set_paddle_flags( + FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory +) + +from paddle import fluid + +# from ppocr.utils.utility import load_config, merge_config +from ppocr.data.reader_main import test_reader +import program +from paddle import fluid +from ppocr.utils.utility import initial_logger +logger = initial_logger() +from ppocr.data.reader_main import reader_main +from ppocr.utils.save_load import init_model +from ppocr.utils.character import CharacterOps +from ppocr.utils.utility import create_module + +logger = initial_logger() + + +def main(): + config = program.load_config(FLAGS.config) + program.merge_config(FLAGS.opt) + logger.info(config) + char_ops = CharacterOps(config['Global']) + config['Global']['char_ops'] = char_ops + + # check if set use_gpu=True in paddlepaddle cpu version + use_gpu = config['Global']['use_gpu'] + # check_gpu(use_gpu) + + place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() + exe = fluid.Executor(place) + + rec_model = create_module(config['Architecture']['function'])(params=config) + + startup_prog = fluid.Program() + eval_prog = fluid.Program() + with fluid.program_guard(eval_prog, startup_prog): + with fluid.unique_name.guard(): + _, outputs = rec_model(mode="test") + fetch_name_list = list(outputs.keys()) + fetch_varname_list = [outputs[v].name for v in fetch_name_list] + eval_prog = eval_prog.clone(for_test=True) + exe.run(startup_prog) + + init_model(config, eval_prog, exe) + + blobs = reader_main(config, 'test') + imgs = next(blobs()) + for img in 
imgs: + predict = exe.run(program=eval_prog, + feed={"image": img}, + fetch_list=fetch_varname_list, + return_numpy=False) + + preds = np.array(predict[0]) + if preds.shape[1] == 1: + preds = preds.reshape(-1) + preds_lod = predict[0].lod()[0] + preds_text = char_ops.decode(preds) + else: + end_pos = np.where(preds[0, :] == 1)[0] + if len(end_pos) <= 1: + preds_text = preds[0, 1:] + else: + preds_text = preds[0, 1:end_pos[1]] + preds_text = preds_text.reshape(-1) + preds_text = char_ops.decode(preds_text) + + print(preds) + print(preds_text) + + # save for inference model + target_var = [] + for key, values in outputs.items(): + target_var.append(values) + + fluid.io.save_inference_model( + "./output/", + feeded_var_names=['image'], + target_vars=target_var, + executor=exe, + main_program=eval_prog, + model_filename="model", + params_filename="params") + + +if __name__ == '__main__': + parser = program.ArgsParser() + FLAGS = parser.parse_args() + main() diff --git a/tools/program.py b/tools/program.py new file mode 100755 index 00000000..a34e56ca --- /dev/null +++ b/tools/program.py @@ -0,0 +1,365 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from argparse import ArgumentParser, RawDescriptionHelpFormatter +import sys +import yaml +import os +from ppocr.utils.utility import create_module +from ppocr.utils.utility import initial_logger +logger = initial_logger() + +import paddle.fluid as fluid +import time +from ppocr.utils.stats import TrainingStats +from eval_utils.eval_det_utils import eval_det_run +from eval_utils.eval_rec_utils import eval_rec_run +from ppocr.utils.save_load import save_model +import numpy as np +from ppocr.utils.character import cal_predicts_accuracy + + +class ArgsParser(ArgumentParser): + def __init__(self): + super(ArgsParser, self).__init__( + formatter_class=RawDescriptionHelpFormatter) + self.add_argument("-c", "--config", help="configuration file to use") + self.add_argument( + "-o", "--opt", nargs='+', help="set configuration options") + + def parse_args(self, argv=None): + args = super(ArgsParser, self).parse_args(argv) + assert args.config is not None, \ + "Please specify --config=configure_file_path." + args.opt = self._parse_opt(args.opt) + return args + + def _parse_opt(self, opts): + config = {} + if not opts: + return config + for s in opts: + s = s.strip() + k, v = s.split('=') + config[k] = yaml.load(v, Loader=yaml.Loader) + return config + + +class AttrDict(dict): + """Single level attribute dict, NOT recursive""" + + def __init__(self, **kwargs): + super(AttrDict, self).__init__() + super(AttrDict, self).update(kwargs) + + def __getattr__(self, key): + if key in self: + return self[key] + raise AttributeError("object has no attribute '{}'".format(key)) + + +global_config = AttrDict() + + +def load_config(file_path): + """ + Load config from yml/yaml file. + + Args: + file_path (str): Path of the config file to be loaded. 
+ + Returns: global config + """ + _, ext = os.path.splitext(file_path) + assert ext in ['.yml', '.yaml'], "only support yaml files for now" + merge_config(yaml.load(open(file_path), Loader=yaml.Loader)) + assert "reader_yml" in global_config['Global'],\ + "absence reader_yml in global" + reader_file_path = global_config['Global']['reader_yml'] + _, ext = os.path.splitext(reader_file_path) + assert ext in ['.yml', '.yaml'], "only support yaml files for reader" + merge_config(yaml.load(open(reader_file_path), Loader=yaml.Loader)) + return global_config + + +def merge_config(config): + """ + Merge config into global config. + + Args: + config (dict): Config to be merged. + + Returns: global config + """ + for key, value in config.items(): + if "." not in key: + if isinstance(value, dict) and key in global_config: + global_config[key].update(value) + else: + global_config[key] = value + else: + sub_keys = key.split('.') + assert (sub_keys[0] in global_config) + cur = global_config[sub_keys[0]] + for idx, sub_key in enumerate(sub_keys[1:]): + assert (sub_key in cur) + if idx == len(sub_keys) - 2: + cur[sub_key] = value + else: + cur = cur[sub_key] + + +def check_gpu(use_gpu): + """ + Log error and exit when set use_gpu=true in paddlepaddle + cpu version. + """ + err = "Config use_gpu cannot be set as true while you are " \ + "using paddlepaddle cpu version ! \nPlease try: \n" \ + "\t1. Install paddlepaddle-gpu to run model on GPU \n" \ + "\t2. Set use_gpu as false in config file to run " \ + "model on CPU" + + try: + if use_gpu and not fluid.is_compiled_with_cuda(): + logger.error(err) + sys.exit(1) + except Exception as e: + pass + + +def build(config, main_prog, startup_prog, mode): + """ + Build a program using a model and an optimizer + 1. create feeds + 2. create a dataloader + 3. create a model + 4. create fetchs + 5. 
create an optimizer + + Args: + config(dict): config + main_prog(): main program + startup_prog(): startup program + is_train(bool): train or valid + + Returns: + dataloader(): a bridge between the model and the data + fetchs(dict): dict of model outputs(included loss and measures) + """ + with fluid.program_guard(main_prog, startup_prog): + with fluid.unique_name.guard(): + func_infor = config['Architecture']['function'] + model = create_module(func_infor)(params=config) + dataloader, outputs = model(mode=mode) + fetch_name_list = list(outputs.keys()) + fetch_varname_list = [outputs[v].name for v in fetch_name_list] + opt_loss_name = None + if mode == "train": + opt_loss = outputs['total_loss'] + opt_params = config['Optimizer'] + optimizer = create_module(opt_params['function'])(opt_params) + optimizer.minimize(opt_loss) + opt_loss_name = opt_loss.name + global_lr = optimizer._global_learning_rate() + global_lr.persistable = True + fetch_name_list.insert(0, "lr") + fetch_varname_list.insert(0, global_lr.name) + return (dataloader, fetch_name_list, fetch_varname_list, opt_loss_name) + + +def build_export(config, main_prog, startup_prog): + """ + Build a program using a model and an optimizer + 1. create feeds + 2. create a dataloader + 3. create a model + 4. create fetchs + 5. 
create an optimizer + + Args: + config(dict): config + main_prog(): main program + startup_prog(): startup program + is_train(bool): train or valid + + Returns: + dataloader(): a bridge between the model and the data + fetchs(dict): dict of model outputs(included loss and measures) + """ + with fluid.program_guard(main_prog, startup_prog): + with fluid.unique_name.guard(): + func_infor = config['Architecture']['function'] + model = create_module(func_infor)(params=config) + image, outputs = model(mode='export') + fetches_var = [outputs[name] for name in outputs] + fetches_var_name = [name for name in outputs] + feeded_var_names = [image.name] + target_vars = fetches_var + return feeded_var_names, target_vars, fetches_var_name + + +def create_multi_devices_program(program, loss_var_name): + build_strategy = fluid.BuildStrategy() + build_strategy.memory_optimize = False + build_strategy.enable_inplace = True + exec_strategy = fluid.ExecutionStrategy() + exec_strategy.num_iteration_per_drop_scope = 1 + compile_program = fluid.CompiledProgram(program).with_data_parallel( + loss_name=loss_var_name, + build_strategy=build_strategy, + exec_strategy=exec_strategy) + return compile_program + + +def train_eval_det_run(config, exe, train_info_dict, eval_info_dict): + train_batch_id = 0 + log_smooth_window = config['Global']['log_smooth_window'] + epoch_num = config['Global']['epoch_num'] + print_batch_step = config['Global']['print_batch_step'] + eval_batch_step = config['Global']['eval_batch_step'] + save_epoch_step = config['Global']['save_epoch_step'] + save_model_dir = config['Global']['save_model_dir'] + train_stats = TrainingStats(log_smooth_window, + train_info_dict['fetch_name_list']) + best_eval_hmean = -1 + best_batch_id = 0 + best_epoch = 0 + train_loader = train_info_dict['reader'] + for epoch in range(epoch_num): + train_loader.start() + try: + while True: + t1 = time.time() + train_outs = exe.run( + program=train_info_dict['compile_program'], + 
fetch_list=train_info_dict['fetch_varname_list'], + return_numpy=False) + stats = {} + for tno in range(len(train_outs)): + fetch_name = train_info_dict['fetch_name_list'][tno] + fetch_value = np.mean(np.array(train_outs[tno])) + stats[fetch_name] = fetch_value + t2 = time.time() + train_batch_elapse = t2 - t1 + train_stats.update(stats) + if train_batch_id > 0 and train_batch_id \ + % print_batch_step == 0: + logs = train_stats.log() + strs = 'epoch: {}, iter: {}, {}, time: {:.3f}'.format( + epoch, train_batch_id, logs, train_batch_elapse) + logger.info(strs) + + if train_batch_id > 0 and\ + train_batch_id % eval_batch_step == 0: + metrics = eval_det_run(exe, config, eval_info_dict, "eval") + hmean = metrics['hmean'] + if hmean >= best_eval_hmean: + best_eval_hmean = hmean + best_batch_id = train_batch_id + best_epoch = epoch + save_path = save_model_dir + "/best_accuracy" + save_model(train_info_dict['train_program'], save_path) + strs = 'Test iter: {}, metrics:{}, best_hmean:{:.6f}, best_epoch:{}, best_batch_id:{}'.format( + train_batch_id, metrics, best_eval_hmean, best_epoch, + best_batch_id) + logger.info(strs) + train_batch_id += 1 + + except fluid.core.EOFException: + train_loader.reset() + + if epoch > 0 and epoch % save_epoch_step == 0: + save_path = save_model_dir + "/iter_epoch_%d" % (epoch) + save_model(train_info_dict['train_program'], save_path) + return + + +def train_eval_rec_run(config, exe, train_info_dict, eval_info_dict): + train_batch_id = 0 + log_smooth_window = config['Global']['log_smooth_window'] + epoch_num = config['Global']['epoch_num'] + print_batch_step = config['Global']['print_batch_step'] + eval_batch_step = config['Global']['eval_batch_step'] + save_epoch_step = config['Global']['save_epoch_step'] + save_model_dir = config['Global']['save_model_dir'] + train_stats = TrainingStats(log_smooth_window, ['loss', 'acc']) + best_eval_acc = -1 + best_batch_id = 0 + best_epoch = 0 + train_loader = train_info_dict['reader'] + for epoch in 
range(epoch_num): + train_loader.start() + try: + while True: + t1 = time.time() + train_outs = exe.run( + program=train_info_dict['compile_program'], + fetch_list=train_info_dict['fetch_varname_list'], + return_numpy=False) + fetch_map = dict( + zip(train_info_dict['fetch_name_list'], + range(len(train_outs)))) + + loss = np.mean(np.array(train_outs[fetch_map['total_loss']])) + lr = np.mean(np.array(train_outs[fetch_map['lr']])) + preds_idx = fetch_map['decoded_out'] + preds = np.array(train_outs[preds_idx]) + preds_lod = train_outs[preds_idx].lod()[0] + labels_idx = fetch_map['label'] + labels = np.array(train_outs[labels_idx]) + labels_lod = train_outs[labels_idx].lod()[0] + + acc, acc_num, img_num = cal_predicts_accuracy( + config['Global']['char_ops'], preds, preds_lod, labels, + labels_lod) + t2 = time.time() + train_batch_elapse = t2 - t1 + stats = {'loss': loss, 'acc': acc} + train_stats.update(stats) + if train_batch_id > 0 and train_batch_id \ + % print_batch_step == 0: + logs = train_stats.log() + strs = 'epoch: {}, iter: {}, lr: {:.6f}, {}, time: {:.3f}'.format( + epoch, train_batch_id, lr, logs, train_batch_elapse) + logger.info(strs) + + if train_batch_id > 0 and\ + train_batch_id % eval_batch_step == 0: + metrics = eval_rec_run(exe, config, eval_info_dict, "eval") + eval_acc = metrics['avg_acc'] + eval_sample_num = metrics['total_sample_num'] + if eval_acc > best_eval_acc: + best_eval_acc = eval_acc + best_batch_id = train_batch_id + best_epoch = epoch + save_path = save_model_dir + "/best_accuracy" + save_model(train_info_dict['train_program'], save_path) + strs = 'Test iter: {}, acc:{:.6f}, best_acc:{:.6f}, best_epoch:{}, best_batch_id:{}, eval_sample_num:{}'.format( + train_batch_id, eval_acc, best_eval_acc, best_epoch, + best_batch_id, eval_sample_num) + logger.info(strs) + train_batch_id += 1 + + except fluid.core.EOFException: + train_loader.reset() + + if epoch > 0 and epoch % save_epoch_step == 0: + save_path = save_model_dir + 
"/iter_epoch_%d" % (epoch) + save_model(train_info_dict['train_program'], save_path) + return diff --git a/tools/tmp/eval_det.py b/tools/tmp/eval_det.py new file mode 100755 index 00000000..38929f26 --- /dev/null +++ b/tools/tmp/eval_det.py @@ -0,0 +1,134 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys +import time +import numpy as np +from copy import deepcopy +import json + +# from paddle.fluid.contrib.model_stat import summary + + +def set_paddle_flags(**kwargs): + for key, value in kwargs.items(): + if os.environ.get(key, None) is None: + os.environ[key] = str(value) + + +# NOTE(paddle-dev): All of these flags should be +# set before `import paddle`. Otherwise, it would +# not take any effect. 
set_paddle_flags(
    FLAGS_eager_delete_tensor_gb=0,  # enable GC to save memory
)

from paddle import fluid
from ppocr.utils.utility import create_module
from ppocr.utils.utility import load_config, merge_config
import ppocr.data.det.reader_main as reader
from ppocr.utils.utility import ArgsParser
from ppocr.utils.check import check_gpu
from ppocr.utils.checkpoint import load_pretrain, load_checkpoint, save, save_model

from ppocr.utils.utility import initial_logger
logger = initial_logger()
from ppocr.utils.eval_utils import eval_det_run


def draw_det_res(dt_boxes, config, img_name, ino):
    """Debug helper: draw dt_boxes on the source image and save tmp<ino>.jpg."""
    if len(dt_boxes) > 0:
        img_set_path = config['TestReader']['img_set_dir']
        img_path = img_set_path + img_name
        import cv2  # lazy import: cv2 is only needed for this debug path
        src_im = cv2.imread(img_path)
        for box in dt_boxes:
            box = box.astype(np.int32).reshape((-1, 1, 2))
            cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2)
        cv2.imwrite("tmp%d.jpg" % ino, src_im)


def main():
    """Evaluate a detection model on the test set and log its metrics."""
    config = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    logger.info(config)

    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = config['Global']['use_gpu']
    check_gpu(use_gpu)

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    det_model = create_module(config['Architecture']['function'])(params=config)

    startup_prog = fluid.Program()
    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, startup_prog):
        with fluid.unique_name.guard():
            eval_loader, eval_outputs = det_model(mode="test")
            eval_fetch_list = [v.name for v in eval_outputs]
    eval_prog = eval_prog.clone(for_test=True)
    exe.run(startup_prog)

    pretrain_weights = config['Global']['pretrain_weights']
    if pretrain_weights is not None:
        load_pretrain(exe, eval_prog, pretrain_weights)
    else:
        # missing weights is an error: report it and exit non-zero
        # (the original exited with status 0 here)
        logger.error("Not find pretrain_weights:%s" % pretrain_weights)
        sys.exit(1)

    metrics = eval_det_run(exe, eval_prog, eval_fetch_list, config, "test")
    logger.info("metrics:{}".format(metrics))
    logger.info("success!")


def test_reader():
    """Benchmark the test reader throughput (debug utility)."""
    config = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    logger.info(config)
    tmp_reader = reader.test_reader(config=config)
    count = 0
    print_count = 0
    starttime = time.time()
    for data in tmp_reader():
        count += len(data)
        print_count += 1
        if print_count % 10 == 0:
            batch_time = (time.time() - starttime) / print_count
            print("reader:", count, len(data), batch_time)
    print("finish reader:", count)
    print("success")


if __name__ == '__main__':
    parser = ArgsParser()
    FLAGS = parser.parse_args()
    main()
#    test_reader()

# ---------------------------------------------------------------------------
# tools/tmp/infer_det.py (Apache-2.0 license header, same as the other tools)
# ---------------------------------------------------------------------------
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys +import time +import numpy as np +from copy import deepcopy +import json + +# from paddle.fluid.contrib.model_stat import summary + + +def set_paddle_flags(**kwargs): + for key, value in kwargs.items(): + if os.environ.get(key, None) is None: + os.environ[key] = str(value) + + +# NOTE(paddle-dev): All of these flags should be +# set before `import paddle`. Otherwise, it would +# not take any effect. +set_paddle_flags( + FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory +) + +from paddle import fluid +from ppocr.utils.utility import create_module +from ppocr.utils.utility import load_config, merge_config +import ppocr.data.det.reader_main as reader +from ppocr.utils.utility import ArgsParser +from ppocr.utils.check import check_gpu +from ppocr.utils.checkpoint import load_pretrain, load_checkpoint, save, save_model + +from ppocr.utils.utility import initial_logger +logger = initial_logger() +from ppocr.utils.eval_utils import eval_det_run + + +def draw_det_res(dt_boxes, config, img_name, ino): + if len(dt_boxes) > 0: + img_set_path = config['TestReader']['img_set_dir'] + img_path = img_set_path + img_name + import cv2 + src_im = cv2.imread(img_path) + for box in dt_boxes: + box = box.astype(np.int32).reshape((-1, 1, 2)) + cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2) + cv2.imwrite("tmp%d.jpg" % ino, src_im) + + +def main(): + config = load_config(FLAGS.config) + merge_config(FLAGS.opt) + print(config) + + # check if set use_gpu=True in paddlepaddle cpu version + use_gpu = config['Global']['use_gpu'] + check_gpu(use_gpu) + + place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() + exe = fluid.Executor(place) + + det_model = create_module(config['Architecture']['function'])(params=config) + + startup_prog = fluid.Program() + eval_prog = fluid.Program() + with fluid.program_guard(eval_prog, 
startup_prog): + with fluid.unique_name.guard(): + eval_outputs = det_model(mode="test") + eval_fetch_list = [v.name for v in eval_outputs] + eval_prog = eval_prog.clone(for_test=True) + exe.run(startup_prog) + + pretrain_weights = config['Global']['pretrain_weights'] + if pretrain_weights is not None: + fluid.load(eval_prog, pretrain_weights) + else: + logger.info("Not find pretrain_weights:%s" % pretrain_weights) + sys.exit(0) + + save_res_path = config['Global']['save_res_path'] + with open(save_res_path, "wb") as fout: + test_reader = reader.test_reader(config=config) + tackling_num = 0 + for data in test_reader(): + img_num = len(data) + tackling_num = tackling_num + img_num + logger.info("tackling_num:%d", tackling_num) + img_list = [] + ratio_list = [] + img_name_list = [] + for ino in range(img_num): + img_list.append(data[ino][0]) + ratio_list.append(data[ino][1]) + img_name_list.append(data[ino][2]) + img_list = np.concatenate(img_list, axis=0) + outs = exe.run(eval_prog,\ + feed={'image': img_list},\ + fetch_list=eval_fetch_list) + + global_params = config['Global'] + postprocess_params = deepcopy(config["PostProcess"]) + postprocess_params.update(global_params) + postprocess = create_module(postprocess_params['function'])\ + (params=postprocess_params) + dt_boxes_list = postprocess(outs, ratio_list) + for ino in range(img_num): + dt_boxes = dt_boxes_list[ino] + img_name = img_name_list[ino] + dt_boxes_json = [] + for box in dt_boxes: + tmp_json = {"transcription": ""} + tmp_json['points'] = box.tolist() + dt_boxes_json.append(tmp_json) + otstr = img_name + "\t" + json.dumps(dt_boxes_json) + "\n" + fout.write(otstr.encode()) + #draw_det_res(dt_boxes, config, img_name, ino) + logger.info("success!") + + +def test_reader(): + config = load_config(FLAGS.config) + merge_config(FLAGS.opt) + print(config) + tmp_reader = reader.test_reader(config=config) + count = 0 + print_count = 0 + import time + starttime = time.time() + for data in tmp_reader(): + count += 
len(data) + print_count += 1 + if print_count % 10 == 0: + batch_time = (time.time() - starttime) / print_count + print("reader:", count, len(data), batch_time) + print("finish reader:", count) + print("success") + + +if __name__ == '__main__': + parser = ArgsParser() + FLAGS = parser.parse_args() + main() +# test_reader() diff --git a/tools/tmp/infer_rec.py b/tools/tmp/infer_rec.py new file mode 100755 index 00000000..ea308953 --- /dev/null +++ b/tools/tmp/infer_rec.py @@ -0,0 +1,116 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import time +import multiprocessing +import numpy as np + + +def set_paddle_flags(**kwargs): + for key, value in kwargs.items(): + if os.environ.get(key, None) is None: + os.environ[key] = str(value) + + +# NOTE(paddle-dev): All of these flags should be +# set before `import paddle`. Otherwise, it would +# not take any effect. 
+set_paddle_flags( + FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory +) + +from paddle import fluid + +from ppocr.utils.utility import load_config, merge_config +from ppocr.data.rec.reader_main import test_reader + +from ppocr.utils.utility import ArgsParser +from ppocr.utils.character import CharacterOps, cal_predicts_accuracy +from ppocr.utils.check import check_gpu +from ppocr.utils.utility import create_module + +from ppocr.utils.utility import initial_logger +logger = initial_logger() + + +def main(): + config = load_config(FLAGS.config) + merge_config(FLAGS.opt) + char_ops = CharacterOps(config['Global']) + config['Global']['char_num'] = char_ops.get_char_num() + + # check if set use_gpu=True in paddlepaddle cpu version + use_gpu = config['Global']['use_gpu'] + check_gpu(use_gpu) + + place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() + exe = fluid.Executor(place) + + rec_model = create_module(config['Architecture']['function'])(params=config) + + startup_prog = fluid.Program() + eval_prog = fluid.Program() + with fluid.program_guard(eval_prog, startup_prog): + with fluid.unique_name.guard(): + eval_outputs = rec_model(mode="test") + eval_fetch_list = [v.name for v in eval_outputs] + eval_prog = eval_prog.clone(for_test=True) + exe.run(startup_prog) + + pretrain_weights = config['Global']['pretrain_weights'] + if pretrain_weights is not None: + fluid.load(eval_prog, pretrain_weights) + + test_img_path = config['test_img_path'] + image_shape = config['Global']['image_shape'] + blobs = test_reader(image_shape, test_img_path) + predict = exe.run(program=eval_prog, + feed={"image": blobs}, + fetch_list=eval_fetch_list, + return_numpy=False) + preds = np.array(predict[0]) + if preds.shape[1] == 1: + preds = preds.reshape(-1) + preds_lod = predict[0].lod()[0] + preds_text = char_ops.decode(preds) + else: + end_pos = np.where(preds[0, :] == 1)[0] + if len(end_pos) <= 1: + preds_text = preds[0, 1:] + else: + preds_text = preds[0, 1:end_pos[1]] + 
preds_text = preds_text.reshape(-1) + preds_text = char_ops.decode(preds_text) + + fluid.io.save_inference_model( + "./output/", + feeded_var_names=['image'], + target_vars=eval_outputs, + executor=exe, + main_program=eval_prog, + model_filename="model", + params_filename="params") + print(preds) + print(preds_text) + + +if __name__ == '__main__': + parser = ArgsParser() + FLAGS = parser.parse_args() + main() diff --git a/tools/tmp/test_rec_benchmark.py b/tools/tmp/test_rec_benchmark.py new file mode 100755 index 00000000..33cd136c --- /dev/null +++ b/tools/tmp/test_rec_benchmark.py @@ -0,0 +1,128 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import time +import multiprocessing +import numpy as np + + +def set_paddle_flags(**kwargs): + for key, value in kwargs.items(): + if os.environ.get(key, None) is None: + os.environ[key] = str(value) + + +# NOTE(paddle-dev): All of these flags should be +# set before `import paddle`. Otherwise, it would +# not take any effect. 
set_paddle_flags(
    FLAGS_eager_delete_tensor_gb=0,  # enable GC to save memory
)

from paddle import fluid

from ppocr.utils.utility import load_config, merge_config
import ppocr.data.rec.reader_main as reader

from ppocr.utils.utility import ArgsParser
from ppocr.utils.character import CharacterOps, cal_predicts_accuracy
from ppocr.utils.check import check_gpu
from ppocr.utils.utility import create_module

from ppocr.utils.eval_utils import eval_run

from ppocr.utils.utility import initial_logger
logger = initial_logger()


def main():
    """Evaluate a recognition model on the standard LMDB benchmark suites
    (IIIT5k, SVT, IC03/13/15, SVTP, CUTE80) and log per-set / average accuracy."""
    config = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    char_ops = CharacterOps(config['Global'])
    config['Global']['char_num'] = char_ops.get_char_num()

    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = config['Global']['use_gpu']
    check_gpu(use_gpu)
    # (removed the unused devices_num computation from the original)

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    rec_model = create_module(config['Architecture']['function'])(params=config)

    startup_prog = fluid.Program()
    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, startup_prog):
        with fluid.unique_name.guard():
            eval_loader, eval_outputs = rec_model(mode="eval")
            eval_fetch_list = [v.name for v in eval_outputs]
    eval_prog = eval_prog.clone(for_test=True)

    exe.run(startup_prog)
    pretrain_weights = config['Global']['pretrain_weights']
    if pretrain_weights is not None:
        fluid.load(eval_prog, pretrain_weights)

    eval_data_list = ['IIIT5k_3000', 'SVT', 'IC03_860', 'IC03_867',\
        'IC13_857', 'IC13_1015', 'IC15_1811', 'IC15_2077', 'SVTP', 'CUTE80']
    eval_data_dir = config['TestReader']['lmdb_sets_dir']
    total_forward_time = 0
    total_evaluation_data_number = 0
    total_correct_number = 0
    eval_data_acc_info = {}
    for eval_data in eval_data_list:
        config['TestReader']['lmdb_sets_dir'] = \
            eval_data_dir + "/" + eval_data
        eval_reader = reader.train_eval_reader(
            config=config, char_ops=char_ops, mode="test")
        eval_loader.set_sample_list_generator(eval_reader, places=place)

        start_time = time.time()
        outs = eval_run(exe, eval_prog, eval_loader, eval_fetch_list, char_ops,
                        "best", "test")
        infer_time = time.time() - start_time
        eval_acc, acc_num, sample_num = outs
        total_forward_time += infer_time
        total_evaluation_data_number += sample_num
        total_correct_number += acc_num
        eval_data_acc_info[eval_data] = outs

    # per-sample forward time and micro-averaged accuracy over all suites
    avg_forward_time = total_forward_time / total_evaluation_data_number
    avg_acc = total_correct_number * 1.0 / total_evaluation_data_number
    logger.info('-' * 50)
    strs = ""
    for eval_data in eval_data_list:
        eval_acc, acc_num, sample_num = eval_data_acc_info[eval_data]
        strs += "\n {}, accuracy:{:.6f}".format(eval_data, eval_acc)
    strs += "\n average, accuracy:{:.6f}, time:{:.6f}".format(avg_acc,
                                                              avg_forward_time)
    logger.info(strs)
    logger.info('-' * 50)


if __name__ == '__main__':
    parser = ArgsParser()
    FLAGS = parser.parse_args()
    main()

# ---------------------------------------------------------------------------
# tools/tmp/train_det.py (Apache-2.0 license header, same as the other tools)
# ---------------------------------------------------------------------------
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys +import time +import multiprocessing +import numpy as np + +# from paddle.fluid.contrib.model_stat import summary + + +def set_paddle_flags(**kwargs): + for key, value in kwargs.items(): + if os.environ.get(key, None) is None: + os.environ[key] = str(value) + + +# NOTE(paddle-dev): All of these flags should be +# set before `import paddle`. Otherwise, it would +# not take any effect. +set_paddle_flags( + FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory +) + +from paddle import fluid +from ppocr.utils.utility import create_module +from ppocr.utils.utility import load_config, merge_config +import ppocr.data.det.reader_main as reader +from ppocr.utils.utility import ArgsParser +from ppocr.utils.character import CharacterOps, cal_predicts_accuracy +from ppocr.utils.check import check_gpu +from ppocr.utils.stats import TrainingStats +from ppocr.utils.checkpoint import load_pretrain, load_checkpoint, save, save_model +from ppocr.utils.eval_utils import eval_run +from ppocr.utils.eval_utils import eval_det_run + +from ppocr.utils.utility import initial_logger +logger = initial_logger() +from ppocr.utils.utility import create_multi_devices_program + + +def main(): + config = load_config(FLAGS.config) + merge_config(FLAGS.opt) + print(config) + + alg = config['Global']['algorithm'] + assert alg in ['EAST', 'DB'] + + # check if set use_gpu=True in paddlepaddle cpu version + use_gpu = config['Global']['use_gpu'] + check_gpu(use_gpu) + + place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() + exe = fluid.Executor(place) + + det_model = create_module(config['Architecture']['function'])(params=config) + + startup_prog = fluid.Program() + train_prog = fluid.Program() + with fluid.program_guard(train_prog, startup_prog): + with fluid.unique_name.guard(): + train_loader, train_outputs = det_model(mode="train") + train_fetch_list 
= [v.name for v in train_outputs] + train_loss = train_outputs[0] + opt_params = config['Optimizer'] + optimizer = create_module(opt_params['function'])(opt_params) + optimizer.minimize(train_loss) + global_lr = optimizer._global_learning_rate() + global_lr.persistable = True + train_fetch_list.append(global_lr.name) + + eval_prog = fluid.Program() + with fluid.program_guard(eval_prog, startup_prog): + with fluid.unique_name.guard(): + eval_loader, eval_outputs = det_model(mode="eval") + eval_fetch_list = [v.name for v in eval_outputs] + eval_prog = eval_prog.clone(for_test=True) + + train_reader = reader.train_reader(config=config) + train_loader.set_sample_list_generator(train_reader, places=place) + + exe.run(startup_prog) + + # compile program for multi-devices + train_compile_program = create_multi_devices_program(train_prog, + train_loss.name) + + pretrain_weights = config['Global']['pretrain_weights'] + if pretrain_weights is not None: + load_pretrain(exe, train_prog, pretrain_weights) + print("pretrain weights loaded!") + + train_batch_id = 0 + if alg == 'EAST': + train_log_keys = ['loss_total', 'loss_cls', 'loss_offset'] + elif alg == 'DB': + train_log_keys = [ + 'loss_total', 'loss_shrink', 'loss_threshold', 'loss_binary' + ] + log_smooth_window = config['Global']['log_smooth_window'] + epoch_num = config['Global']['epoch_num'] + print_step = config['Global']['print_step'] + eval_step = config['Global']['eval_step'] + save_epoch_step = config['Global']['save_epoch_step'] + save_dir = config['Global']['save_dir'] + train_stats = TrainingStats(log_smooth_window, train_log_keys) + best_eval_hmean = -1 + best_batch_id = 0 + best_epoch = 0 + for epoch in range(epoch_num): + train_loader.start() + try: + while True: + t1 = time.time() + train_outs = exe.run(program=train_compile_program, + fetch_list=train_fetch_list, + return_numpy=False) + loss_total = np.mean(np.array(train_outs[0])) + if alg == 'EAST': + loss_cls = np.mean(np.array(train_outs[1])) + 
loss_offset = np.mean(np.array(train_outs[2])) + stats = {'loss_total':loss_total, 'loss_cls':loss_cls,\ + 'loss_offset':loss_offset} + elif alg == 'DB': + loss_shrink_maps = np.mean(np.array(train_outs[1])) + loss_threshold_maps = np.mean(np.array(train_outs[2])) + loss_binary_maps = np.mean(np.array(train_outs[3])) + stats = {'loss_total':loss_total, 'loss_shrink':loss_shrink_maps, \ + 'loss_threshold':loss_threshold_maps, 'loss_binary':loss_binary_maps} + lr = np.mean(np.array(train_outs[-1])) + t2 = time.time() + train_batch_elapse = t2 - t1 + + # stats = {'loss_total':loss_total, 'loss_cls':loss_cls,\ + # 'loss_offset':loss_offset} + train_stats.update(stats) + if train_batch_id > 0 and train_batch_id % print_step == 0: + logs = train_stats.log() + strs = 'epoch: {}, iter: {}, lr: {:.6f}, {}, time: {:.3f}'.format( + epoch, train_batch_id, lr, logs, train_batch_elapse) + logger.info(strs) + + if train_batch_id > 0 and\ + train_batch_id % eval_step == 0: + metrics = eval_det_run(exe, eval_prog, eval_fetch_list, + config, "eval") + hmean = metrics['hmean'] + if hmean >= best_eval_hmean: + best_eval_hmean = hmean + best_batch_id = train_batch_id + best_epoch = epoch + save_path = save_dir + "/best_accuracy" + save_model(train_prog, save_path) + strs = 'Test iter: {}, metrics:{}, best_hmean:{:.6f}, best_epoch:{}, best_batch_id:{}'.format( + train_batch_id, metrics, best_eval_hmean, best_epoch, + best_batch_id) + logger.info(strs) + train_batch_id += 1 + + except fluid.core.EOFException: + train_loader.reset() + + if epoch > 0 and epoch % save_epoch_step == 0: + save_path = save_dir + "/iter_epoch_%d" % (epoch) + save_model(train_prog, save_path) + + +def test_reader(): + config = load_config(FLAGS.config) + merge_config(FLAGS.opt) + print(config) + tmp_reader = reader.train_reader(config=config) + count = 0 + print_count = 0 + import time + while True: + starttime = time.time() + count = 0 + for data in tmp_reader(): + count += 1 + if print_count % 1 == 0: + 
batch_time = time.time() - starttime + starttime = time.time() + print("reader:", count, len(data), batch_time) + print("finish reader:", count) + print("success") + + +if __name__ == '__main__': + parser = ArgsParser() + parser.add_argument( + "-r", + "--resume_checkpoint", + default=None, + type=str, + help="Checkpoint path for resuming training.") + FLAGS = parser.parse_args() + main() + # test_reader() diff --git a/tools/tmp/train_rec.py b/tools/tmp/train_rec.py new file mode 100755 index 00000000..21b9a9ca --- /dev/null +++ b/tools/tmp/train_rec.py @@ -0,0 +1,222 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys +import time +import multiprocessing +import numpy as np + +# from paddle.fluid.contrib.model_stat import summary + + +def set_paddle_flags(**kwargs): + for key, value in kwargs.items(): + if os.environ.get(key, None) is None: + os.environ[key] = str(value) + + +# NOTE(paddle-dev): All of these flags should be +# set before `import paddle`. Otherwise, it would +# not take any effect. 
+set_paddle_flags( + FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory +) + +from paddle import fluid +from ppocr.utils.utility import create_module +from ppocr.utils.utility import load_config, merge_config +import ppocr.data.rec.reader_main as reader +from ppocr.utils.utility import ArgsParser +from ppocr.utils.character import CharacterOps, cal_predicts_accuracy +from ppocr.utils.check import check_gpu +from ppocr.utils.stats import TrainingStats +from ppocr.utils.checkpoint import load_pretrain, load_checkpoint, save, save_model +from ppocr.utils.eval_utils import eval_run + +from ppocr.utils.utility import initial_logger +logger = initial_logger() +from ppocr.utils.utility import create_multi_devices_program + + +def main(): + config = load_config(FLAGS.config) + merge_config(FLAGS.opt) + char_ops = CharacterOps(config['Global']) + config['Global']['char_num'] = char_ops.get_char_num() + print(config) + + # check if set use_gpu=True in paddlepaddle cpu version + use_gpu = config['Global']['use_gpu'] + check_gpu(use_gpu) + + place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() + exe = fluid.Executor(place) + + rec_model = create_module(config['Architecture']['function'])(params=config) + + startup_prog = fluid.Program() + train_prog = fluid.Program() + with fluid.program_guard(train_prog, startup_prog): + with fluid.unique_name.guard(): + train_loader, train_outputs = rec_model(mode="train") + save_var = train_outputs[1] + + if "gradient_clip" in config['Global']: + gradient_clip = config['Global']['gradient_clip'] + clip = fluid.clip.GradientClipByGlobalNorm(gradient_clip) + fluid.clip.set_gradient_clip(clip, program=train_prog) + + train_fetch_list = [v.name for v in train_outputs] + train_loss = train_outputs[0] + opt_params = config['Optimizer'] + optimizer = create_module(opt_params['function'])(opt_params) + optimizer.minimize(train_loss) + global_lr = optimizer._global_learning_rate() + global_lr.persistable = True + 
train_fetch_list.append(global_lr.name) + + train_reader = reader.train_eval_reader( + config=config, char_ops=char_ops, mode="train") + train_loader.set_sample_list_generator(train_reader, places=place) + + eval_prog = fluid.Program() + with fluid.program_guard(eval_prog, startup_prog): + with fluid.unique_name.guard(): + eval_loader, eval_outputs = rec_model(mode="eval") + eval_fetch_list = [v.name for v in eval_outputs] + + eval_prog = eval_prog.clone(for_test=True) + exe.run(startup_prog) + + eval_reader = reader.train_eval_reader( + config=config, char_ops=char_ops, mode="eval") + eval_loader.set_sample_list_generator(eval_reader, places=place) + + # compile program for multi-devices + train_compile_program = create_multi_devices_program(train_prog, + train_loss.name) + + pretrain_weights = config['Global']['pretrain_weights'] + if pretrain_weights is not None: + load_pretrain(exe, train_prog, pretrain_weights) + + train_batch_id = 0 + train_log_keys = ['loss', 'acc'] + log_smooth_window = config['Global']['log_smooth_window'] + epoch_num = config['Global']['epoch_num'] + loss_type = config['Global']['loss_type'] + print_step = config['Global']['print_step'] + eval_step = config['Global']['eval_step'] + save_epoch_step = config['Global']['save_epoch_step'] + save_dir = config['Global']['save_dir'] + train_stats = TrainingStats(log_smooth_window, train_log_keys) + best_eval_acc = -1 + best_batch_id = 0 + best_epoch = 0 + for epoch in range(epoch_num): + train_loader.start() + try: + while True: + t1 = time.time() + train_outs = exe.run(program=train_compile_program, + fetch_list=train_fetch_list, + return_numpy=False) + loss = np.mean(np.array(train_outs[0])) + lr = np.mean(np.array(train_outs[-1])) + + preds = np.array(train_outs[1]) + preds_lod = train_outs[1].lod()[0] + labels = np.array(train_outs[2]) + labels_lod = train_outs[2].lod()[0] + + acc, acc_num, img_num = cal_predicts_accuracy( + char_ops, preds, preds_lod, labels, labels_lod) + + t2 = 
time.time() + train_batch_elapse = t2 - t1 + + stats = {'loss': loss, 'acc': acc} + train_stats.update(stats) + if train_batch_id > 0 and train_batch_id % print_step == 0: + logs = train_stats.log() + strs = 'epoch: {}, iter: {}, lr: {:.6f}, {}, time: {:.3f}'.format( + epoch, train_batch_id, lr, logs, train_batch_elapse) + logger.info(strs) + + if train_batch_id > 0 and train_batch_id % eval_step == 0: + outs = eval_run(exe, eval_prog, eval_loader, + eval_fetch_list, char_ops, train_batch_id, + "eval") + eval_acc, acc_num, sample_num = outs + if eval_acc > best_eval_acc: + best_eval_acc = eval_acc + best_batch_id = train_batch_id + best_epoch = epoch + save_path = save_dir + "/best_accuracy" + save_model(train_prog, save_path) + + strs = 'Test iter: {}, acc:{:.6f}, best_acc:{:.6f}, best_epoch:{}, best_batch_id:{}, sample_num:{}'.format( + train_batch_id, eval_acc, best_eval_acc, best_epoch, + best_batch_id, sample_num) + logger.info(strs) + train_batch_id += 1 + + except fluid.core.EOFException: + train_loader.reset() + + if epoch > 0 and epoch % save_epoch_step == 0: + save_path = save_dir + "/iter_epoch_%d" % (epoch) + save_model(train_prog, save_path) + + +def test_reader(): + config = load_config(FLAGS.config) + merge_config(FLAGS.opt) + char_ops = CharacterOps(config['Global']) + config['Global']['char_num'] = char_ops.get_char_num() + print(config) + # tmp_reader = reader.train_eval_reader( + # config=cfg, char_ops=char_ops, mode="train") + tmp_reader = reader.train_eval_reader( + config=config, char_ops=char_ops, mode="eval") + count = 0 + print_count = 0 + import time + starttime = time.time() + for data in tmp_reader(): + count += len(data) + print_count += 1 + if print_count % 10 == 0: + batch_time = (time.time() - starttime) / print_count + print("reader:", count, len(data), batch_time) + print("finish reader:", count) + print("success") + + +if __name__ == '__main__': + parser = ArgsParser() + parser.add_argument( + "-r", + "--resume_checkpoint", + 
default=None, + type=str, + help="Checkpoint path for resuming training.") + FLAGS = parser.parse_args() + main() +# test_reader() diff --git a/tools/train.py b/tools/train.py new file mode 100755 index 00000000..08c5a641 --- /dev/null +++ b/tools/train.py @@ -0,0 +1,113 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import sys +import time +import multiprocessing +import numpy as np + + +def set_paddle_flags(**kwargs): + for key, value in kwargs.items(): + if os.environ.get(key, None) is None: + os.environ[key] = str(value) + + +# NOTE(paddle-dev): All of these flags should be +# set before `import paddle`. Otherwise, it would +# not take any effect. 
set_paddle_flags(
    FLAGS_eager_delete_tensor_gb=0,  # enable GC to save memory
)

import program
from paddle import fluid
from ppocr.utils.utility import initial_logger
logger = initial_logger()
from ppocr.data.reader_main import reader_main
from ppocr.utils.save_load import init_model
from ppocr.utils.character import CharacterOps


def main():
    """Unified training entry point for detection and recognition models.

    Builds train/eval programs via ``program.build`` and dispatches to the
    detection loop (EAST/DB) or the recognition loop
    (Rosetta/CRNN/STARNet/RARE) based on ``Global.algorithm``.
    """
    config = program.load_config(FLAGS.config)
    program.merge_config(FLAGS.opt)
    logger.info(config)

    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = config['Global']['use_gpu']
    # Fix: validate the configured flag instead of the hard-coded `True`,
    # which made the check fail on CPU-only builds even with use_gpu=False.
    program.check_gpu(use_gpu)

    alg = config['Global']['algorithm']
    assert alg in ['EAST', 'DB', 'Rosetta', 'CRNN', 'STARNet', 'RARE']
    if alg in ['Rosetta', 'CRNN', 'STARNet', 'RARE']:
        # recognition algorithms need the character set / label codec
        config['Global']['char_ops'] = CharacterOps(config['Global'])

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    startup_program = fluid.Program()
    train_program = fluid.Program()
    train_build_outputs = program.build(
        config, train_program, startup_program, mode='train')
    train_loader = train_build_outputs[0]
    train_fetch_name_list = train_build_outputs[1]
    train_fetch_varname_list = train_build_outputs[2]
    train_opt_loss_name = train_build_outputs[3]

    eval_program = fluid.Program()
    eval_build_outputs = program.build(
        config, eval_program, startup_program, mode='eval')
    eval_fetch_name_list = eval_build_outputs[1]
    eval_fetch_varname_list = eval_build_outputs[2]
    eval_program = eval_program.clone(for_test=True)

    train_reader = reader_main(config=config, mode="train")
    train_loader.set_sample_list_generator(train_reader, places=place)

    # the eval reader is driven directly by the eval loop, not by a loader
    eval_reader = reader_main(config=config, mode="eval")

    exe = fluid.Executor(place)
    exe.run(startup_program)

    # compile program for multi-devices
    train_compile_program = program.create_multi_devices_program(
        train_program, train_opt_loss_name)
    # restore pretrained weights / checkpoints as configured
    init_model(config, train_program, exe)

    train_info_dict = {'compile_program': train_compile_program,
                       'train_program': train_program,
                       'reader': train_loader,
                       'fetch_name_list': train_fetch_name_list,
                       'fetch_varname_list': train_fetch_varname_list}

    eval_info_dict = {'program': eval_program,
                      'reader': eval_reader,
                      'fetch_name_list': eval_fetch_name_list,
                      'fetch_varname_list': eval_fetch_varname_list}

    if alg in ['EAST', 'DB']:
        program.train_eval_det_run(config, exe, train_info_dict, eval_info_dict)
    else:
        program.train_eval_rec_run(config, exe, train_info_dict, eval_info_dict)


if __name__ == '__main__':
    parser = program.ArgsParser()
    FLAGS = parser.parse_args()
    main()