diff --git a/configs/rec/rec_chinese_lite_train.yml b/configs/rec/rec_chinese_lite_train.yml new file mode 100755 index 0000000000000000000000000000000000000000..279aa6999a727045e50ce079e9a33bbdda321a89 --- /dev/null +++ b/configs/rec/rec_chinese_lite_train.yml @@ -0,0 +1,42 @@ +Global: + algorithm: CRNN + use_gpu: true + epoch_num: 3000 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec_CRNN + save_epoch_step: 3 + eval_batch_step: 2000 + train_batch_size_per_card: 256 + test_batch_size_per_card: 256 + image_shape: [3, 32, 100] + max_text_length: 25 + character_type: ch + character_dict_path: ./ppocr/utils/ppocr_keys_v1.txt + loss_type: ctc + reader_yml: ./configs/rec/rec_chinese_reader.yml + pretrain_weights: ./pretrain_models/CRNN/best_accuracy + checkpoints: + save_inference_dir: +Architecture: + function: ppocr.modeling.architectures.rec_model,RecModel + +Backbone: + function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3 + scale: 0.5 + model_name: small + +Head: + function: ppocr.modeling.heads.rec_ctc_head,CTCPredict + encoder_type: rnn + SeqRNN: + hidden_size: 48 + +Loss: + function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss + +Optimizer: + function: ppocr.optimizer,AdamDecay + base_lr: 0.0005 + beta1: 0.9 + beta2: 0.999 diff --git a/configs/rec/rec_chinese_reader.yml b/configs/rec/rec_chinese_reader.yml new file mode 100755 index 0000000000000000000000000000000000000000..f09a1ea72e6d929d0446fbbf51ca218e52ae5b3e --- /dev/null +++ b/configs/rec/rec_chinese_reader.yml @@ -0,0 +1,14 @@ +TrainReader: + reader_function: ppocr.data.rec.dataset_traversal,SimpleReader + num_workers: 8 + img_set_dir: ./train_data + label_file_path: ./train_data/rec_gt_train.txt + +EvalReader: + reader_function: ppocr.data.rec.dataset_traversal,SimpleReader + img_set_dir: ./train_data + label_file_path: ./train_data/rec_gt_test.txt + +TestReader: + reader_function: ppocr.data.rec.dataset_traversal,SimpleReader + infer_img: ./infer_img diff --git a/configs/rec/rec_icdar15_train.yml b/configs/rec/rec_icdar15_train.yml index 35aba6684eecb6a932b74dd7e1ebd32c9f0f2f43..e8710b47d2cb48595f9fc37bb52030ed908b2254 100755 --- a/configs/rec/rec_icdar15_train.yml +++ b/configs/rec/rec_icdar15_train.yml @@ -12,10 +12,11 @@ Global: image_shape: [3, 32, 100] max_text_length: 25 character_type: ch - character_dict_path: ./ppocr/utils/ic15_dict.txt + #character_dict_path: ./ppocr/utils/ic15_dict.txt + character_dict_path: /workspace/PaddleOCR/train_data/ic15_dict.txt loss_type: ctc reader_yml: ./configs/rec/rec_icdar15_reader.yml - pretrain_weights: ./pretrain_models/CRNN/best_accuracy + pretrain_weights: /workspace/PaddleOCR/pretrain_models/rec_mv3_none_bilstm_ctc/best_accuracy checkpoints: save_inference_dir: Architecture: @@ -24,13 +25,13 @@ Architecture: Backbone: function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3 scale: 0.5 - model_name: small + model_name: large Head: function: ppocr.modeling.heads.rec_ctc_head,CTCPredict encoder_type: rnn SeqRNN: - hidden_size: 48 + hidden_size: 96 Loss: function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss diff --git a/ppocr/utils/character.py b/ppocr/utils/character.py index 7dfeeef4c15b6bb95d37edc16b99f04ea5ffe020..b40750392291f271b26fef88e58844be9020d2ea 100755 --- a/ppocr/utils/character.py +++ b/ppocr/utils/character.py @@ -64,7 +64,7 @@ class CharacterOps(object): [sum(text_lengths)] = [text_index_0 + text_index_1 + ... + text_index_(n - 1)] length: length of each text. [batch_size] """ - if self.character_type == "en" or text.encode( 'UTF-8' ).isalpha(): + if self.character_type == "en": text = text.lower() text_list = [] diff --git a/ppocr/utils/ic15_dict.txt b/ppocr/utils/ic15_dict.txt index c1f9993dae8be61d968b602ddde5683b8e4ca959..71043689051fb5a2da516b2e005d1d9b0fdecfb3 100644 --- a/ppocr/utils/ic15_dict.txt +++ b/ppocr/utils/ic15_dict.txt @@ -1,3 +1,13 @@ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 a b c @@ -24,13 +34,3 @@ w x y z -0 -1 -2 -3 -4 -5 -6 -7 -8 -9