diff --git a/configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml b/configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml new file mode 100644 index 0000000000000000000000000000000000000000..ff536edec4d6e7a85a6e6c189d56a23ffabc5583 --- /dev/null +++ b/configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml @@ -0,0 +1,131 @@ +Global: + debug: false + use_gpu: true + epoch_num: 500 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/v3_en_mobile + save_epoch_step: 3 + eval_batch_step: [0, 2000] + cal_metric_during_train: true + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: false + infer_img: doc/imgs_words/ch/word_1.jpg + character_dict_path: ppocr/utils/en_dict.txt + max_text_length: &max_text_length 25 + infer_mode: false + use_space_char: true + distributed: true + save_res_path: ./output/rec/predicts_ppocrv3_en.txt + + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + name: Cosine + learning_rate: 0.001 + warmup_epoch: 5 + regularizer: + name: L2 + factor: 3.0e-05 + + +Architecture: + model_type: rec + algorithm: SVTR + Transform: + Backbone: + name: MobileNetV1Enhance + scale: 0.5 + last_conv_stride: [1, 2] + last_pool_type: avg + Head: + name: MultiHead + head_list: + - CTCHead: + Neck: + name: svtr + dims: 64 + depth: 2 + hidden_dims: 120 + use_guide: True + Head: + fc_decay: 0.00001 + - SARHead: + enc_dim: 512 + max_text_length: *max_text_length + +Loss: + name: MultiLoss + loss_config_list: + - CTCLoss: + - SARLoss: + +PostProcess: + name: CTCLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + ignore_space: False + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ + ext_op_transform_idx: 1 + label_file_list: + - ./train_data/train_list.txt + transforms: + - DecodeImage: + img_mode: BGR + channel_first: false + - RecConAug: + prob: 0.5 + ext_data_num: 2 + image_shape: [48, 320, 3] + - RecAug: + - MultiLabelEncode: + - RecResizeImg: + image_shape: [3, 48, 320] + - KeepKeys: + keep_keys: + - image + - label_ctc + - label_sar + - length + - valid_ratio + loader: + shuffle: true + batch_size_per_card: 128 + drop_last: true + num_workers: 4 +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data + label_file_list: + - ./train_data/val_list.txt + transforms: + - DecodeImage: + img_mode: BGR + channel_first: false + - MultiLabelEncode: + - RecResizeImg: + image_shape: [3, 48, 320] + - KeepKeys: + keep_keys: + - image + - label_ctc + - label_sar + - length + - valid_ratio + loader: + shuffle: false + drop_last: false + batch_size_per_card: 128 + num_workers: 4