diff --git a/configs/det/det_r50_vd_db.yml b/configs/det/det_r50_vd_db.yml index 57940926280d67356924f78514102982124b8564..1275c7655c5f7596bb65eb9f2b3448f1bd6944b7 100644 --- a/configs/det/det_r50_vd_db.yml +++ b/configs/det/det_r50_vd_db.yml @@ -3,15 +3,15 @@ Global: epoch_num: 1200 log_smooth_window: 20 print_batch_step: 2 - save_model_dir: ./output/20201015_r50/ + save_model_dir: ./output/det_r50_vd/ save_epoch_step: 1200 # evaluation is run every 5000 iterations after the 4000th iteration eval_batch_step: 8 # if pretrained_model is saved in static mode, load_static_weights must set to True load_static_weights: True cal_metric_during_train: False - pretrained_model: /home/zhoujun20/pretrain_models/ResNet50_vd_ssld_pretrained/ - checkpoints: #./output/det_db_0.001_DiceLoss_256_pp_config_2.0b_4gpu/best_accuracy + pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained/ + checkpoints: save_inference_dir: use_visualdl: True infer_img: doc/imgs_en/img_10.jpg @@ -65,9 +65,9 @@ Metric: TRAIN: dataset: name: SimpleDataSet - data_dir: /home/zhoujun20/detection/ + data_dir: ./detection/ file_list: - - /home/zhoujun20/detection/train_icdar2015_label.txt # dataset1 + - ./detection/train_icdar2015_label.txt # dataset1 ratio_list: [1.0] transforms: - DecodeImage: # load image @@ -107,9 +107,9 @@ TRAIN: EVAL: dataset: name: SimpleDataSet - data_dir: /home/zhoujun20/detection/ + data_dir: ./detection/ file_list: - - /home/zhoujun20/detection/test_icdar2015_label.txt + - ./detection/test_icdar2015_label.txt transforms: - DecodeImage: # load image img_mode: BGR diff --git a/configs/rec/rec_mv3_none_bilstm_ctc.yml b/configs/rec/rec_mv3_none_bilstm_ctc.yml index 3f30afcb4b89644883718300227842b7cb51e6b7..9c45bc70a40502c977469bfee141aa3bb7ce3310 100644 --- a/configs/rec/rec_mv3_none_bilstm_ctc.yml +++ b/configs/rec/rec_mv3_none_bilstm_ctc.yml @@ -68,7 +68,7 @@ TRAIN: name: SimpleDataSet data_dir: ./rec file_list: - - ./rec/real_data.txt # dataset1 + - ./rec/train.txt # 
dataset1 ratio_list: [ 0.4,0.6 ] transforms: - DecodeImage: # load image @@ -91,7 +91,7 @@ EVAL: name: SimpleDataSet data_dir: ./rec file_list: - - ./rec/label_val_all.txt + - ./rec/val.txt transforms: - DecodeImage: # load image img_mode: BGR diff --git a/configs/rec/rec_mv3_none_none_ctc_lmdb.yml b/configs/rec/rec_mv3_none_none_ctc_lmdb.yml index 413e1c3c315ae90fbc3f096dfbb7f8bb3a8f39e9..cc52bf71212722f392a9770944cc4030d4d11168 100644 --- a/configs/rec/rec_mv3_none_none_ctc_lmdb.yml +++ b/configs/rec/rec_mv3_none_none_ctc_lmdb.yml @@ -1,25 +1,25 @@ Global: use_gpu: false - epoch_num: 500 + epoch_num: 72 log_smooth_window: 20 - print_batch_step: 1 - save_model_dir: ./output/rec/test/ + print_batch_step: 10 + save_model_dir: ./output/rec/mv3_none_none_ctc/ save_epoch_step: 500 # evaluation is run every 5000 iterations after the 4000th iteration - eval_batch_step: 1016 + eval_batch_step: 2000 # if pretrained_model is saved in static mode, load_static_weights must set to True load_static_weights: True cal_metric_during_train: True pretrained_model: - checkpoints: #output/rec/rec_crnn/best_accuracy + checkpoints: save_inference_dir: use_visualdl: True infer_img: doc/imgs_words/ch/word_1.jpg # for data or label process - max_text_length: 80 - character_dict_path: /home/zhoujun20/rec/lmdb/dict.txt + max_text_length: 25 + character_dict_path: character_type: 'en' - use_space_char: True + use_space_char: False infer_mode: False use_tps: False @@ -29,9 +29,9 @@ Optimizer: beta1: 0.9 beta2: 0.999 learning_rate: - name: Cosine +# name: Cosine lr: 0.0005 - warmup_epoch: 1 +# warmup_epoch: 1 regularizer: name: 'L2' factor: 0.00001 @@ -43,7 +43,7 @@ Architecture: Backbone: name: MobileNetV3 scale: 0.5 - model_name: small + model_name: large small_stride: [ 1, 2, 2, 2 ] Neck: name: SequenceEncoder @@ -66,7 +66,7 @@ TRAIN: dataset: name: LMDBDateSet file_list: - - /Users/zhoujun20/Downloads/evaluation_new # dataset1 + - ./rec/train # dataset1 ratio_list: [ 0.4,0.6 ] transforms: - 
DecodeImage: # load image @@ -75,7 +75,7 @@ TRAIN: - CTCLabelEncode: # Class handling label - RecAug: - RecResizeImg: - image_shape: [ 3,32,320 ] + image_shape: [ 3,32,100 ] - keepKeys: keep_keys: [ 'image','label','length' ] # dataloader将按照此顺序返回list loader: @@ -88,14 +88,14 @@ EVAL: dataset: name: LMDBDateSet file_list: - - /home/zhoujun20/rec/lmdb/val + - ./rec/val/ transforms: - DecodeImage: # load image img_mode: BGR channel_first: False - CTCLabelEncode: # Class handling label - RecResizeImg: - image_shape: [ 3,32,320 ] + image_shape: [ 3,32,100 ] - keepKeys: keep_keys: [ 'image','label','length' ] # dataloader将按照此顺序返回list loader: diff --git a/configs/rec/rec_r34_vd_none_bilstm_ctc.yml b/configs/rec/rec_r34_vd_none_bilstm_ctc.yml index adb4195b6612a6e86804a9fe1909a68bbbc78782..06576d315e8f3bb23a127f5e01e8da2ca5938cba 100644 --- a/configs/rec/rec_r34_vd_none_bilstm_ctc.yml +++ b/configs/rec/rec_r34_vd_none_bilstm_ctc.yml @@ -64,9 +64,9 @@ Metric: TRAIN: dataset: name: SimpleDataSet - data_dir: /home/zhoujun20/rec + data_dir: ./rec file_list: - - /home/zhoujun20/rec/real_data.txt # dataset1 + - ./rec/train.txt # dataset1 ratio_list: [ 0.4,0.6 ] transforms: - DecodeImage: # load image @@ -87,9 +87,9 @@ TRAIN: EVAL: dataset: name: SimpleDataSet - data_dir: /home/zhoujun20/rec + data_dir: ./rec file_list: - - /home/zhoujun20/rec/label_val_all.txt + - ./rec/val.txt transforms: - DecodeImage: # load image img_mode: BGR diff --git a/configs/rec/rec_r34_vd_none_none_ctc.yml b/configs/rec/rec_r34_vd_none_none_ctc.yml new file mode 100644 index 0000000000000000000000000000000000000000..4e2367c966b8e9b03eccecfe62137f90ca4be949 --- /dev/null +++ b/configs/rec/rec_r34_vd_none_none_ctc.yml @@ -0,0 +1,105 @@ +Global: + use_gpu: false + epoch_num: 500 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec/res34_none_none_ctc/ + save_epoch_step: 500 + # evaluation is run every eval_batch_step iterations + eval_batch_step: 127 + # if 
pretrained_model is saved in static mode, load_static_weights must be set to True + load_static_weights: True + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_words/ch/word_1.jpg + # for data or label process + max_text_length: 80 + character_dict_path: ppocr/utils/ppocr_keys_v1.txt + character_type: 'ch' + use_space_char: False + infer_mode: False + use_tps: False + + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + learning_rate: + name: Cosine + lr: 0.001 + warmup_epoch: 4 + regularizer: + name: 'L2' + factor: 0.00001 + +Architecture: + type: rec + algorithm: CRNN + Transform: + Backbone: + name: ResNet + layers: 34 + Neck: + name: SequenceEncoder + encoder_type: reshape + Head: + name: CTC + fc_decay: 0.00001 + +Loss: + name: CTCLoss + +PostProcess: + name: CTCLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + +TRAIN: + dataset: + name: SimpleDataSet + data_dir: ./rec + file_list: + - ./rec/train.txt # dataset1 + ratio_list: [ 0.4,0.6 ] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - RecAug: + - RecResizeImg: + image_shape: [ 3,32,320 ] + - keepKeys: + keep_keys: [ 'image','label','length' ] # the dataloader will return a list in this order + loader: + batch_size: 256 + shuffle: True + drop_last: True + num_workers: 8 + +EVAL: + dataset: + name: SimpleDataSet + data_dir: ./rec + file_list: + - ./rec/val.txt + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [ 3,32,320 ] + - keepKeys: + keep_keys: [ 'image','label','length' ] # the dataloader will return a list in this order + loader: + shuffle: False + drop_last: False + batch_size: 256 + num_workers: 8