---
# Training / evaluation configuration for a ViTSTR text-recognition model
# (model_type: rec, algorithm: ViTSTR) using LMDB datasets.
# Top-level sections: Global, Optimizer, Architecture, Loss, PostProcess,
# Metric, Train, Eval.

Global:
  use_gpu: true
  epoch_num: 20
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: ./output/rec/vitstr_none_ce/
  save_epoch_step: 1
  # evaluation is run every 50 iterations after the 0th iteration
  # NOTE(review): this comment previously said "2000 iterations" while the
  # value was already [0, 50] -- confirm which interval is intended.
  eval_batch_step: [0, 50]
  cal_metric_during_train: true
  # Left unset (null): no pretrained weights, checkpoint or inference
  # export directory is configured here.
  pretrained_model: null
  checkpoints: null
  save_inference_dir: null
  use_visualdl: false
  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
  character_dict_path: ppocr/utils/EN_symbol_dict.txt
  max_text_length: 25
  infer_mode: false
  use_space_char: false
  save_res_path: ./output/rec/predicts_vitstr.txt

Optimizer:
  name: Adadelta
  epsilon: 0.00000001
  rho: 0.95
  clip_norm: 5.0
  lr:
    learning_rate: 1.0

Architecture:
  model_type: rec
  algorithm: ViTSTR
  in_channels: 1
  # No Transform module is configured.
  Transform: null
  Backbone:
    name: ViTSTR
    scale: tiny
  Neck:
    name: SequenceEncoder
    encoder_type: reshape
  Head:
    name: CTCHead

Loss:
  name: CELoss
  smoothing: false
  with_all: true

PostProcess:
  name: ViTSTRLabelDecode

Metric:
  name: RecMetric
  main_indicator: acc

Train:
  dataset:
    name: LMDBDataSet
    data_dir: ./train_data/data_lmdb_release/training
    transforms:
      - DecodeImage:  # load image
          img_mode: BGR
          channel_first: false
      - ViTSTRLabelEncode: null  # Class handling label
      - GrayRecResizeImg:
          image_shape: [224, 224]  # W H
          resize_type: PIL  # PIL or OpenCV
          inter_type: 'Image.BICUBIC'
          scale: false
      - KeepKeys:
          # dataloader will return list in this order
          keep_keys: ['image', 'label', 'length']
  loader:
    shuffle: true
    batch_size_per_card: 48
    drop_last: true
    num_workers: 2

Eval:
  dataset:
    name: LMDBDataSet
    data_dir: ./train_data/data_lmdb_release/validation
    transforms:
      - DecodeImage:  # load image
          img_mode: BGR
          channel_first: false
      - ViTSTRLabelEncode: null  # Class handling label
      - GrayRecResizeImg:
          image_shape: [224, 224]  # W H
          resize_type: PIL  # PIL or OpenCV
          inter_type: 'Image.BICUBIC'
          scale: false
      - KeepKeys:
          # dataloader will return list in this order
          keep_keys: ['image', 'label', 'length']
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 256
    num_workers: 2