Global:
  use_gpu: True
  epoch_num: 20
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: ./output/rec/svtr/
  save_epoch_step: 1
  # evaluation is run every 2000 iterations after the 0th iteration
  eval_batch_step: [0, 2000]
  cal_metric_during_train: True
  pretrained_model:
  checkpoints:
  save_inference_dir:
  use_visualdl: False
  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
  character_dict_path:
  character_type: en
  max_text_length: 25
  infer_mode: False
  use_space_char: False
  save_res_path: ./output/rec/predicts_svtr_tiny.txt
  d2s_train_image_shape: [3, 64, 256]

Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.99
  epsilon: 1.e-8
  weight_decay: 0.05
  no_weight_decay_name: norm pos_embed
  one_dim_param_no_weight_decay: True
  lr:
    name: Cosine
    learning_rate: 0.0005
    warmup_epoch: 2

Architecture:
  model_type: rec
  algorithm: SVTR
  Transform:
    name: STN_ON
    tps_inputsize: [32, 64]
    tps_outputsize: [32, 100]
    num_control_points: 20
    tps_margins: [0.05, 0.05]
    stn_activation: none
  Backbone:
    name: SVTRNet
    img_size: [32, 100]
    out_char_num: 25 # W//4 or W//8 or W//12
    out_channels: 192
    patch_merging: 'Conv'
    embed_dim: [64, 128, 256]
    depth: [3, 6, 3]
    num_heads: [2, 4, 8]
    mixer: ['Local','Local','Local','Local','Local','Local','Global','Global','Global','Global','Global','Global']
    local_mixer: [[7, 11], [7, 11], [7, 11]]
    last_stage: True
    prenorm: False
  Neck:
    name: SequenceEncoder
    encoder_type: reshape
  Head:
    name: CTCHead

Loss:
  name: CTCLoss

PostProcess:
  name: CTCLabelDecode

Metric:
  name: RecMetric
  main_indicator: acc

Train:
  dataset:
    name: LMDBDataSet
    data_dir: ./train_data/data_lmdb_release/training/
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: False
      - SVTRRecAug:
          aug_type: 0 # or 1
      - CTCLabelEncode: # Class handling label
      - SVTRRecResizeImg:
          image_shape: [3, 64, 256]
          padding: False
      - KeepKeys:
          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
  loader:
    shuffle: True
    batch_size_per_card: 512
    drop_last: True
    num_workers: 8

Eval:
  dataset:
    name: LMDBDataSet
    data_dir: ./train_data/data_lmdb_release/evaluation/
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: False
      - CTCLabelEncode: # Class handling label
      - SVTRRecResizeImg:
          image_shape: [3, 64, 256]
          padding: False
      - KeepKeys:
          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 256
    num_workers: 2
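
# Launch sketch: a minimal example of running this config, assuming the standard
# PaddleOCR layout (tools/train.py, tools/eval.py, tools/export_model.py) and that
# this file is saved as configs/rec/rec_svtrnet.yml -- the filename and checkpoint
# paths below are assumptions for illustration only.
#
#   # train with this config
#   python3 tools/train.py -c configs/rec/rec_svtrnet.yml
#
#   # evaluate a trained checkpoint (checkpoint path is hypothetical)
#   python3 tools/eval.py -c configs/rec/rec_svtrnet.yml \
#       -o Global.checkpoints=./output/rec/svtr/best_accuracy
#
#   # export an inference model (overrides Global.save_inference_dir via -o)
#   python3 tools/export_model.py -c configs/rec/rec_svtrnet.yml \
#       -o Global.pretrained_model=./output/rec/svtr/best_accuracy \
#          Global.save_inference_dir=./output/rec/svtr_infer/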