ch_PP-OCRv4_rec_distill.yml

Global:
  debug: false
  use_gpu: true
  epoch_num: 200
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: ./output/rec_dkd_400w_svtr_ctc_lcnet_blank_dkd0.1/
  save_epoch_step: 40
  eval_batch_step:
  - 0
  - 2000
  cal_metric_during_train: true
  pretrained_model: null
  checkpoints: ./output/rec_dkd_400w_svtr_ctc_lcnet_blank_dkd0.1/latest
  save_inference_dir: null
  use_visualdl: false
  infer_img: doc/imgs_words/ch/word_1.jpg
  character_dict_path: ppocr/utils/ppocr_keys_v1.txt
  max_text_length: 25
  infer_mode: false
  use_space_char: true
  distributed: true
  save_res_path: ./output/rec/predicts_ppocrv3.txt
Optimizer:
  name: Adam
  beta1: 0.9
  beta2: 0.999
  lr:
    name: Cosine
    learning_rate: 0.001
    warmup_epoch: 2
  regularizer:
    name: L2
    factor: 3.0e-05
Architecture:
  model_type: rec
  name: DistillationModel
  algorithm: Distillation
  Models:
    Teacher:
      pretrained: 
      freeze_params: true
      return_all_feats: true
      model_type: rec
      algorithm: SVTR
      Transform: null
      Backbone:
        name: SVTRNet
        img_size:
        - 48
        - 320
        out_char_num: 40
        out_channels: 192
        patch_merging: Conv
        embed_dim:
        - 64
        - 128
        - 256
        depth:
        - 3
        - 6
        - 3
        num_heads:
        - 2
        - 4
        - 8
        mixer:
        - Conv
        - Conv
        - Conv
        - Conv
        - Conv
        - Conv
        - Global
        - Global
        - Global
        - Global
        - Global
        - Global
        local_mixer:
        - - 5
          - 5
        - - 5
          - 5
        - - 5
          - 5
        last_stage: false
        prenorm: true
      Head:
        name: MultiHead
        head_list:
          - CTCHead:
              Neck:
                name: svtr
                dims: 120
                depth: 2
                hidden_dims: 120
                kernel_size: [1, 3]
                use_guide: True
              Head:
                fc_decay: 0.00001
          - NRTRHead:
              nrtr_dim: 384
              max_text_length: *max_text_length
    Student:
      pretrained: 
      freeze_params: false
      return_all_feats: true
      model_type: rec
      algorithm: SVTR
      Transform: null
      Backbone:
        name: PPLCNetV3
        scale: 0.95
      Head:
        name: MultiHead
        head_list:
          - CTCHead:
              Neck:
                name: svtr
                dims: 120
                depth: 2
                hidden_dims: 120
                kernel_size: [1, 3]
                use_guide: True
              Head:
                fc_decay: 0.00001
          - NRTRHead:
              nrtr_dim: 384
              max_text_length: *max_text_length
Loss:
  name: CombinedLoss
  loss_config_list:
  - DistillationDKDLoss:
      weight: 0.1
      model_name_pairs:
      - - Student
        - Teacher
      key: head_out
      multi_head: true
      alpha: 1.0
      beta: 2.0
      dis_head: gtc
      name: dkd
  - DistillationCTCLoss:
      weight: 1.0
      model_name_list:
      - Student
      key: head_out
      multi_head: true
  - DistillationNRTRLoss:
      weight: 1.0
      smoothing: false
      model_name_list:
      - Student
      key: head_out
      multi_head: true
  - DistillCTCLogits:
      weight: 1.0
      reduction: mean
      model_name_pairs:
      - - Student
        - Teacher
      key: head_out
PostProcess:
  name: DistillationCTCLabelDecode
  model_name:
  - Student
  key: head_out
  multi_head: true
Metric:
  name: DistillationMetric
  base_metric_name: RecMetric
  main_indicator: acc
  key: Student
  ignore_space: false
Train:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/
    label_file_list:
    - ./train_data/train_list.txt
    ratio_list:
    - 1.0
    transforms:
    - DecodeImage:
        img_mode: BGR
        channel_first: false
    - RecAug:
    - MultiLabelEncode:
        gtc_encode: NRTRLabelEncode
    - KeepKeys:
        keep_keys:
        - image
        - label_ctc
        - label_gtc
        - length
        - valid_ratio
  loader:
    shuffle: true
    batch_size_per_card: 128
    drop_last: true
    num_workers: 8
    use_shared_memory: true
Eval:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data
    label_file_list:
    - ./train_data/val_list.txt
    transforms:
    - DecodeImage:
        img_mode: BGR
        channel_first: false
    - MultiLabelEncode:
        gtc_encode: NRTRLabelEncode
    - RecResizeImg:
        image_shape: [3, 48, 320]
    - KeepKeys:
        keep_keys:
        - image
        - label_ctc
        - label_gtc
        - length
        - valid_ratio
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 128
    num_workers: 4
profiler_options: null