# ch_PP-OCRv4_rec_distill.yml
# Committed by Double_V.
# (Repository-viewer residue -- file size, "Newer/Older" navigation and the
# 1-231 line-number gutter -- was not part of the configuration and has been
# reduced to this comment header.)
# Global run settings: device, schedule length, logging cadence, output
# locations, character dictionary and text-length limit.
Global:
  debug: false
  use_gpu: true
  epoch_num: 200
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: ./output/rec_dkd_400w_svtr_ctc_lcnet_blank_dkd0.1/
  save_epoch_step: 40
  eval_batch_step:
  - 0
  - 2000
  cal_metric_during_train: true
  pretrained_model: null
  # Points at the `latest` entry inside save_model_dir above.
  checkpoints: ./output/rec_dkd_400w_svtr_ctc_lcnet_blank_dkd0.1/latest
  save_inference_dir: null
  use_visualdl: false
  infer_img: doc/imgs_words/ch/word_1.jpg
  character_dict_path: ppocr/utils/ppocr_keys_v1.txt
  # The anchor is required: the NRTRHead entries of both Teacher and Student
  # under Architecture read this value through the alias `*max_text_length`.
  # Without the anchor the alias is undefined and the document fails to load.
  max_text_length: &max_text_length 25
  infer_mode: false
  use_space_char: true
  distributed: true
  save_res_path: ./output/rec/predicts_ppocrv3.txt
# Optimizer: Adam with a Cosine learning-rate schedule (base rate 0.001,
# 2 warmup epochs) and L2 regularization.
Optimizer:
  name: Adam
  beta1: 0.9
  beta2: 0.999
  lr:
    name: Cosine
    learning_rate: 0.001
    warmup_epoch: 2
  regularizer:
    name: L2
    factor: 3.0e-05
# Model definition: a DistillationModel holding two recognition sub-models,
# a frozen Teacher (SVTRNet backbone) and a trainable Student (PPLCNetV3
# backbone). Both use the same MultiHead layout (CTCHead + NRTRHead).
Architecture:
  model_type: rec
  name: DistillationModel
  algorithm: Distillation
  Models:
    Teacher:
      # NOTE(review): the Teacher is frozen but `pretrained` is null;
      # presumably its weights are restored via Global.checkpoints -- confirm.
      pretrained: null
      freeze_params: true
      return_all_feats: true
      model_type: rec
      algorithm: SVTR
      Transform: null
      Backbone:
        name: SVTRNet
        img_size: [48, 320]
        out_char_num: 40
        out_channels: 192
        patch_merging: Conv
        embed_dim: [64, 128, 256]
        depth: [3, 6, 3]
        num_heads: [2, 4, 8]
        # Twelve entries: six Conv followed by six Global.
        mixer:
          - Conv
          - Conv
          - Conv
          - Conv
          - Conv
          - Conv
          - Global
          - Global
          - Global
          - Global
          - Global
          - Global
        local_mixer:
          - [5, 5]
          - [5, 5]
          - [5, 5]
        last_stage: false
        prenorm: true
      Head:
        name: MultiHead
        head_list:
          - CTCHead:
              Neck:
                name: svtr
                dims: 120
                depth: 2
                hidden_dims: 120
                kernel_size: [1, 3]
                use_guide: true
              Head:
                fc_decay: 0.00001
          - NRTRHead:
              nrtr_dim: 384
              # YAML alias: needs an anchor named `max_text_length` defined
              # earlier in this document.
              max_text_length: *max_text_length
    Student:
      pretrained: null
      freeze_params: false
      return_all_feats: true
      model_type: rec
      algorithm: SVTR
      Transform: null
      Backbone:
        name: PPLCNetV3
        scale: 0.95
      Head:
        name: MultiHead
        head_list:
          - CTCHead:
              Neck:
                name: svtr
                dims: 120
                depth: 2
                hidden_dims: 120
                kernel_size: [1, 3]
                use_guide: true
              Head:
                fc_decay: 0.00001
          - NRTRHead:
              nrtr_dim: 384
              # YAML alias: needs an anchor named `max_text_length` defined
              # earlier in this document.
              max_text_length: *max_text_length
# Loss configuration: CombinedLoss with four entries in loss_config_list.
# Two entries pair Student with Teacher (DistillationDKDLoss,
# DistillCTCLogits); the other two list only the Student
# (DistillationCTCLoss, DistillationNRTRLoss).
Loss:
  name: CombinedLoss
  loss_config_list:
    - DistillationDKDLoss:
        weight: 0.1
        model_name_pairs:
          - [Student, Teacher]
        key: head_out
        multi_head: true
        alpha: 1.0
        beta: 2.0
        dis_head: gtc
        name: dkd
    - DistillationCTCLoss:
        weight: 1.0
        model_name_list:
          - Student
        key: head_out
        multi_head: true
    - DistillationNRTRLoss:
        weight: 1.0
        smoothing: false
        model_name_list:
          - Student
        key: head_out
        multi_head: true
    - DistillCTCLogits:
        weight: 1.0
        reduction: mean
        model_name_pairs:
          - [Student, Teacher]
        key: head_out
# Post-processing: DistillationCTCLabelDecode, listing only the Student model
# and reading the `head_out` key.
PostProcess:
  name: DistillationCTCLabelDecode
  model_name: [Student]
  key: head_out
  multi_head: true
# Evaluation metric: DistillationMetric with RecMetric as its base metric and
# `acc` as the main indicator.
# NOTE(review): `key: Student` presumably selects the Student model's outputs
# for scoring -- confirm against the metric implementation.
Metric:
  name: DistillationMetric
  base_metric_name: RecMetric
  main_indicator: acc
  key: Student
  ignore_space: false
# Training data: a SimpleDataSet read from ./train_data/train_list.txt, passed
# through the transform list below, then batched 128 per card.
Train:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/
    label_file_list:
      - ./train_data/train_list.txt
    ratio_list: [1.0]
    transforms:
      - DecodeImage:
          img_mode: BGR
          channel_first: false
      # RecAug is configured with no arguments (explicit null value).
      - RecAug: null
      - MultiLabelEncode:
          gtc_encode: NRTRLabelEncode
      # NOTE(review): unlike the Eval pipeline, no RecResizeImg step is listed
      # here -- confirm that training images are resized elsewhere.
      - KeepKeys:
          keep_keys:
            - image
            - label_ctc
            - label_gtc
            - length
            - valid_ratio
  loader:
    shuffle: true
    batch_size_per_card: 128
    drop_last: true
    num_workers: 8
    use_shared_memory: true
# Evaluation data: a SimpleDataSet read from ./train_data/val_list.txt; images
# are resized to image_shape [3, 48, 320] by RecResizeImg before KeepKeys.
Eval:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data
    label_file_list:
      - ./train_data/val_list.txt
    transforms:
      - DecodeImage:
          img_mode: BGR
          channel_first: false
      - MultiLabelEncode:
          gtc_encode: NRTRLabelEncode
      - RecResizeImg:
          image_shape: [3, 48, 320]
      - KeepKeys:
          keep_keys:
            - image
            - label_ctc
            - label_gtc
            - length
            - valid_ratio
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 128
    num_workers: 4
# Profiler options are left unset (null).
profiler_options: null