# SLANet_ch.yml
# Committed by 文幕地方.
# Run-wide settings. The groupings below are a reading aid derived from the
# key names; confirm exact semantics against the consuming framework.
Global:
  # Device and schedule
  use_gpu: True
  use_sync_bn: True
  epoch_num: 400

  # Logging
  log_smooth_window: 20
  print_batch_step: 20
  use_visualdl: False

  # Model saving / loading. `null` means nothing is configured in this file.
  save_model_dir: ./output/SLANet_ch
  # Same value as epoch_num above.
  save_epoch_step: 400
  pretrained_model: null
  checkpoints: null
  save_inference_dir: ./output/SLANet_ch/infer

  # Evaluation is run every 331 iterations after the 0th iteration.
  eval_batch_step: [0, 331]
  cal_metric_during_train: True

  # Inference
  infer_img: doc/table/table.jpg
  infer_mode: False
  save_res_path: output/infer

  # Data / label processing
  character_dict_path: ppocr/utils/dict/table_structure_dict_ch.txt
  character_type: en
  # Anchor: aliased by Architecture.Head and by TableLabelEncode (Train/Eval).
  max_text_length: &max_text_length 500
  # Anchor: aliased by Metric and by TableBoxEncode (Train/Eval).
  # Formats listed by the original author: 'xywh', 'xyxy', 'xyxyxyxy'.
  box_format: &box_format xyxyxyxy

# Optimizer configuration.
Optimizer:
  name: Adam
  lr:
    learning_rate: 0.001
  beta1: 0.9
  beta2: 0.999
  clip_norm: 5.0
  regularizer:
    name: L2
    # Factor is zero; presumably the L2 term then has no effect -- confirm.
    factor: 0.0

# Model definition, split into Backbone, Neck and Head sub-sections.
Architecture:
  algorithm: SLANet
  model_type: table
  Backbone:
    name: PPLCNet
    scale: 1.0
    use_ssld: True
    pretrained: True
  Neck:
    name: CSPPAN
    out_channels: 96
  Head:
    name: SLAHead
    hidden_size: 256
    # Alias of Global.max_text_length (500).
    max_text_length: *max_text_length
    # Anchor: aliased by Metric and by TableLabelEncode (Train/Eval).
    # NOTE(review): 8 presumably corresponds to the four (x, y) points of the
    # 'xyxyxyxy' box format -- confirm.
    loc_reg_num: &loc_reg_num 8

# Loss configuration: a loss type for the loc term plus one weight each for
# the structure and loc terms.
Loss:
  name: SLALoss
  loc_loss: smooth_l1
  structure_weight: 1.0
  loc_weight: 2.0

# Post-processing configuration.
PostProcess:
  name: TableLabelDecode
  # Anchor: TableLabelEncode in both Train and Eval aliases this value, so the
  # flag is set in one place only.
  merge_no_span_structure: &merge_no_span_structure True

# Evaluation metric configuration.
Metric:
  name: TableMetric
  main_indicator: acc
  # Flags
  compute_bbox_metric: False
  del_thead_tbody: True
  # Aliased values: Global.box_format and Architecture.Head.loc_reg_num.
  box_format: *box_format
  loc_reg_num: *loc_reg_num

# Training dataset, transform list and loader settings.
Train:
  dataset:
    name: PubTabDataSet
    # Placeholder paths; replace with the real dataset locations.
    data_dir: /path/to/train_data/
    label_file_list:
      - /path/to/train_gt.txt
    # Sequence order is significant data; keep entries in this order.
    transforms:
      - DecodeImage:
          img_mode: BGR
          channel_first: False
      - TableLabelEncode:
          learn_empty_box: False
          replace_empty_cell_token: False
          # The three values below alias anchors defined in PostProcess,
          # Architecture.Head and Global respectively.
          merge_no_span_structure: *merge_no_span_structure
          loc_reg_num: *loc_reg_num
          max_text_length: *max_text_length
      - TableBoxEncode:
          # Input and output format are the same alias (Global.box_format).
          in_box_format: *box_format
          out_box_format: *box_format
      - ResizeTableImage:
          max_len: 488
      - NormalizeImage:
          # Quoted to make explicit that this is a string, not a YAML number;
          # presumably evaluated by the consumer -- confirm.
          scale: '1./255.'
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          order: hwc
      - PaddingTableImage:
          # Both sides equal ResizeTableImage.max_len above.
          size: [488, 488]
      - ToCHWImage: null
      - KeepKeys:
          keep_keys: [image, structure, bboxes, bbox_masks, shape]
  loader:
    batch_size_per_card: 48
    shuffle: True
    drop_last: True
    num_workers: 1

# Evaluation dataset, transform list and loader settings.
Eval:
  dataset:
    name: PubTabDataSet
    # Placeholder paths; replace with the real dataset locations.
    data_dir: /path/to/val_data/
    label_file_list:
      - /path/to/val_gt.txt
    # Sequence order is significant data; keep entries in this order.
    transforms:
      - DecodeImage:
          img_mode: BGR
          channel_first: False
      - TableLabelEncode:
          learn_empty_box: False
          replace_empty_cell_token: False
          # The three values below alias anchors defined in PostProcess,
          # Architecture.Head and Global respectively.
          merge_no_span_structure: *merge_no_span_structure
          loc_reg_num: *loc_reg_num
          max_text_length: *max_text_length
      - TableBoxEncode:
          # Input and output format are the same alias (Global.box_format).
          in_box_format: *box_format
          out_box_format: *box_format
      - ResizeTableImage:
          max_len: 488
      - NormalizeImage:
          # Quoted to make explicit that this is a string, not a YAML number;
          # presumably evaluated by the consumer -- confirm.
          scale: '1./255.'
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          order: hwc
      - PaddingTableImage:
          # Both sides equal ResizeTableImage.max_len above.
          size: [488, 488]
      - ToCHWImage: null
      - KeepKeys:
          keep_keys: [image, structure, bboxes, bbox_masks, shape]
  loader:
    batch_size_per_card: 48
    shuffle: False
    drop_last: False
    num_workers: 1