# table_master.yml - TableMaster training/evaluation configuration.
# Global: run-wide settings (device, schedule lengths, output locations,
# label dictionary).
Global:
  use_gpu: true
  epoch_num: 17
  log_smooth_window: 20
  print_batch_step: 5
  save_model_dir: ./output/table_master/
  save_epoch_step: 17
  # Evaluation is run every 400 iterations after the 0th iteration.
  eval_batch_step:
    - 0
    - 400
  cal_metric_during_train: true
  # The next three keys carry no value; they are written as explicit nulls.
  pretrained_model: null
  checkpoints: null
  save_inference_dir: null
  use_visualdl: false
  infer_img: ppstructure/docs/table/table.jpg
  save_res_path: output/table_master
  # Settings used for data / label processing.
  character_dict_path: ppocr/utils/dict/table_master_structure_dict.txt
  infer_mode: false
  # Same value as Architecture.Head.max_text_length (500).
  max_text_length: 500
  process_total_num: 0
  process_cut_num: 0


# Optimizer: Adam with a MultiStepDecay learning-rate schedule.
Optimizer:
  name: Adam
  beta1: 0.9
  beta2: 0.999
  lr:
    name: MultiStepDecay
    learning_rate: 0.001
    # NOTE(review): milestones are presumably epoch indices (epoch_num is 17)
    # at which the rate is scaled by `gamma` - confirm against the scheduler.
    milestones:
      - 12
      - 15
    gamma: 0.1
    warmup_epoch: 0.02
  regularizer:
    name: L2
    # NOTE(review): a factor of 0 presumably disables the penalty - confirm.
    factor: 0.0

# Architecture: TableMaster model, made of a TableResNetExtra backbone and a
# TableMasterHead.
Architecture:
  model_type: table
  algorithm: TableMaster
  Backbone:
    name: TableResNetExtra
    gcb_config:
      ratio: 0.0625
      headers: 1
      att_scale: false
      fusion_type: channel_add
      # One flag per entry (four in total).
      # NOTE(review): presumably one flag per backbone stage - confirm.
      layers:
        - false
        - true
        - true
        - true
    layers: [1, 2, 5, 3]
  Head:
    name: TableMasterHead
    hidden_size: 512
    headers: 8
    dropout: 0
    # NOTE(review): 2024 is an unusual width; verify it is intentional before
    # changing it, since existing weights may depend on this exact value.
    d_ff: 2024
    # Same value as Global.max_text_length (500).
    max_text_length: 500

# Loss: training loss selection.
Loss:
  name: TableMasterLoss
  # Must be set to the length of the dictionary + 3.
  # NOTE(review): presumably the dictionary at Global.character_dict_path -
  # confirm, and update this value whenever that dictionary changes.
  ignore_index: 42

# PostProcess: decoder applied to the model output.
PostProcess:
  name: TableMasterLabelDecode
  # NOTE(review): `pad` presumably corresponds to the PaddingTableImage step
  # in the Train/Eval transforms - confirm against the decoder.
  box_shape: pad

# Metric: evaluation metric; `acc` is the main indicator.
Metric:
  name: TableMetric
  main_indicator: acc
  # Computing the bbox metric takes a long time; set this to False for
  # training.
  compute_bbox_metric: true

# Train: training dataset, preprocessing pipeline and data loader.
Train:
  dataset:
    name: PubTabDataSet
    # NOTE(review): absolute paths under one user's home directory; they must
    # be adjusted to the local dataset location before use.
    data_dir: /home/zhoujun20/table/PubTabNe/pubtabnet/train/
    label_file_list:
      - /home/zhoujun20/table/PubTabNe/pubtabnet/PubTabNet_2.0.0_train.jsonl
    # Transforms are applied in the order listed.
    transforms:
      # Load image.
      - DecodeImage:
          img_mode: BGR
          channel_first: false
      - TableMasterLabelEncode:
          learn_empty_box: false
          merge_no_span_structure: true
          replace_empty_cell_token: true
      - ResizeTableImage:
          max_len: 480
          resize_bboxes: true
      - PaddingTableImage:
          size: [480, 480]
      - TableBoxEncode:
          use_xywh: true
      - NormalizeImage:
          # Kept as the literal text `1./255.`; do not rewrite as a number.
          scale: 1./255.
          mean: [0.5, 0.5, 0.5]
          std: [0.5, 0.5, 0.5]
          order: hwc
      # No parameters.
      - ToCHWImage: null
      - KeepKeys:
          keep_keys:
            - image
            - structure
            - bboxes
            - bbox_masks
            - shape
  loader:
    shuffle: true
    batch_size_per_card: 8
    drop_last: true
    num_workers: 1

# Eval: evaluation dataset, preprocessing pipeline and data loader.
# The transform list is the same as the Train one.
Eval:
  dataset:
    name: PubTabDataSet
    # NOTE(review): absolute paths under one user's home directory; they must
    # be adjusted to the local dataset location before use.
    data_dir: /home/zhoujun20/table/PubTabNe/pubtabnet/val/
    label_file_list:
      - /home/zhoujun20/table/PubTabNe/pubtabnet/val_500.jsonl
    # Transforms are applied in the order listed.
    transforms:
      # Load image.
      - DecodeImage:
          img_mode: BGR
          channel_first: false
      - TableMasterLabelEncode:
          learn_empty_box: false
          merge_no_span_structure: true
          replace_empty_cell_token: true
      - ResizeTableImage:
          max_len: 480
          resize_bboxes: true
      - PaddingTableImage:
          size: [480, 480]
      - TableBoxEncode:
          use_xywh: true
      - NormalizeImage:
          # Kept as the literal text `1./255.`; do not rewrite as a number.
          scale: 1./255.
          mean: [0.5, 0.5, 0.5]
          std: [0.5, 0.5, 0.5]
          order: hwc
      # No parameters.
      - ToCHWImage: null
      - KeepKeys:
          keep_keys:
            - image
            - structure
            - bboxes
            - bbox_masks
            - shape
  loader:
    shuffle: false
    batch_size_per_card: 2
    drop_last: false
    num_workers: 8