# table_master.yml — TableMaster training/evaluation configuration.
# Run-wide settings: device selection, schedule length, logging cadence and
# input/output paths.
Global:
  use_gpu: true
  epoch_num: 17
  log_smooth_window: 20
  print_batch_step: 100
  save_model_dir: ./output/table_master/
  # Equal to epoch_num above.
  save_epoch_step: 17
  # NOTE(review): presumably [start_step, interval] — confirm with the trainer.
  eval_batch_step: [0, 6259]
  cal_metric_during_train: true
  pretrained_model: null
  # Explicit null rather than a bare empty value; the parsed value is the same.
  checkpoints: null
  save_inference_dir: output/table_master/infer
  use_visualdl: false
  infer_img: ppstructure/docs/table/table.jpg
  save_res_path: ./output/table_master
  character_dict_path: ppocr/utils/dict/table_master_structure_dict.txt
  infer_mode: false
  # Same value as Architecture.Head.max_text_length; keep the two in sync.
  max_text_length: 500
  process_total_num: 0
  process_cut_num: 0


# Optimizer: Adam with a MultiStepDecay learning-rate schedule and an L2
# regularizer entry.
Optimizer:
  name: Adam
  beta1: 0.9
  beta2: 0.999
  lr:
    name: MultiStepDecay
    learning_rate: 0.001
    # NOTE(review): presumably epoch indices (Global.epoch_num is 17) — confirm.
    milestones:
      - 12
      - 15
    gamma: 0.1
    # NOTE(review): fractional value; units (epochs vs. fraction) to be confirmed.
    warmup_epoch: 0.02
  regularizer:
    name: L2
    # NOTE(review): a factor of 0.0 presumably disables the penalty — confirm.
    factor: 0.0

# Model definition: a TableResNetExtra backbone and a TableMasterHead.
Architecture:
  model_type: table
  algorithm: TableMaster
  Backbone:
    name: TableResNetExtra
    gcb_config:
      ratio: 0.0625
      headers: 1
      att_scale: false
      fusion_type: channel_add
      # Four flags, the same length as Backbone.layers below.
      # NOTE(review): presumably one enable flag per backbone stage — confirm.
      layers: [false, true, true, true]
    layers: [1, 2, 5, 3]
  Head:
    name: TableMasterHead
    hidden_size: 512
    headers: 8
    dropout: 0
    # NOTE(review): if d_ff is the feed-forward width, 2048 would be the
    # conventional value; 2024 is kept as-is — confirm it is intentional.
    d_ff: 2024
    # Same value as Global.max_text_length; keep the two in sync.
    max_text_length: 500

# Loss selection.
Loss:
  name: TableMasterLoss
  # Set to len of dict + 3.
  ignore_index: 42

# Post-processing: decoder selection and its box_shape option.
PostProcess:
  name: TableMasterLabelDecode
  box_shape: pad

# Evaluation metric; `acc` is the main indicator.
Metric:
  name: TableMetric
  main_indicator: acc
  compute_bbox_metric: false

# Training data pipeline: PubTabDataSet samples pass through the transforms
# listed below, then go to the loader.
Train:
  dataset:
    name: PubTabDataSet
    data_dir: ./train_data/pubtabnet/train
    label_file_list: [./train_data/pubtabnet/train.jsonl]
    # NOTE(review): presumably applied in the listed order — confirm.
    transforms:
      - DecodeImage:
          img_mode: BGR
          channel_first: false
      - TableMasterLabelEncode:
          learn_empty_box: false
          merge_no_span_structure: true
          replace_empty_cell_token: true
      - ResizeTableImage:
          max_len: 480
          resize_bboxes: true
      # Padding size uses the same 480 as ResizeTableImage.max_len.
      - PaddingTableImage:
          size: [480, 480]
      - TableBoxEncode:
          use_xywh: true
      - NormalizeImage:
          # Not a YAML number, so it loads as a string; quoted to make that
          # explicit. NOTE(review): presumably evaluated by the consumer.
          scale: '1./255.'
          mean: [0.5, 0.5, 0.5]
          std: [0.5, 0.5, 0.5]
          order: hwc
      - ToCHWImage: null
      - KeepKeys:
          keep_keys: [image, structure, bboxes, bbox_masks, shape]
  loader:
    shuffle: true
    batch_size_per_card: 10
    drop_last: true
    num_workers: 8

# Evaluation data pipeline: PubTabDataSet samples pass through the transforms
# listed below, then go to a non-shuffling loader.
Eval:
  dataset:
    name: PubTabDataSet
    data_dir: ./train_data/pubtabnet/test/
    label_file_list: [./train_data/pubtabnet/test.jsonl]
    # NOTE(review): presumably applied in the listed order — confirm.
    transforms:
      - DecodeImage:
          img_mode: BGR
          channel_first: false
      - TableMasterLabelEncode:
          learn_empty_box: false
          merge_no_span_structure: true
          replace_empty_cell_token: true
      - ResizeTableImage:
          max_len: 480
          resize_bboxes: true
      # Padding size uses the same 480 as ResizeTableImage.max_len.
      - PaddingTableImage:
          size: [480, 480]
      - TableBoxEncode:
          use_xywh: true
      - NormalizeImage:
          # Not a YAML number, so it loads as a string; quoted to make that
          # explicit. NOTE(review): presumably evaluated by the consumer.
          scale: '1./255.'
          mean: [0.5, 0.5, 0.5]
          std: [0.5, 0.5, 0.5]
          order: hwc
      - ToCHWImage: null
      - KeepKeys:
          keep_keys: [image, structure, bboxes, bbox_masks, shape]
  loader:
    shuffle: false
    batch_size_per_card: 10
    drop_last: false
    num_workers: 8