# SLANet.yml - SLANet table model configuration
# Global run settings: device, schedule length, logging and output paths, and
# the label-processing values that other sections reuse through aliases.
Global:
  use_gpu: true
  epoch_num: 100
  log_smooth_window: 20
  print_batch_step: 20
  save_model_dir: ./output/SLANet
  # NOTE(review): 400 is larger than epoch_num (100); if this is a periodic
  # save interval counted in epochs it cannot trigger within one run - confirm.
  save_epoch_step: 400
  # evaluation is run every 1000 iterations after the 0th iteration
  eval_batch_step: [0, 1000]
  cal_metric_during_train: true
  # Explicit nulls (previously bare empty values); both keys are unset.
  pretrained_model: null
  checkpoints: null
  save_inference_dir: ./output/SLANet/infer
  use_visualdl: false
  infer_img: doc/table/table.jpg
  # for data or label process
  character_dict_path: ppocr/utils/dict/table_structure_dict.txt
  character_type: en
  # Anchor: aliased by Architecture.Head and the TableLabelEncode transforms.
  max_text_length: &max_text_length 500
  # Anchor: aliased by Metric and the TableBoxEncode transforms.
  box_format: &box_format 'xyxy'  # 'xywh', 'xyxy', 'xyxyxyxy'
  infer_mode: false
  use_sync_bn: true
  save_res_path: 'output/infer'

# Optimizer settings: Adam with a Piecewise learning-rate schedule and an L2
# regularizer entry.
Optimizer:
  name: Adam
  beta1: 0.9
  beta2: 0.999
  clip_norm: 5.0
  lr:
    name: Piecewise
    # NOTE(review): same value as values[0] below; whether the Piecewise
    # schedule reads this key at all is not visible from this file - confirm.
    learning_rate: 0.001
    # Two boundaries paired with three values.
    decay_epochs: [40, 50]
    values: [0.001, 0.0001, 0.00005]
  regularizer:
    name: 'L2'
    # NOTE(review): a zero factor presumably disables the penalty - confirm.
    factor: 0.0

# Model definition: a table-type model using the SLANet algorithm, composed of
# a Backbone, a Neck and a Head section.
Architecture:
  model_type: table
  algorithm: SLANet
  Backbone:
    name: PPLCNet
    scale: 1.0
    pretrained: true
    use_ssld: true
  Neck:
    name: CSPPAN
    out_channels: 96
  Head:
    name: SLAHead
    hidden_size: 256
    # Alias of the max_text_length anchor declared under Global (500).
    max_text_length: *max_text_length
    # Anchor: aliased by Metric and by the TableLabelEncode transforms in
    # Train and Eval, so all of them share this value.
    loc_reg_num: &loc_reg_num 4

# Loss configuration for SLANetLoss.
# NOTE(review): the two weights presumably scale a structure term and a
# location term, with loc_loss selecting the location loss - confirm.
Loss:
  name: SLANetLoss
  structure_weight: 1.0
  loc_weight: 2.0
  loc_loss: smooth_l1

# Post-processing stage configuration.
PostProcess:
  name: TableLabelDecode
  # Anchor: aliased by the TableLabelEncode transforms in Train and Eval.
  merge_no_span_structure: &merge_no_span_structure false

# Evaluation metric configuration; `acc` is the main indicator.
Metric:
  name: TableMetric
  main_indicator: acc
  compute_bbox_metric: false
  # Both values are aliases: loc_reg_num is anchored under Architecture.Head,
  # box_format under Global.
  loc_reg_num: *loc_reg_num
  box_format: *box_format

# Training dataset, transform sequence and data-loader settings.
Train:
  dataset:
    name: PubTabDataSet
    data_dir: train_data/table/pubtabnet/train/
    label_file_list: [train_data/table/pubtabnet/PubTabNet_2.0.0_train.jsonl]
    # Transforms form a sequence; the order below is unchanged.
    transforms:
      - DecodeImage:  # load image
          img_mode: BGR
          channel_first: false
      - TableLabelEncode:
          learn_empty_box: false
          # The three aliases below resolve to anchors declared under
          # PostProcess, Architecture.Head and Global respectively.
          merge_no_span_structure: *merge_no_span_structure
          replace_empty_cell_token: false
          loc_reg_num: *loc_reg_num
          max_text_length: *max_text_length
      - TableBoxEncode:
          box_format: *box_format
      - ResizeTableImage:
          max_len: 488
      - NormalizeImage:
          # Quoted on purpose: this is a string expression, not a YAML float.
          # NOTE(review): presumably evaluated by the consumer - confirm.
          scale: '1./255.'
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          order: 'hwc'
      - PaddingTableImage:
          # Same edge length as ResizeTableImage.max_len above.
          size: [488, 488]
      # Explicit null (previously a bare empty value): no parameters given.
      - ToCHWImage: null
      - KeepKeys:
          keep_keys: ['image', 'structure', 'bboxes', 'bbox_masks', 'shape']
  loader:
    shuffle: true
    batch_size_per_card: 48
    drop_last: true
    num_workers: 1

# Evaluation dataset, transform sequence and data-loader settings. The
# transform list matches the one under Train; only paths and loader flags
# differ.
Eval:
  dataset:
    name: PubTabDataSet
    data_dir: train_data/table/pubtabnet/val/
    label_file_list: [train_data/table/pubtabnet/PubTabNet_2.0.0_val.jsonl]
    # Transforms form a sequence; the order below is unchanged.
    transforms:
      - DecodeImage:  # load image
          img_mode: BGR
          channel_first: false
      - TableLabelEncode:
          learn_empty_box: false
          # The three aliases below resolve to anchors declared under
          # PostProcess, Architecture.Head and Global respectively.
          merge_no_span_structure: *merge_no_span_structure
          replace_empty_cell_token: false
          loc_reg_num: *loc_reg_num
          max_text_length: *max_text_length
      - TableBoxEncode:
          box_format: *box_format
      - ResizeTableImage:
          max_len: 488
      - NormalizeImage:
          # Quoted on purpose: this is a string expression, not a YAML float.
          # NOTE(review): presumably evaluated by the consumer - confirm.
          scale: '1./255.'
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          order: 'hwc'
      - PaddingTableImage:
          # Same edge length as ResizeTableImage.max_len above.
          size: [488, 488]
      # Explicit null (previously a bare empty value): no parameters given.
      - ToCHWImage: null
      - KeepKeys:
          keep_keys: ['image', 'structure', 'bboxes', 'bbox_masks', 'shape']
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 48
    num_workers: 1