# SLANet.yml: training and evaluation configuration for the SLANet table model.
# Run-wide settings: device, schedule, logging cadence, output locations and
# the options shared by data/label processing.
Global:
  use_gpu: true
  epoch_num: 400
  log_smooth_window: 20
  print_batch_step: 20
  save_model_dir: ./output/SLANet
  save_epoch_step: 400
  # evaluation is run every 1000 iterations after the 0th iteration
  eval_batch_step: [0, 1000]
  cal_metric_during_train: true
  # Both entries are intentionally empty (null) in this config.
  pretrained_model: null
  checkpoints: null
  save_inference_dir: ./output/SLANet/infer
  use_visualdl: false
  infer_img: doc/table/table.jpg
  # for data or label process
  character_dict_path: ppocr/utils/dict/table_structure_dict.txt
  character_type: en
  # Anchored: aliased by Architecture.Head and by the TableLabelEncode
  # transforms in Train and Eval.
  max_text_length: &max_text_length 500
  # Anchored: aliased by Metric and by the TableBoxEncode transforms.
  box_format: &box_format 'xyxy' # 'xywh', 'xyxy', 'xyxyxyxy'
  infer_mode: false
  use_sync_bn: true
  save_res_path: 'output/infer'

# Optimizer selection and its hyper-parameters.
Optimizer:
  name: Adam
  beta1: 0.9
  beta2: 0.999
  clip_norm: 5.0
  lr:
    learning_rate: 0.001
  regularizer:
    name: L2
    # NOTE(review): a factor of zero presumably disables the regularizer's
    # effect - confirm against the consumer.
    factor: 0.0

# Model definition: a `table` model using the SLANet algorithm, described by
# its Backbone, Neck and Head sub-sections.
Architecture:
  model_type: table
  algorithm: SLANet
  Backbone:
    name: PPLCNet
    scale: 1.0
    pretrained: true
    use_ssld: true
  Neck:
    name: CSPPAN
    out_channels: 96
  Head:
    name: SLAHead
    hidden_size: 256
    # Alias of Global.max_text_length.
    max_text_length: *max_text_length
    # Anchored here; aliased by Metric and by the TableLabelEncode transforms
    # in Train and Eval.
    loc_reg_num: &loc_reg_num 4

# Loss selection, with separate weights for the structure and loc terms.
Loss:
  name: SLANetLoss
  structure_weight: 1.0
  loc_weight: 2.0
  # NOTE(review): presumably selects the loss used for the loc term - confirm
  # against the SLANetLoss implementation.
  loc_loss: smooth_l1

# Post-processing step selection.
PostProcess:
  name: TableLabelDecode
  # Anchored: the TableLabelEncode transforms in Train and Eval alias this
  # value, so all three places share one setting.
  merge_no_span_structure: &merge_no_span_structure false

# Evaluation metric selection; `acc` is the main indicator.
Metric:
  name: TableMetric
  main_indicator: acc
  compute_bbox_metric: false
  # Aliases of Architecture.Head.loc_reg_num and Global.box_format.
  loc_reg_num: *loc_reg_num
  box_format: *box_format

# Training data: dataset location, the ordered transform list applied to each
# sample, and the loader settings.
Train:
  dataset:
    name: PubTabDataSet
    data_dir: train_data/table/pubtabnet/train/
    label_file_list: [train_data/table/pubtabnet/PubTabNet_2.0.0_train.jsonl]
    # Transforms are a sequence, so they are listed in application order.
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: false
      - TableLabelEncode:
          learn_empty_box: false
          # Aliases of PostProcess / Architecture.Head / Global values.
          merge_no_span_structure: *merge_no_span_structure
          replace_empty_cell_token: false
          loc_reg_num: *loc_reg_num
          max_text_length: *max_text_length
      - TableBoxEncode:
          box_format: *box_format
      - ResizeTableImage:
          max_len: 488
      - NormalizeImage:
          # Not a YAML number: this plain scalar parses as the string
          # "1./255." and must stay byte-for-byte as written.
          scale: 1./255.
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          order: 'hwc'
      - PaddingTableImage:
          size: [488, 488]
      # No parameters: the value is null.
      - ToCHWImage: null
      - KeepKeys:
          keep_keys: ['image', 'structure', 'bboxes', 'bbox_masks', 'shape']
  loader:
    shuffle: true
    batch_size_per_card: 48
    drop_last: true
    num_workers: 1

# Evaluation data: dataset location, the ordered transform list applied to
# each sample, and the loader settings.
Eval:
  dataset:
    name: PubTabDataSet
    # Repo-relative paths following the same layout as the Train dataset,
    # replacing a developer-local absolute home-directory path.
    # NOTE(review): the validation label filename is assumed to follow the
    # train file's naming - confirm, or point this at your own subset file.
    data_dir: train_data/table/pubtabnet/val/
    label_file_list: [train_data/table/pubtabnet/PubTabNet_2.0.0_val.jsonl]
    # Transforms are a sequence, so they are listed in application order.
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: false
      - TableLabelEncode:
          learn_empty_box: false
          # Aliases of PostProcess / Architecture.Head / Global values.
          merge_no_span_structure: *merge_no_span_structure
          replace_empty_cell_token: false
          loc_reg_num: *loc_reg_num
          max_text_length: *max_text_length
      - TableBoxEncode:
          box_format: *box_format
      - ResizeTableImage:
          max_len: 488
      - NormalizeImage:
          # Not a YAML number: this plain scalar parses as the string
          # "1./255." and must stay byte-for-byte as written.
          scale: 1./255.
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          order: 'hwc'
      - PaddingTableImage:
          size: [488, 488]
      # No parameters: the value is null.
      - ToCHWImage: null
      - KeepKeys:
          keep_keys: ['image', 'structure', 'bboxes', 'bbox_masks', 'shape']
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 48
    num_workers: 1