# det_r50_fce_ctw.yml - FCE text-detection config (ResNet-50 backbone, CTW1500 data paths).
# Committed by z37757.
# Top-level run settings: device, epoch count, logging/saving cadence,
# and the model/output paths used by this configuration.
Global:
  use_gpu: true
  epoch_num: 1500
  log_smooth_window: 20
  print_batch_step: 20
  save_model_dir: ./output/fce_r50_ctw/
  save_epoch_step: 100
  # Evaluation is run every 835 iterations.
  # NOTE(review): the leading 0 is presumably the iteration at which
  # evaluation starts - confirm against the trainer.
  eval_batch_step: [0, 835]
  cal_metric_during_train: false
  # NOTE(review): these are "_vd" weights while Architecture.Backbone.name
  # is plain "ResNet" - confirm the two are meant to match.
  pretrained_model: ../pretrain_models/ResNet50_vd_ssld_pretrained
  # Unset (null). Example value kept from the original comment:
  # output/fce_r50_ctw/latest
  checkpoints: null
  # Unset (null).
  save_inference_dir: null
  use_visualdl: false
  infer_img: doc/imgs_en/img_10.jpg
  save_res_path: ./output/fce_r50_ctw/predicts_ctw.txt


# Detection network for the FCE algorithm, described as
# Transform (unset) -> Backbone -> Neck -> Head.
Architecture:
  model_type: det
  algorithm: FCE
  # No transform stage is configured (explicit null).
  Transform: null
  Backbone:
    name: ResNet
    layers: 50
    # Four flags; only the first is off.
    # NOTE(review): presumably one flag per backbone stage - confirm.
    dcn_stage: [false, true, true, true]
    out_indices: [1, 2, 3]
  Neck:
    name: FCEFPN
    # Three entries, the same count as Backbone.out_indices.
    in_channels: [512, 1024, 2048]
    out_channels: 256
    has_extra_convs: false
    extra_stage: 0
  Head:
    name: FCEHead
    # Same value as Neck.out_channels.
    in_channels: 256
    # Same values as PostProcess.scales.
    scales: [8, 16, 32]
    # Same value as the fourier_degree set under Loss, PostProcess
    # and the FCENetTargets training transform.
    fourier_degree: 5
# Loss configuration; `name` selects FCELoss and the remaining keys are its settings.
Loss:
  name: FCELoss
  # Same value as Architecture.Head.fourier_degree and PostProcess.fourier_degree.
  fourier_degree: 5
  # NOTE(review): meaning of num_sample is defined by FCELoss and is not visible here.
  num_sample: 50
  
# Optimizer settings: Adam with explicit beta1/beta2.
Optimizer:
  name: Adam
  beta1: 0.9
  beta2: 0.999
  # Only a learning_rate value is given; no schedule name is set here.
  lr:
    learning_rate: 0.0001
  # L2 regularizer with factor 0.
  # NOTE(review): presumably this makes the regularizer a no-op - confirm.
  regularizer:
    name: L2
    factor: 0

# Post-processing configuration; `name` selects FCEPostProcess.
PostProcess:
  name: FCEPostProcess
  # Same values as Architecture.Head.scales.
  scales: [8, 16, 32]
  # NOTE(review): alpha and beta are interpreted by FCEPostProcess; their meaning is not visible here.
  alpha: 1.0
  beta: 1.0
  # Same value as Architecture.Head.fourier_degree and Loss.fourier_degree.
  fourier_degree: 5

# Evaluation metric configuration; `name` selects DetFCEMetric.
Metric:
  name: DetFCEMetric
  # NOTE(review): presumably the score used to pick the best model - confirm.
  main_indicator: hmean

# Training data: dataset location, per-sample transform list, and loader settings.
Train:
  dataset:
    name: SimpleDataSet
    # NOTE(review): absolute paths - adjust to where the dataset lives locally.
    data_dir: /data/Dataset/OCR_det/ctw1500/imgs/
    label_file_list:
      - /data/Dataset/OCR_det/ctw1500/imgs/training.txt
    # Each entry is a single-key mapping: transform name -> its arguments.
    # A null value means no arguments are given in this file.
    # NOTE(review): presumably applied in the order listed - confirm.
    transforms:
      - DecodeImage:  # load image
          img_mode: BGR
          channel_first: false
          ignore_orientation: true
      - DetLabelEncode: null  # Class handling label
      - ColorJitter:
          brightness: 0.142
          saturation: 0.5
          contrast: 0.5
      - RandomScaling: null
      - RandomCropFlip:
          crop_ratio: 0.5
      - RandomCropPolyInstances:
          crop_ratio: 0.8
          min_side_ratio: 0.3
      - RandomRotatePolyInstances:
          rotate_ratio: 0.5
          max_angle: 30
          pad_with_fixed_color: false
      - SquareResizePad:
          target_size: 800
          pad_ratio: 0.6
      - IaaAugment:
          augmenter_args:
            - type: Fliplr
              args:
                p: 0.5
      - FCENetTargets:
          # Same value as Architecture.Head.fourier_degree.
          fourier_degree: 5
      - NormalizeImage:
          # Quoted on purpose: "1./255." is not a YAML number, so it loads
          # as a string. NOTE(review): presumably the consumer evaluates
          # it into a float - confirm.
          scale: '1./255.'
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          order: hwc
      - ToCHWImage: null
      - KeepKeys:
          # dataloader will return list in this order
          keep_keys:
            - image
            - p3_maps
            - p4_maps
            - p5_maps
  loader:
    shuffle: true
    drop_last: false
    batch_size_per_card: 6
    num_workers: 8

# Evaluation data: dataset location, per-sample transform list, and loader settings.
Eval:
  dataset:
    name: SimpleDataSet
    # NOTE(review): absolute paths - adjust to where the dataset lives locally.
    data_dir: /data/Dataset/OCR_det/ctw1500/imgs/
    label_file_list:
      - /data/Dataset/OCR_det/ctw1500/imgs/test.txt
    # Each entry is a single-key mapping: transform name -> its arguments.
    # A null value means no arguments are given in this file.
    transforms:
      - DecodeImage:  # load image
          img_mode: BGR
          channel_first: false
          ignore_orientation: true
      - DetLabelEncode: null  # Class handling label
      - DetResizeForTest:
          # Alternative kept from the original, currently disabled:
          # resize_long: 1280
          rescale_img: [1080, 736]
      - NormalizeImage:
          # Quoted on purpose: "1./255." is not a YAML number, so it loads
          # as a string. NOTE(review): presumably the consumer evaluates
          # it into a float - confirm.
          scale: '1./255.'
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          order: hwc
      - Pad: null
      - ToCHWImage: null
      - KeepKeys:
          keep_keys:
            - image
            - shape
            - polys
            - ignore_tags
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 1  # must be 1
    num_workers: 2