# mask_rcnn_r50_fpn_1x.yml
# Global run settings.
# `architecture` names the top-level section of this file that describes
# the model (the `MaskRCNN` mapping).
architecture: MaskRCNN
use_gpu: true
# Run length, log smoothing window, output directory and snapshot cadence.
# NOTE(review): units look like training iterations — confirm in the trainer.
max_iters: 180000
log_smooth_window: 20
save_dir: output
snapshot_iter: 10000
# Pretrained weights URL, evaluation metric and the checkpoint path under
# `save_dir` referenced as `weights`.
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar
metric: COCO
weights: output/mask_rcnn_r50_fpn_1x/model_final
# NOTE(review): presumably 80 COCO categories plus background — confirm.
num_classes: 81
# Canonical lowercase boolean, consistent with `use_gpu` above.
load_static_weights: true

# Model Architecture
MaskRCNN:
  # Every value below is the name of another top-level section in this file.
  # Feature flow: backbone -> neck -> heads.
  backbone: ResNet
  neck: FPN
  rpn_head: RPNHead
  bbox_head: BBoxHead
  mask_head: MaskHead
  # Anchor flow: anchors -> proposals -> masks.
  anchor: AnchorRPN
  proposal: Proposal
  mask: Mask

ResNet:
  # Indexing convention for this section: index 0 stands for res2.
  depth: 50
  norm_type: bn
  num_stages: 4
  freeze_at: 0
  # All four indices (0..3) are listed, i.e. one per stage in `num_stages`.
  return_idx: [0, 1, 2, 3]

FPN:
  # Four-entry lists, one entry per input level.
  in_channels: [256, 512, 1024, 2048]
  # 1/4, 1/8, 1/16 and 1/32.
  spatial_scale: [0.25, 0.125, 0.0625, 0.03125]
  out_channel: 256
  min_level: 0
  max_level: 4


RPNHead:
  # 3 is also the number of aspect ratios listed under
  # AnchorRPN.anchor_generator — NOTE(review): verify the two are meant
  # to stay in sync.
  anchor_per_position: 3
  rpn_channel: 256
  # Feature sub-module; 256 channels in and out, the same value as
  # FPN.out_channel.
  rpn_feat:
    name: RPNFeat
    feat_in: 256
    feat_out: 256

BBoxHead:
  # Same value as bbox_feat.head_feat.mlp_dim below.
  in_feat: 1024
  bbox_feat:
    name: BBoxFeat
    head_feat:
      name: TwoFCHead
      in_dim: 256
      mlp_dim: 1024
    roi_extractor:
      name: RoIExtractor
      resolution: 7
      sampling_ratio: 2

# Mask head: a feature sub-module with its own RoI extractor.
MaskHead:
  mask_feat:
    name: MaskFeat
    num_convs: 4
    feat_in: 256
    feat_out: 256
    # Same extractor type as BBoxHead uses, configured at resolution 14
    # (BBoxHead uses 7).
    mask_roi_extractor:
      name: RoIExtractor
      resolution: 14
      sampling_ratio: 2
    # Canonical lowercase boolean, consistent with `use_gpu: true`.
    share_bbox_feat: false
  # Same value as mask_feat.feat_out above.
  feat_in: 256

AnchorRPN:
  anchor_generator:
    name: AnchorGeneratorRPN
    anchor_start_size: 32
    aspect_ratios: [0.5, 1.0, 2.0]
    stride: [4.0, 4.0]
  anchor_target_generator:
    name: AnchorTargetGeneratorRPN
    batch_size_per_im: 256
    fg_fraction: 0.5
    # Overlap thresholds: positive at 0.7, negative at 0.3.
    positive_overlap: 0.7
    negative_overlap: 0.3
    straddle_thresh: 0.0

Proposal:
  proposal_generator:
    name: ProposalGenerator
    nms_thresh: 0.7
    min_size: 0.0
    # Pre-/post-NMS top-n: 2000 for the train_* keys, 1000 for the infer_* keys.
    train_pre_nms_top_n: 2000
    train_post_nms_top_n: 2000
    infer_pre_nms_top_n: 1000
    infer_post_nms_top_n: 1000
  proposal_target_generator:
    name: ProposalTargetGenerator
    batch_size_per_im: 512
    fg_fraction: 0.25
    # The remaining values are lists holding a single entry each.
    fg_thresh: [0.5]
    bg_thresh_hi: [0.5]
    bg_thresh_lo: [0.0]
    bbox_reg_weights: [[0.1, 0.1, 0.2, 0.2]]
  # used in infer
  bbox_post_process:
    name: BBoxPostProcess
    # decode -> clip -> nms
    decode_clip_nms:
      name: DecodeClipNms
      score_threshold: 0.05
      nms_threshold: 0.5
      keep_top_k: 100

# Mask section: a target generator and a post-processor, both configured
# with the same mask_resolution (28).
# NOTE(review): 28 is twice the resolution (14) of MaskHead's RoI extractor;
# presumably the two must be kept consistent — confirm.
Mask:
  mask_target_generator:
    name: MaskTargetGenerator
    mask_resolution: 28
  mask_post_process:
    name: MaskPostProcess
    mask_resolution: 28


# Train
# Learning-rate schedule: a base rate plus an ordered list of schedulers.
# The `!PiecewiseDecay` / `!LinearWarmup` entries are application-defined
# YAML tags, so this file needs a loader that registers them.
LearningRate:
  base_lr: 0.01
  schedulers:
  # Both milestones (120000, 160000) lie below `max_iters: 180000`.
  # NOTE(review): presumably the rate is multiplied by `gamma` at each
  # milestone — confirm.
  - !PiecewiseDecay
    gamma: 0.1
    milestones: [120000, 160000]
  # NOTE(review): presumably ramps from start_factor * base_lr over the
  # first `steps` iterations — confirm.
  - !LinearWarmup
    start_factor: 0.3333
    steps: 500

OptimizerBuilder:
  # Momentum optimizer with momentum 0.9.
  optimizer:
    type: Momentum
    momentum: 0.9
  # L2 regularizer with factor 0.0001.
  regularizer:
    type: L2
    factor: 0.0001

# Points at a separate reader configuration file.
# NOTE(review): presumably resolved relative to this file's directory — confirm.
_READER_: 'mask_reader.yml'