ssd_mobilenet_v1_voc.yml 3.1 KB
Newer Older
1
architecture: SSD
2
pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/ssd_mobilenet_v1_coco_pretrained.tar
3
use_gpu: true
4
max_iters: 28000
5 6 7
snapshot_iter: 2000
log_smooth_window: 1
metric: VOC
8
map_type: 11point
9 10
save_dir: output
weights: output/ssd_mobilenet_v1_voc/model_final/
11
# 20(label_class) + 1(background)
12
num_classes: 21
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27

SSD:
  backbone: MobileNet
  multi_box_head: MultiBoxHead
  output_decoder:
    background_label: 0
    keep_top_k: 200
    nms_eta: 1.0
    nms_threshold: 0.45
    nms_top_k: 400
    score_threshold: 0.01

MobileNet:
  norm_decay: 0.
  conv_group_scale: 1
28
  conv_learning_rate: 0.1
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
  extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]]
  with_extra_blocks: true

MultiBoxHead:
  aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]]
  base_size: 300
  flip: true
  max_ratio: 90
  max_sizes: [[], 150.0, 195.0, 240.0, 285.0, 300.0]
  min_ratio: 20
  min_sizes: [60.0, 105.0, 150.0, 195.0, 240.0, 285.0]
  offset: 0.5

LearningRate:
  schedulers:
  - !PiecewiseDecay
    milestones: [10000, 15000, 20000, 25000]
    values: [0.001, 0.0005, 0.00025, 0.0001, 0.00001]

OptimizerBuilder:
  optimizer:
    momentum: 0.0
    type: RMSPropOptimizer
  regularizer:
    factor: 0.00005
    type: L2

56 57 58 59
TrainReader:
  inputs_def:
    image_shape: [3, 300, 300]
    fields: ['image', 'gt_bbox', 'gt_class']
60
  dataset:
61 62
    !VOCDataSet
    anno_path: trainval.txt
63 64
    dataset_dir: dataset/voc
    use_default_label: true
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92
  sample_transforms:
  - !DecodeImage
    to_rgb: true
  - !RandomDistort
    brightness_lower: 0.875
    brightness_upper: 1.125
    is_order: true
  - !RandomExpand
    fill_value: [127.5, 127.5, 127.5]
  - !RandomCrop
    allow_no_crop: false
  - !NormalizeBox {}
  - !ResizeImage
    interp: 1
    target_size: 300
    use_cv2: false
  - !RandomFlipImage
    is_normalized: true
  - !Permute {}
  - !NormalizeImage
    is_scale: false
    mean: [127.5, 127.5, 127.5]
    std: [127.502231, 127.502231, 127.502231]
  batch_size: 32
  shuffle: true
  drop_last: true
  worker_num: 8
  bufsize: 16
93
  use_process: true
94 95 96 97 98

EvalReader:
  inputs_def:
    image_shape: [3, 300, 300]
    fields: ['image', 'gt_bbox', 'gt_class', 'im_shape', 'im_id', 'is_difficult']
99
  dataset:
100 101
    !VOCDataSet
    anno_path: test.txt
102 103
    dataset_dir: dataset/voc
    use_default_label: true
104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
  sample_transforms:
  - !DecodeImage
    to_rgb: true
  - !NormalizeBox {}
  - !ResizeImage
    interp: 1
    target_size: 300
    use_cv2: false
  - !Permute {}
  - !NormalizeImage
    is_scale: false
    mean: [127.5, 127.5, 127.5]
    std: [127.502231, 127.502231, 127.502231]
  batch_size: 32
  worker_num: 8
  bufsize: 32
  use_process: false
121

122 123 124 125
TestReader:
  inputs_def:
    image_shape: [3,300,300]
    fields: ['image', 'im_id', 'im_shape']
126
  dataset:
127 128
    !ImageFolder
    anno_path: test.txt
129
    use_default_label: true
130 131 132 133 134 135 136 137 138 139 140 141 142 143
  sample_transforms:
  - !DecodeImage
    to_rgb: true
  - !ResizeImage
    interp: 1
    max_size: 0
    target_size: 300
    use_cv2: false
  - !Permute {}
  - !NormalizeImage
    is_scale: false
    mean: [127.5, 127.5, 127.5]
    std: [127.502231, 127.502231, 127.502231]
  batch_size: 1