# ssd_vgg16_300.yml
# SSD detector configuration: VGG backbone (depth 16), base size 300.
# --- Model / evaluation selection -------------------------------------------
# `architecture` names the top-level section (SSD) defined further down.
architecture: SSD
metric: COCO
# NOTE(review): 81 is presumably 80 COCO categories plus one background class
# (SSD.output_decoder uses background_label 0) - confirm against the dataset.
num_classes: 81

# --- Runtime ----------------------------------------------------------------
use_gpu: true

# --- Iteration counters (training length, checkpointing, logging) -----------
max_iters: 400000
snapshot_iter: 10000
log_iter: 20
log_smooth_window: 20

# --- Weights and output locations -------------------------------------------
pretrain_weights: 'https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_caffe_pretrained.tar'
save_dir: output
weights: output/ssd_vgg16_300/model_final

# Top-level detector section: wires the backbone and head sections (both
# defined below under the same names) and configures the output decoder.
SSD:
  backbone: VGG
  multi_box_head: MultiBoxHead
  output_decoder:
    # Score filter and NMS parameters.
    score_threshold: 0.01
    nms_top_k: 400
    nms_threshold: 0.45
    nms_eta: 1.0
    keep_top_k: 200
    # Label index treated as background.
    background_label: 0

# Backbone section referenced by SSD.backbone.
VGG:
  depth: 16
  # Six entries; only the first is positive.
  # NOTE(review): -1 presumably disables normalization for that output - confirm.
  normalizations: [20., -1, -1, -1, -1, -1]
  with_extra_blocks: true

# Head section referenced by SSD.multi_box_head.
# The list-valued settings below each carry six entries
# (presumably one per detection feature map - confirm).
MultiBoxHead:
  base_size: 300

  # Prior-box geometry.
  min_ratio: 15
  max_ratio: 90
  min_sizes: [30.0, 60.0, 111.0, 162.0, 213.0, 264.0]
  max_sizes: [60.0, 111.0, 162.0, 213.0, 264.0, 315.0]
  aspect_ratios:
  - [2.]
  - [2., 3.]
  - [2., 3.]
  - [2., 3.]
  - [2.]
  - [2.]
  flip: true
  steps: [8, 16, 32, 64, 100, 300]
  offset: 0.5

  # Convolution settings.
  kernel_size: 3
  pad: 1

# Learning-rate schedule: a base rate plus two scheduler entries,
# kept in their original order.
LearningRate:
  base_lr: 0.001
  schedulers:
  # Piecewise decay with factor `gamma` at the listed milestones.
  - !PiecewiseDecay
    milestones: [280000, 360000]
    gamma: 0.1
  # Linear warmup over the first `steps` iterations (start_factor is ~1/3).
  - !LinearWarmup
    steps: 500
    start_factor: 0.3333333333333333

# Optimizer and weight regularizer selection.
OptimizerBuilder:
  optimizer:
    type: Momentum
    momentum: 0.9
  regularizer:
    type: L2
    factor: 0.0005

# Reader settings for training.
# Fixes relative to the previous revision:
#   * stray line-number residue removed (it made the mapping unparseable);
#   * the dataset now points at the train2017 split, so training data stays
#     disjoint from the val2017 split that EvalReader evaluates on.
TrainReader:
  inputs_def:
    image_shape: [3, 300, 300]
    fields: ['image', 'gt_bbox', 'gt_class']
  # NOTE(review): assumes dataset/coco contains train2017/ and
  # annotations/instances_train2017.json - confirm the local layout.
  dataset:
    !COCODataSet
    image_dir: train2017
    anno_path: annotations/instances_train2017.json
    dataset_dir: dataset/coco
  # Transforms are given as a sequence; the order is kept as in the original.
  sample_transforms:
  - !DecodeImage
    to_rgb: true
  - !RandomDistort
    brightness_lower: 0.875
    brightness_upper: 1.125
    is_order: true
  - !RandomExpand
    fill_value: [104, 117, 123]
  - !RandomCrop
    allow_no_crop: true
  - !NormalizeBox {}
  - !ResizeImage
    interp: 1
    target_size: 300
    use_cv2: false
  - !RandomFlipImage
    is_normalized: true
  - !Permute
    to_bgr: false
  # Same mean/std values as the Eval and Test readers.
  - !NormalizeImage
    is_scale: false
    mean: [104, 117, 123]
    std: [1, 1, 1]
  batch_size: 8
  shuffle: true
  worker_num: 8
  bufsize: 32
  use_process: true
  drop_empty: true

# Reader settings for evaluation on the val2017 split.
# Stray line-number residue that was interleaved with the keys has been
# removed (it made the mapping unparseable); all values are unchanged.
EvalReader:
  inputs_def:
    image_shape: [3, 300, 300]
    fields: ['image', 'gt_bbox', 'gt_class', 'im_shape', 'im_id', 'is_difficult']
  dataset:
    !COCODataSet
    image_dir: val2017
    anno_path: annotations/instances_val2017.json
    dataset_dir: dataset/coco
  # No random augmentation entries here, unlike TrainReader.
  sample_transforms:
  - !DecodeImage
    to_rgb: true
    with_mixup: false
  - !NormalizeBox {}
  - !ResizeImage
    interp: 1
    target_size: 300
    use_cv2: false
  - !Permute
    to_bgr: false
  - !NormalizeImage
    is_scale: false
    mean: [104, 117, 123]
    std: [1, 1, 1]
  batch_size: 16
  worker_num: 8
  bufsize: 32

# Reader settings for inference on an image folder.
# Stray line-number residue that was interleaved with the keys has been
# removed (it made the mapping unparseable); values are unchanged apart from
# whitespace inside the image_shape list.
TestReader:
  inputs_def:
    image_shape: [3, 300, 300]
    fields: ['image', 'im_id', 'im_shape']
  # NOTE(review): the annotation file is presumably used only for category
  # metadata here - confirm against the ImageFolder implementation.
  dataset:
    !ImageFolder
    anno_path: annotations/instances_val2017.json
  sample_transforms:
  - !DecodeImage
    to_rgb: true
    with_mixup: false
  - !ResizeImage
    interp: 1
    max_size: 0
    target_size: 300
    use_cv2: false
  - !Permute
    to_bgr: false
  - !NormalizeImage
    is_scale: false
    mean: [104, 117, 123]
    std: [1, 1, 1]
  batch_size: 1