ssd_mobilenet_v1_roadsign_kunlun.yml 3.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
architecture: SSD
pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/ssd_mobilenet_v1_voc.tar
use_gpu: false
use_xpu: true
max_iters: 3000
snapshot_iter: 500
log_iter: 1
metric: VOC
map_type: 11point
save_dir: output
weights: output/ssd_mobilenet_v1_roadsign_kunlun/model_final
num_classes: 5

SSD:
  backbone: MobileNet
  multi_box_head: MultiBoxHead
  output_decoder:
    background_label: 0
    keep_top_k: 200
    nms_eta: 1.0
    nms_threshold: 0.45
    nms_top_k: 400
    score_threshold: 0.01

MobileNet:
  norm_decay: 0.
  conv_group_scale: 1
  conv_learning_rate: 0.1
  extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]]
  with_extra_blocks: true

MultiBoxHead:
  aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]]
  base_size: 300
  flip: true
  max_ratio: 90
  max_sizes: [[], 150.0, 195.0, 240.0, 285.0, 300.0]
  min_ratio: 20
  min_sizes: [60.0, 105.0, 150.0, 195.0, 240.0, 285.0]
  offset: 0.5

LearningRate:
  schedulers:
  - !PiecewiseDecay
    milestones: [2000, 3000, 4000, 5000]
    values: [0.0001, 0.00005, 0.000025, 0.00001, 0.000001]

OptimizerBuilder:
  optimizer:
    momentum: 0.0
    type: RMSPropOptimizer
  regularizer:
    factor: 0.00005
    type: L2

TrainReader:
  inputs_def:
    image_shape: [3, 300, 300]
    fields: ['image', 'gt_bbox', 'gt_class']
  dataset:
    !VOCDataSet
      anno_path: train.txt
      dataset_dir: dataset/roadsign_voc
      #use_default_label: true
  sample_transforms:
  - !DecodeImage
    to_rgb: true
  - !RandomDistort
    brightness_lower: 0.875
    brightness_upper: 1.125
    is_order: true
  - !RandomExpand
    fill_value: [127.5, 127.5, 127.5]
  - !RandomCrop
    allow_no_crop: false
  - !NormalizeBox {}
  - !ResizeImage
    interp: 1
    target_size: 300
    use_cv2: false
  - !RandomFlipImage
    is_normalized: true
  - !Permute {}
  - !NormalizeImage
    is_scale: false
    mean: [127.5, 127.5, 127.5]
    std: [127.502231, 127.502231, 127.502231]
  batch_size: 32
  shuffle: true
  drop_last: true
  worker_num: 8
  bufsize: 16
  use_process: false

EvalReader:
  inputs_def:
    image_shape: [3, 300, 300]
    fields: ['image', 'gt_bbox', 'gt_class', 'im_shape', 'im_id', 'is_difficult']
  dataset:
    !VOCDataSet
      anno_path: valid.txt
      dataset_dir: dataset/roadsign_voc
      #use_default_label: true
  sample_transforms:
  - !DecodeImage
    to_rgb: true
  - !NormalizeBox {}
  - !ResizeImage
    interp: 1
    target_size: 300
    use_cv2: false
  - !Permute {}
  - !NormalizeImage
    is_scale: false
    mean: [127.5, 127.5, 127.5]
    std: [127.502231, 127.502231, 127.502231]
  batch_size: 32
  worker_num: 8
  bufsize: 16
  use_process: false

TestReader:
  inputs_def:
    image_shape: [3,300,300]
    fields: ['image', 'im_id', 'im_shape']
  dataset:
    !ImageFolder
    anno_path: test.txt
    use_default_label: true
  sample_transforms:
  - !DecodeImage
    to_rgb: true
  - !ResizeImage
    interp: 1
    max_size: 0
    target_size: 300
    use_cv2: true
  - !Permute {}
  - !NormalizeImage
    is_scale: false
    mean: [127.5, 127.5, 127.5]
    std: [127.502231, 127.502231, 127.502231]
  batch_size: 1