# Architecture of detection, which is also the prefix of data feed module
architecture: YOLOv3

# Data feed module.
train_feed: YoloTrainFeed
eval_feed: YoloEvalFeed
test_feed: YoloTestFeed

# Whether to use GPU (true) or CPU (false), true by default.
use_gpu: true

# Maximum number of iterations.
# For the YOLOv3 model, the default is to train for 270 epochs.
max_iters: 500200
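# A rough sanity check of that number (assuming the usual 8-GPU setup for this config):
#   COCO train2017 has ~118k images; with batch_size 8 per device on 8 GPUs the total
#   batch is 64, so one epoch is about 118287 / 64 ≈ 1850 iterations and
#   270 epochs ≈ 270 * 1850 ≈ 500k iterations, which matches max_iters.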

# Number of iterations over which the log output is smoothed, 20 by default.
log_smooth_window: 20

# Interval of iterations at which the training log is displayed.
log_iter: 20

# The directory to save models.
save_dir: output

# Snapshot period. If training and evaluation run at the same time, the model is evaluated every snapshot_iter iterations. 2000 by default.
snapshot_iter: 2000

# Evaluation metric, COCO and VOC are available.
metric: COCO

# The path of pretrained weights. If a URL is provided, it will be downloaded and decompressed automatically.
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_pretrained.tar
# The path of final model for evaluation and test.
weights: output/yolov3_darknet/model_final

# Number of classes, 80 for COCO and 20 for VOC.
num_classes: 80

# Whether to use the fine-grained YOLOv3 loss. If true, build the YOLOv3 loss with common OPs in Python code;
# if false, use the fluid.layers.yolov3_loss OP to calculate the YOLOv3 loss. The former is better
# for redesigning the YOLOv3 loss, the latter for training with the original YOLOv3 loss.
use_fine_grained_loss: false
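# As a rough reference (not tied to either implementation), the YOLOv3 loss at each of the
# three output scales sums a localization term (x, y, w, h), an objectness term, and a
# per-class classification term; the fine-grained Python implementation rebuilds this sum
# from common OPs so individual terms can be inspected or re-weighted.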


# YOLOv3 architecture, see https://arxiv.org/abs/1804.02767
YOLOv3:
  backbone: DarkNet
  yolo_head: YOLOv3Head

# Backbone module
DarkNet:
  # Batch normalization type in training, sync_bn for synchronized batch normalization
  norm_type: sync_bn
  # L2 weight decay factor of batch normalization layer
  norm_decay: 0.
  # Number of DarkNet convolution layers, only 53 is currently supported.
  depth: 53

# YOLOv3 head module
# Generate bbox output in evaluation and calculate loss in training
# fluid.layers.yolov3_loss / fluid.layers.yolo_box
YOLOv3Head:
  # Anchor masks of the 3 yolo_loss/yolo_box layers, each layer uses 3 anchors.
  anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
  # 9 anchors shared by the 3 yolo_loss/yolo_box layers, generated by running k-means on COCO ground truth boxes.
  anchors: [[10, 13], [16, 30], [33, 23],
            [30, 61], [62, 45], [59, 119],
            [116, 90], [156, 198], [373, 326]]
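  # With a 608 x 608 input the three yolo_box outputs have strides 32, 16 and 8
  # (19x19, 38x38 and 76x76 grids); mask [6, 7, 8] assigns the three largest anchors
  # to the coarsest 19x19 map, [3, 4, 5] the medium anchors to the 38x38 map, and
  # [0, 1, 2] the smallest anchors to the fine 76x76 map.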
  # L2 weight decay factor of batch normalization layer
  norm_decay: 0.
  # Use YOLOv3Loss, which is defined in the YOLOv3Loss section below.
  yolo_loss: YOLOv3Loss
  # fluid.layers.multiclass_nms
  # Non-maximum suppression for output prediction boxes, see multiclass_nms for the following parameters.
  #   1. Select detection bounding boxes with scores larger than score_threshold.
  #   2. Keep the nms_top_k detection bounding boxes with the largest scores.
  #   3. Suppress detection bounding boxes that have high IoU overlap with already selected boxes.
  #   4. Keep the top keep_top_k detection bounding boxes as output.
  nms:
    # Which label is regarded as background and will be ignored, -1 for no background label.
    background_label: -1
    # Number of total bboxes to be kept per image after NMS step.
    keep_top_k: 100
    # IoU threshold for NMS, bbox with IoU over nms_threshold will be suppressed.
    nms_threshold: 0.45
    # Maximum number of detections to be kept according to confidence after filtering detections based on score_threshold.
    nms_top_k: 1000
    # Whether detections are normalized.
    normalized: false
    # Threshold to filter out bounding boxes with low confidence score. 
    score_threshold: 0.01
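    # Putting the values above together: per image, boxes scoring below 0.01 are dropped,
    # at most the top 1000 remaining boxes per class enter NMS, boxes with IoU > 0.45
    # against an already kept box of the same class are suppressed, and finally at most
    # 100 boxes are kept across all classes.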

YOLOv3Loss:
  # Training batch size, used when use_fine_grained_loss is set to true.
  # ATTENTION: this must match the batch size defined in YoloTrainFeed when
  # fine-grained loss is used.
  batch_size: 8
  # Ignore threshold for yolo_loss layer, 0.7 by default.
  # The objectness loss is ignored if a prediction bbox overlaps a ground truth box with IoU over ignore_thresh.
  ignore_thresh: 0.7
  # Whether to use label smoothing in the yolo_loss layer.
  # It is recommended to set this to true only when num_classes is very large.
  label_smooth: false
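  # For reference, label smoothing softens the one-hot classification targets
  # (roughly 1 -> 1 - eps and 0 -> a small eps spread over the other classes),
  # which mainly helps against over-confidence when there are many classes.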

# Learning rate configuration
LearningRate:
  # Base learning rate for training, 1e-3 by default.
  base_lr: 0.001
  # Learning rate schedulers, PiecewiseDecay and LinearWarmup by default
  schedulers:
  # fluid.layers.piecewise_decay
  # The learning rate is decayed by gamma at each milestone.
  - !PiecewiseDecay
    gamma: 0.1
    milestones:
    - 400000
    - 450000
  # fluid.layers.linear_lr_warmup
  # The starting learning rate equals base_lr * start_factor.
  - !LinearWarmup
    start_factor: 0.
    steps: 4000
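  # The resulting schedule: the learning rate ramps linearly from 0 (base_lr * 0.) to
  # 0.001 over the first 4000 iterations, stays at 0.001 until iteration 400000,
  # drops to 0.0001 (x 0.1) until 450000, then to 0.00001 for the remaining iterations.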

# Optimizer module
OptimizerBuilder:
  # fluid.optimizer
  optimizer:
    momentum: 0.9
    type: Momentum
  # fluid.regularizer
  regularizer:
    factor: 0.0005
    type: L2
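  # Roughly, each parameter update is momentum SGD with the L2 penalty folded into the
  # gradient:
  #   velocity = 0.9 * velocity + (gradient + 0.0005 * weight)
  #   weight   = weight - learning_rate * velocity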

# Data feed module for training
YoloTrainFeed:
  # Batch size per device, 8 by default
  batch_size: 8
  # Dataset module
  dataset:
    # Dataset directory.
    dataset_dir: dataset/coco
    # Annotation file path.
    annotation: annotations/instances_train2017.json
    # Directory where image files are stored.
    image_dir: train2017
  # List of data fields needed.
  fields: [image, gt_box, gt_label, gt_score]
  # List of image dims
  image_shape: [3, 608, 608]
  # List of sample transformations to use.
  sample_transforms:
  # read image data and decode to numpy.
  - !DecodeImage
    to_rgb: true
    # YOLOv3 uses image mixup in training.
    with_mixup: true
  # Mixup two images in training, a trick to improve performance.
  - !MixupImage
    alpha: 1.5 # default: 1.5
    beta: 1.5 # default: 1.5
  # Normalize ground truth boxes to the range [0, 1].
  - !NormalizeBox {}
  # Random color distort: brightness, contrast, hue, saturation.
  - !RandomDistort
    brightness_lower: 0.5
    brightness_prob: 0.5
    brightness_upper: 1.5
    contrast_lower: 0.5
    contrast_prob: 0.5
    contrast_upper: 1.5
    count: 4
    hue_lower: -18
    hue_prob: 0.5
    hue_upper: 18
    is_order: false
    saturation_lower: 0.5
    saturation_prob: 0.5
    saturation_upper: 1.5
  # Randomly expand the image and modify the bounding boxes.
  #   Operators:
  #     1. Scale the image width and height.
  #     2. Construct a new image with the new height and width.
  #     3. Fill the new image with the mean.
  #     4. Put the original image into the new image.
  #     5. Rescale the bounding boxes.
  #     6. Determine whether each bbox is still valid in the new image.
  - !ExpandImage
    # max expand ratio, default 4.0.
    max_ratio: 4.0
    mean: [123.675, 116.28, 103.53]
    prob: 0.5
  # Randomly crop the image and modify the bounding boxes.
  #   Operators:
  #     1. Scale the image width and height.
  #     2. Crop the image according to a random sample.
  #     3. Rescale the bounding boxes.
  #     4. Determine whether each bbox is still valid in the new image.
  - !CropImage
    # Recrop the image if there is no bbox in the cropped output.
    avoid_no_bbox: true
    batch_sampler: [[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0],
                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0],
                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 1.0],
                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 1.0],
                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 1.0],
                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 1.0],
                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]]
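    # Each row presumably follows the SSD-style sampler description
    # [max_sample, max_trial, min_scale, max_scale, min_aspect_ratio, max_aspect_ratio,
    #  min_overlap, max_overlap]: e.g. [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0] tries up to
    # 50 random crops covering 30-100% of the image with aspect ratio 0.5-2.0 and keeps
    # one whose overlap with a ground truth box is at least 0.1.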
    # Whether all bboxes must satisfy the IoU constraints.
    satisfy_all: false
  # Interpolate the image to target_size with a randomly chosen interpolation method:
  #   cv2.INTER_NEAREST,
  #   cv2.INTER_LINEAR,
  #   cv2.INTER_AREA,
  #   cv2.INTER_CUBIC,
  #   cv2.INTER_LANCZOS4
  - !RandomInterpImage
    max_size: 0
    target_size: 608
  # Flip the image and bounding boxes.
  #   Operators:
  #     1. Flip the image (numpy array).
  #     2. Transform the bboxes' x coordinates (whether the coordinates are normalized must be checked).
  #     3. Transform the segmentations' x coordinates (whether the coordinates are normalized must be checked).
  - !RandomFlipImage
    is_mask_flip: false
    is_normalized: true
    prob: 0.5
  # Normalize the image.
  #   Operators:
  #     1. (optional) Scale the image to [0, 1].
  #     2. Subtract the mean from each pixel and divide by std.
  - !NormalizeImage
    is_channel_first: false
    is_scale: true
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
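    # With is_scale true, pixels are first divided by 255 and then normalized per
    # channel as (pixel / 255 - mean) / std; e.g. an R value of 128 becomes
    # (128 / 255 - 0.485) / 0.229 ≈ 0.074.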
  # Change data layout to [C, H, W].
  - !Permute
    channel_first: true
    to_bgr: false
  # List of batch transformations to use.
  batch_transforms:
  # Randomly reshape the images in each mini-batch to one of the following sizes.
  - !RandomShape
    sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
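    # All candidate sizes are multiples of 32, the largest output stride of DarkNet-53,
    # so every randomly chosen shape still yields integral 1/32, 1/16 and 1/8 feature
    # maps (e.g. 320 -> 10x10, 608 -> 19x19 at stride 32).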
  # YOLOv3 reads ground truth boxes into a zero-padded tensor holding at most 50 boxes.
  num_max_boxes: 50
  # YOLOv3 reads ground truth labels without treating background as label 0.
  with_background: false
  # Number of samples, -1 represents all samples. -1 by default.
  samples: -1
  # Whether samples should be shuffled, true by default.
  shuffle: true
  # Whether to drop the last batch if it contains fewer images than batch_size.
  drop_last: true
  # Whether to use a multi-process reader in training.
  use_process: true
  # Number of worker processes for the multi-process reader.
  num_workers: 8
  # Buffer size for reader.
  bufsize: 128
  # Number of epochs during which image mixup is applied.
  mixup_epoch: 250

# Data feed module for evaluation
YoloEvalFeed:
  batch_size: 8
  dataset:
    dataset_dir: dataset/coco
    annotation: annotations/instances_val2017.json
    image_dir: val2017
  batch_transforms: []
  fields: [image, im_size, im_id, gt_box, gt_label, is_difficult]
  image_shape: [3, 608, 608]
  sample_transforms:
  - !DecodeImage
    to_rgb: true
    with_mixup: false
  # Rescale the image to the specified target size, capped at max_size if max_size != 0.
  # If target_size is a list, a scale is randomly selected as the target size.
  - !ResizeImage
    interp: 2 # 2 for cv2.INTER_CUBIC
    max_size: 0
    target_size: 608
    use_cv2: true
  - !NormalizeImage
    is_channel_first: false
    is_scale: true
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
  - !Permute
    channel_first: true
    to_bgr: false
  num_max_boxes: 50
  samples: -1
  shuffle: false
  drop_last: false
  # Use multi-thread reader in evaluation mode.
  use_process: false
  # Thread number for multi-thread reader.
  num_workers: 8
  with_background: false

# Data feed module for test
YoloTestFeed:
  batch_size: 1
  dataset:
    annotation: dataset/coco/annotations/instances_val2017.json
  batch_transforms: []
  fields: [image, im_size, im_id]
  sample_transforms:
  - !DecodeImage
    to_rgb: true
    with_mixup: false
  - !ResizeImage
    interp: 2
    max_size: 0
    target_size: 608
    use_cv2: true
  - !NormalizeImage
    is_channel_first: false
    is_scale: true
    mean: [0.485, 0.456, 0.406]
    std: [0.229, 0.224, 0.225]
  - !Permute
    channel_first: true
    to_bgr: false
  num_max_boxes: 50
  samples: -1
  shuffle: false
  drop_last: false
  # Use multi-thread reader in test mode.
  use_process: false
  num_workers: 8
  with_background: false