add config comment in yolov3_darknet.yml (#3126)

* add yolov3_darknet cinfig comment * fix commment * add log_iter

add config comment in yolov3_darknet.yml (#3126)
* add yolov3_darknet cinfig comment * fix commment * add log_iter
bdd4bc8a · Kaipeng Deng · GitHub · 1ea2a74b · bdd4bc8a
隐藏空白更改
内联并排

Showing with 323 addition and 0 deletion

docs/config_example/yolov3_darknet.yml docs/config_example/yolov3_darknet.yml +323 -0

未找到文件。
--- a/docs/config_example/yolov3_darknet.yml
+++ b/docs/config_example/yolov3_darknet.yml
+# Architecture of detection, which is also the prefix of data feed module
+architecture: YOLOv3
+
+# Data feed module.
+train_feed: YoloTrainFeed
+eval_feed: YoloEvalFeed
+test_feed: YoloTestFeed
+
+# Use GPU or CPU, true by default.
+use_gpu: true
+
+# Maximum number of iteration.
+# In the YOLOv3 model, the default iteration number corresponds to training for 270 epochs.
+max_iters: 500200
+
+# Smooth the log output in specified iterations, 20 by default.
+log_smooth_window: 20
+
+# The interval, in iterations, at which the training log is displayed.
+log_iter: 20
+
+# The directory to save models.
+save_dir: output
+
+# Snapshot period. If training and evaluating at the same time, the model is evaluated at each snapshot_iter. 2000 by default.
+snapshot_iter: 2000
+
+# Evaluation method, COCO and VOC are available.
+metric: COCO
+
+# The path of pretrained weights. If a URL is provided, it will be downloaded and decompressed automatically.
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_pretrained.tar
+# The path of final model for evaluation and test.
+weights: output/yolov3_darknet/model_final
+
+# Number of classes, 80 for COCO and 20 for VOC.
+num_classes: 80
+
+
+# YOLOv3 architecture, see https://arxiv.org/abs/1804.02767
+YOLOv3:
+  backbone: DarkNet
+  yolo_head: YOLOv3Head
+
+# Backbone module
+DarkNet:
+  # Batch normalization type in training, sync_bn for synchronized batch normalization
+  norm_type: sync_bn
+  # L2 weight decay factor of batch normalization layer
+  norm_decay: 0.
+  # Darknet convolution layer number, only 53 is supported currently
+  depth: 53
+
+# YOLOv3 head module
+# Generate bbox output in evaluation and calculate loss in training
+# fluid.layers.yolov3_loss / fluid.layers.yolo_box
+YOLOv3Head:
+  # anchor mask of 3 yolo_loss/yolo_box layers, each yolo_loss/yolo_box layer has 3 anchors
+  anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+  # 9 anchors for 3 yolo_loss/yolo_box layers, generated by performing k-means on COCO gtboxes
+  anchors: [[10, 13], [16, 30], [33, 23],
+            [30, 61], [62, 45], [59, 119],
+            [116, 90], [156, 198], [373, 326]]
+  # L2 weight decay factor of batch normalization layer
+  norm_decay: 0.
+  # Ignore threshold for yolo_loss layer, 0.7 by default.
+  # Objectness loss will be ignored if a prediction bbox overlaps a gtbox by more than ignore_thresh.
+  ignore_thresh: 0.7
+  # Whether to use label smoothing in the yolo_loss layer
+  # It is recommended to set this to true only when num_classes is very big
+  label_smooth: true
+  # fluid.layers.multiclass_nms
+  # Non-maximum suppression for output prediction boxes; see multiclass_nms for the following parameters.
+  #   1. Select detection bounding boxes with high scores larger than score_threshold.
+  #   2. Select detection bounding boxes with the largest nms_top_k scores.
+  #   3. Suppress detection bounding boxes which have high IoU overlap with already selected boxes.
+  #   4. Keep the top keep_top_k detection bounding boxes as output.
+  nms:
+    # Which label is regarded as background and will be ignored; -1 for no background label.
+    background_label: -1
+    # Number of total bboxes to be kept per image after NMS step.
+    keep_top_k: 100
+    # IoU threshold for NMS, bbox with IoU over nms_threshold will be suppressed.
+    nms_threshold: 0.45
+    # Maximum number of detections to be kept according to the confidences after the filtering detections based on score_threshold.
+    nms_top_k: 1000
+    # Whether detections are normalized.
+    normalized: false
+    # Threshold to filter out bounding boxes with low confidence score.
+    score_threshold: 0.01
+
+# Learning rate configuration
+LearningRate:
+  # Base learning rate for training, 1e-3 by default.
+  base_lr: 0.001
+  # Learning rate schedulers, PiecewiseDecay and LinearWarmup by default
+  schedulers:
+  # fluid.layers.piecewise_decay
+  # the learning rate is decayed by a factor of gamma at each milestone
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones:
+    - 400000
+    - 450000
+  # fluid.layers.linear_lr_warmup
+  # The starting learning rate equals base_lr * start_factor
+  - !LinearWarmup
+    start_factor: 0.
+    steps: 4000
+
+# Optimizer module
+OptimizerBuilder:
+  # fluid.optimizer
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  # fluid.regularizer
+  regularizer:
+    factor: 0.0005
+    type: L2
+
+# Data feed module for training
+YoloTrainFeed:
+  # Batch size per device, 8 by default
+  batch_size: 8
+  # Dataset module
+  dataset:
+    # Dataset directory.
+    dataset_dir: dataset/coco
+    # Annotation file path.
+    annotation: annotations/instances_train2017.json
+    # Directory where image files are stored.
+    image_dir: train2017
+  # List of data fields needed.
+  fields: [image, gt_box, gt_label, gt_score]
+  # List of image dims
+  image_shape: [3, 608, 608]
+  # List of sample transformations to use.
+  sample_transforms:
+  # read image data and decode to numpy.
+  - !DecodeImage
+    to_rgb: true
+    # YOLOv3 uses image mixup in training.
+    with_mixup: true
+  # Mixup two images in training, a trick to improve performance.
+  - !MixupImage
+    alpha: 1.5 # default: 1.5
+    beta: 1.5 # default: 1.5
+  # Normalize gtbox to range [0, 1]
+  - !NormalizeBox {}
+  # Random color distort: brightness, contrast, hue, saturation.
+  - !RandomDistort
+    brightness_lower: 0.5
+    brightness_prob: 0.5
+    brightness_upper: 1.5
+    contrast_lower: 0.5
+    contrast_prob: 0.5
+    contrast_upper: 1.5
+    count: 4
+    hue_lower: -18
+    hue_prob: 0.5
+    hue_upper: 18
+    is_order: false
+    saturation_lower: 0.5
+    saturation_prob: 0.5
+    saturation_upper: 1.5
+  # Random Expand the image and modify bounding box.
+  #   Operators:
+  #     1. Scale the image width and height.
+  #     2. Construct new images with new height and width.
+  #     3. Fill the new image with the mean.
+  #     4. Put the original image into the new image.
+  #     5. Rescale the bounding box.
+  #     6. Determine if the new bbox is satisfied in the new image.
+  - !ExpandImage
+    # max expand ratio, default 4.0.
+    max_ratio: 4.0
+    mean: [123.675, 116.28, 103.53]
+    prob: 0.5
+  # Random Crop the image and modify bounding box.
+  #   Operators:
+  #     1. Scale the image width and height.
+  #     2. Crop the image according to a random sample.
+  #     3. Rescale the bounding box.
+  #     4. Determine if the new bbox is satisfied in the new image.
+  - !CropImage
+    # Recrop image if there are no bbox in output cropped image.
+    avoid_no_bbox: true
+    batch_sampler: [[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0],
+                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0],
+                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 1.0],
+                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 1.0],
+                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 1.0],
+                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 1.0],
+                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]]
+    # Whether all bboxes must satisfy the IoU constraints.
+    satisfy_all: false
+  # Resize the image to target_size using a randomly chosen interpolation method:
+  #   cv2.INTER_NEAREST,
+  #   cv2.INTER_LINEAR,
+  #   cv2.INTER_AREA,
+  #   cv2.INTER_CUBIC,
+  #   cv2.INTER_LANCZOS4.
+  - !RandomInterpImage
+    max_size: 0
+    target_size: 608
+  # Flip the image and bounding box.
+  #   Operators:
+  #     1. Flip the image numpy.
+  #     2. Transform the bboxes' x coordinates. (Must judge whether the coordinates are normalized!)
+  #     3. Transform the segmentations' x coordinates. (Must judge whether the coordinates are normalized!)
+  - !RandomFlipImage
+    is_mask_flip: false
+    is_normalized: true
+    prob: 0.5
+  # Normalize the image.
+  #   Operators:
+  #     1.(optional) Scale the image to [0,1]
+  #     2. Each pixel minus mean and is divided by std
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: true
+    mean: [0.485, 0.456, 0.406]
+    std: [0.229, 0.224, 0.225]
+  # Change data layout to [C, H, W].
+  - !Permute
+    channel_first: true
+    to_bgr: false
+  # List of batch transformations to use.
+  batch_transforms:
+  # Random reshape images in each mini-batch to different shapes.
+  - !RandomShape
+    sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
+  # YOLOv3 reads gtboxes into a zero-padded tensor with a maximum box number of 50.
+  num_max_boxes: 50
+  # YOLOv3 reads gtlabel without regarding background as label 0.
+  with_background: false
+  # Number of samples, -1 represents all samples. -1 by default.
+  samples: -1
+  # Whether samples should be shuffled, true by default.
+  shuffle: true
+  # Whether to drop the last images when they amount to less than a full batch.
+  drop_last: true
+  # Whether to use a multi-process reader in training.
+  use_process: true
+  # Number of workers used by the multi-process reader.
+  num_workers: 8
+  # Buffer size for reader.
+  bufsize: 128
+  # Number of epochs for image mixup.
+  mixup_epoch: 250
+
+# Data feed module for evaluation
+YoloEvalFeed:
+  batch_size: 8
+  dataset:
+    dataset_dir: dataset/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  batch_transforms: []
+  fields: [image, im_size, im_id, gt_box, gt_label, is_difficult]
+  image_shape: [3, 608, 608]
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: true
+    with_mixup: false
+  # Rescale image to the specified target size, and capped at max_size if max_size != 0.
+  # If target_size is a list, a scale is selected randomly as the specified target size.
+  - !ResizeImage
+    interp: 2 # 2 for cv2.INTER_CUBIC
+    max_size: 0
+    target_size: 608
+    use_cv2: true
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: true
+    mean: [0.485, 0.456, 0.406]
+    std: [0.229, 0.224, 0.225]
+  - !Permute
+    channel_first: true
+    to_bgr: false
+  num_max_boxes: 50
+  samples: -1
+  shuffle: false
+  drop_last: false
+  # Use multi-thread reader in evaluation mode.
+  use_process: false
+  # Thread number for multi-thread reader.
+  num_workers: 8
+  with_background: false
+
+# Data feed module for test
+YoloTestFeed:
+  batch_size: 1
+  dataset:
+    annotation: dataset/coco/annotations/instances_val2017.json
+  batch_transforms: []
+  fields: [image, im_size, im_id]
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: true
+    with_mixup: false
+  - !ResizeImage
+    interp: 2
+    max_size: 0
+    target_size: 608
+    use_cv2: true
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: true
+    mean: [0.485, 0.456, 0.406]
+    std: [0.229, 0.224, 0.225]
+  - !Permute
+    channel_first: true
+    to_bgr: false
+  num_max_boxes: 50
+  samples: -1
+  shuffle: false
+  drop_last: false
+  # Use multi-thread reader in test mode.
+  use_process: false
+  num_workers: 8
+  with_background: false