From bdd4bc8a2f44060c6886a2af532378a851d94180 Mon Sep 17 00:00:00 2001
From: Kaipeng Deng
Date: Mon, 26 Aug 2019 20:19:21 +0800
Subject: [PATCH] add config comment in yolov3_darknet.yml (#3126)

* add yolov3_darknet config comment

* fix comment

* add log_iter
---
 docs/config_example/yolov3_darknet.yml | 323 +++++++++++++++++++++++++
 1 file changed, 323 insertions(+)
 create mode 100644 docs/config_example/yolov3_darknet.yml

diff --git a/docs/config_example/yolov3_darknet.yml b/docs/config_example/yolov3_darknet.yml
new file mode 100644
index 000000000..65f479b72
--- /dev/null
+++ b/docs/config_example/yolov3_darknet.yml
@@ -0,0 +1,323 @@
+# Architecture of detection, which is also the prefix of data feed module
+architecture: YOLOv3
+
+# Data feed module.
+train_feed: YoloTrainFeed
+eval_feed: YoloEvalFeed
+test_feed: YoloTestFeed
+
+# Use GPU or CPU, true by default.
+use_gpu: true
+
+# Maximum number of iterations.
+# In YOLOv3 model, the default iteration number is to train for 270 epochs.
+max_iters: 500200
+
+# Smooth the log output in specified iterations, 20 by default.
+log_smooth_window: 20
+
+# The iteration interval at which to display the training log.
+log_iter: 20
+
+# The directory to save models.
+save_dir: output
+
+# Snapshot period. If training and evaluating at the same time, evaluate the model at each snapshot_iter. 2000 by default.
+snapshot_iter: 2000
+
+# Evaluation method, COCO and VOC are available.
+metric: COCO
+
+# The path of pretrained weights. If a url is provided, it will be downloaded and decompressed automatically.
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_pretrained.tar
+# The path of final model for evaluation and test.
+weights: output/yolov3_darknet/model_final
+
+# Number of classes, 80 for COCO and 20 for VOC.
+num_classes: 80
+
+
+# YOLOv3 architecture, see https://arxiv.org/abs/1804.02767
+YOLOv3:
+  backbone: DarkNet
+  yolo_head: YOLOv3Head
+
+# Backbone module
+DarkNet:
+  # Batch normalization type in training, sync_bn for synchronized batch normalization
+  norm_type: sync_bn
+  # L2 weight decay factor of batch normalization layer
+  norm_decay: 0.
+  # Darknet convolution layer number, only support 53 currently
+  depth: 53
+
+# YOLOv3 head module
+# Generate bbox output in evaluation and calculate loss in training
+# fluid.layers.yolov3_loss / fluid.layers.yolo_box
+YOLOv3Head:
+  # anchor mask of 3 yolo_loss/yolo_box layers, each yolo_loss/yolo_box layer has 3 anchors
+  anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+  # 9 anchors for 3 yolo_loss/yolo_box layers, generated by performing kmeans on COCO gtboxes
+  anchors: [[10, 13], [16, 30], [33, 23],
+            [30, 61], [62, 45], [59, 119],
+            [116, 90], [156, 198], [373, 326]]
+  # L2 weight decay factor of batch normalization layer
+  norm_decay: 0.
+  # Ignore threshold for yolo_loss layer, 0.7 by default.
+  # Objectness loss will be ignored if a prediction bbox overlaps a gtbox over ignore_thresh.
+  ignore_thresh: 0.7
+  # Whether to use label smoothing in yolo_loss layer
+  # It is recommended to set to true only when num_classes is very big
+  label_smooth: true
+  # fluid.layers.multiclass_nms
+  # Non-max suppression for output prediction boxes, see multiclass_nms for the following parameters.
+  # 1. Select detection bounding boxes with high scores larger than score_threshold.
+  # 2. Select detection bounding boxes with the largest nms_top_k scores.
+  # 3. Suppress detection bounding boxes which have high IoU overlap with already selected boxes.
+  # 4. Keep the top keep_top_k detection bounding boxes as output.
+  nms:
+    # Which label is regarded as background and will be ignored, -1 for no background label.
+    background_label: -1
+    # Number of total bboxes to be kept per image after NMS step.
+ keep_top_k: 100 + # IoU threshold for NMS, bbox with IoU over nms_threshold will be suppressed. + nms_threshold: 0.45 + # Maximum number of detections to be kept according to the confidences after the filtering detections based on score_threshold. + nms_top_k: 1000 + # Whether detections are normalized. + normalized: false + # Threshold to filter out bounding boxes with low confidence score. + score_threshold: 0.01 + +# Learning rate configuration +LearningRate: + # Base learning rate for training, 1e-3 by default. + base_lr: 0.001 + # Learning rate schedulers, PiecewiseDecay and LinearWarmup by default + schedulers: + # fluid.layers.piecewise_decay + # each milestone stage decay gamma + - !PiecewiseDecay + gamma: 0.1 + milestones: + - 400000 + - 450000 + # fluid.layers.linear_lr_warmup + # Start learning rate equals to base_lr * start_factor + - !LinearWarmup + start_factor: 0. + steps: 4000 + +# Optimizer module +OptimizerBuilder: + # fluid.optimizer + optimizer: + momentum: 0.9 + type: Momentum + # fluid.regularizer + regularizer: + factor: 0.0005 + type: L2 + +# Data feed module for training +YoloTrainFeed: + # Batch size per device, 8 by default + batch_size: 8 + # Dataset module + dataset: + # Dataset directory. + dataset_dir: dataset/coco + # Annotation file path. + annotation: annotations/instances_train2017.json + # Directory where image files are stored. + image_dir: train2017 + # List of data fields needed. + fields: [image, gt_box, gt_label, gt_score] + # List of image dims + image_shape: [3, 608, 608] + # List of sample transformations to use. + sample_transforms: + # read image data and decode to numpy. + - !DecodeImage + to_rgb: true + # YOLOv3 use image mixup in training. + with_mixup: true + # Mixup two images in training, a trick to improve performance. + - !MixupImage + alpha: 1.5 # default: 1.5 + beta: 1.5 # default: 1.5 + # Normalize gtbox to range [0, 1] + - !NormalizeBox {} + # Random color distort: brightness, contrast, hue, saturation. 
+  - !RandomDistort
+    brightness_lower: 0.5
+    brightness_prob: 0.5
+    brightness_upper: 1.5
+    contrast_lower: 0.5
+    contrast_prob: 0.5
+    contrast_upper: 1.5
+    count: 4
+    hue_lower: -18
+    hue_prob: 0.5
+    hue_upper: 18
+    is_order: false
+    saturation_lower: 0.5
+    saturation_prob: 0.5
+    saturation_upper: 1.5
+  # Randomly expand the image and modify the bounding box.
+  # Operators:
+  #   1. Scale the image width and height.
+  #   2. Construct new images with new height and width.
+  #   3. Fill the new image with the mean.
+  #   4. Put the original image into the new image.
+  #   5. Rescale the bounding box.
+  #   6. Determine if the new bbox is satisfied in the new image.
+  - !ExpandImage
+    # max expand ratio, default 4.0.
+    max_ratio: 4.0
+    mean: [123.675, 116.28, 103.53]
+    prob: 0.5
+  # Randomly crop the image and modify the bounding box.
+  # Operators:
+  #   1. Scale the image width and height.
+  #   2. Crop the image according to a random sample.
+  #   3. Rescale the bounding box.
+  #   4. Determine if the new bbox is satisfied in the new image.
+  - !CropImage
+    # Recrop image if there are no bbox in output cropped image.
+    avoid_no_bbox: true
+    batch_sampler: [[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0],
+                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0],
+                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 1.0],
+                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 1.0],
+                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 1.0],
+                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 1.0],
+                    [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]]
+    # Whether should all bbox satisfy IoU constraints.
+    satisfy_all: false
+  # Interpolate image to target_size with random interpolate method:
+  #   cv2.INTER_NEAREST,
+  #   cv2.INTER_LINEAR,
+  #   cv2.INTER_AREA,
+  #   cv2.INTER_CUBIC,
+  #   cv2.INTER_LANCZOS4,
+  - !RandomInterpImage
+    max_size: 0
+    target_size: 608
+  # Flip the image and bounding box.
+  # Operators:
+  #   1. Flip the image numpy.
+  #   2. Transform the bboxes' x coordinates. (Must judge whether the coordinates are normalized!)
+  #   3. Transform the segmentations' x coordinates.
+  #      (Must judge whether the coordinates are normalized!)
+  - !RandomFlipImage
+    is_mask_flip: false
+    is_normalized: true
+    prob: 0.5
+  # Normalize the image.
+  # Operators:
+  #   1. (optional) Scale the image to [0,1]
+  #   2. Subtract the mean from each pixel and divide by std
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: true
+    mean: [0.485, 0.456, 0.406]
+    std: [0.229, 0.224, 0.225]
+  # Change data layout to [C, H, W].
+  - !Permute
+    channel_first: true
+    to_bgr: false
+  # List of batch transformations to use.
+  batch_transforms:
+  # Randomly reshape images in each mini-batch to different shapes.
+  - !RandomShape
+    sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
+  # YOLOv3 reads gtbox into a zero padded tensor with max box number as 50.
+  num_max_boxes: 50
+  # YOLOv3 reads gtlabel without regarding background as label 0.
+  with_background: false
+  # Number of samples, -1 represents all samples. -1 by default.
+  samples: -1
+  # Whether samples should be shuffled, true by default.
+  shuffle: true
+  # Whether to drop the last images that are fewer than a batch.
+  drop_last: true
+  # Whether to use multi-process reader in training.
+  use_process: true
+  # Multi-process reader worker number.
+  num_workers: 8
+  # Buffer size for reader.
+  bufsize: 128
+  # Mixup image epoch number.
+  mixup_epoch: 250
+
+# Data feed module for evaluation
+YoloEvalFeed:
+  batch_size: 8
+  dataset:
+    dataset_dir: dataset/coco
+    annotation: annotations/instances_val2017.json
+    image_dir: val2017
+  batch_transforms: []
+  fields: [image, im_size, im_id, gt_box, gt_label, is_difficult]
+  image_shape: [3, 608, 608]
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: true
+    with_mixup: false
+  # Rescale image to the specified target size, capped at max_size if max_size != 0.
+  # If target_size is a list, a scale is selected randomly as the specified target size.
+ - !ResizeImage + interp: 2 # 2 for cv2.INTER_CUBIC + max_size: 0 + target_size: 608 + use_cv2: true + - !NormalizeImage + is_channel_first: false + is_scale: true + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + - !Permute + channel_first: true + to_bgr: false + num_max_boxes: 50 + samples: -1 + shuffle: false + drop_last: false + # Use multi-thread reader in evaluation mode. + use_process: false + # Thread number for multi-thread reader. + num_workers: 8 + with_background: false + +# Data feed module for test +YoloTestFeed: + batch_size: 1 + dataset: + annotation: dataset/coco/annotations/instances_val2017.json + batch_transforms: [] + fields: [image, im_size, im_id] + sample_transforms: + - !DecodeImage + to_rgb: true + with_mixup: false + - !ResizeImage + interp: 2 + max_size: 0 + target_size: 608 + use_cv2: true + - !NormalizeImage + is_channel_first: false + is_scale: true + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + - !Permute + channel_first: true + to_bgr: false + num_max_boxes: 50 + samples: -1 + shuffle: false + drop_last: false + # Use multi-thread reader in test mode. + use_process: false + num_workers: 8 + with_background: false -- GitLab