# YOLOv3 detector with a DarkNet backbone: model, optimizer and data feed
# settings for training, evaluation and test.
# NOTE(review): the nesting below was reconstructed from a copy whose line
# breaks had been lost — confirm it against the consuming tool's schema.

# Architecture of detection, which is also the prefix of data feed module
architecture: YOLOv3

# Data feed module.
train_feed: YoloTrainFeed
eval_feed: YoloEvalFeed
test_feed: YoloTestFeed

# Use GPU or CPU, true by default.
use_gpu: true

# Maximum number of iterations.
# In YOLOv3 model, default iteration number is to train for 270 epochs.
max_iters: 500200

# Smooth the log output in specified iterations, 20 by default.
log_smooth_window: 20

# The number of iterations between two training log outputs.
log_iter: 20

# The directory to save models.
save_dir: output

# Snapshot period. If training and testing at the same time, the model is
# evaluated at each snapshot_iter. 2000 by default.
snapshot_iter: 2000

# Evaluation method, COCO and VOC are available.
metric: COCO

# The path of pretrained weights. If a URL is provided, it will be downloaded
# and decompressed automatically.
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_pretrained.tar

# The path of final model for evaluation and test.
weights: output/yolov3_darknet/model_final

# Number of classes, 80 for COCO and 20 for VOC.
num_classes: 80

# YOLOv3 architecture, see https://arxiv.org/abs/1804.02767
YOLOv3:
  backbone: DarkNet
  yolo_head: YOLOv3Head

# Backbone module
DarkNet:
  # Batch normalization type in training, sync_bn for synchronized batch
  # normalization
  norm_type: sync_bn
  # L2 weight decay factor of batch normalization layer
  norm_decay: 0.0
  # Darknet convolution layer number, only 53 is supported currently
  depth: 53

# YOLOv3 head module
# Generate bbox output in evaluation and calculate loss in training
# fluid.layers.yolov3_loss / fluid.layers.yolo_box
YOLOv3Head:
  # anchor mask of 3 yolo_loss/yolo_box layers, each yolo_loss/yolo_box layer
  # has 3 anchors
  anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
  # 9 anchors for 3 yolo_loss/yolo_box layers, generated by performing kmeans
  # on COCO gtboxes
  anchors: [[10, 13], [16, 30], [33, 23],
            [30, 61], [62, 45], [59, 119],
            [116, 90], [156, 198], [373, 326]]
  # L2 weight decay factor of batch normalization layer
  norm_decay: 0.0
  # Ignore threshold for yolo_loss layer, 0.7 by default.
  # Objectness loss will be ignored if a prediction bbox overlaps a gtbox
  # over ignore_thresh.
  ignore_thresh: 0.7
  # Whether to use label smooth in yolo_loss layer
  # It is recommended to set this to true only when num_classes is very big
  label_smooth: true
  # fluid.layers.multiclass_nms
  # Non-max suppression for output prediction boxes, see multiclass_nms for
  # the following parameters.
  # 1. Select detection bounding boxes with scores larger than
  #    score_threshold.
  # 2. Select detection bounding boxes with the largest nms_top_k scores.
  # 3. Suppress detection bounding boxes which have high IoU overlap with
  #    already selected boxes.
  # 4. Keep the top keep_top_k detection bounding boxes as output.
  nms:
    # Which label is regarded as background and will be ignored, -1 for no
    # background label.
    background_label: -1
    # Number of total bboxes to be kept per image after NMS step.
    keep_top_k: 100
    # IoU threshold for NMS, bbox with IoU over nms_threshold will be
    # suppressed.
    nms_threshold: 0.45
    # Maximum number of detections to be kept according to the confidences
    # after filtering detections based on score_threshold.
    nms_top_k: 1000
    # Whether detections are normalized.
    normalized: false
    # Threshold to filter out bounding boxes with low confidence score.
    score_threshold: 0.01

# Learning rate configuration
LearningRate:
  # Base learning rate for training, 1e-3 by default.
  base_lr: 0.001
  # Learning rate schedulers, PiecewiseDecay and LinearWarmup by default
  schedulers:
    # fluid.layers.piecewise_decay
    # each milestone stage decays by gamma
    - !PiecewiseDecay
      gamma: 0.1
      milestones:
        - 400000
        - 450000
    # fluid.layers.linear_lr_warmup
    # Start learning rate equals to base_lr * start_factor
    - !LinearWarmup
      start_factor: 0.0
      steps: 4000

# Optimizer module
OptimizerBuilder:
  # fluid.optimizer
  optimizer:
    momentum: 0.9
    type: Momentum
  # fluid.regularizer
  regularizer:
    factor: 0.0005
    type: L2

# Data feed module for training
YoloTrainFeed:
  # Batch size per device, 8 by default
  batch_size: 8
  # Dataset module
  dataset:
    # Dataset directory.
    dataset_dir: dataset/coco
    # Annotation file path.
    annotation: annotations/instances_train2017.json
    # Directory where image files are stored.
    image_dir: train2017
  # List of data fields needed.
  fields: [image, gt_box, gt_label, gt_score]
  # List of image dims
  image_shape: [3, 608, 608]
  # List of sample transformations to use.
  sample_transforms:
    # Read image data and decode to numpy.
    - !DecodeImage
      to_rgb: true
      # YOLOv3 uses image mixup in training.
      with_mixup: true
    # Mixup two images in training, a trick to improve performance.
    - !MixupImage
      alpha: 1.5  # default: 1.5
      beta: 1.5  # default: 1.5
    # Normalize gtbox to range [0, 1]
    - !NormalizeBox {}
    # Random color distort: brightness, contrast, hue, saturation.
    - !RandomDistort
      brightness_lower: 0.5
      brightness_prob: 0.5
      brightness_upper: 1.5
      contrast_lower: 0.5
      contrast_prob: 0.5
      contrast_upper: 1.5
      count: 4
      hue_lower: -18
      hue_prob: 0.5
      hue_upper: 18
      is_order: false
      saturation_lower: 0.5
      saturation_prob: 0.5
      saturation_upper: 1.5
    # Randomly expand the image and modify bounding box.
    # Operators:
    # 1. Scale the image width and height.
    # 2. Construct new images with new height and width.
    # 3. Fill the new image with the mean.
    # 4. Put original image into new image.
    # 5. Rescale the bounding box.
    # 6. Determine if the new bbox is satisfied in the new image.
    - !ExpandImage
      # max expand ratio, default 4.0.
      max_ratio: 4.0
      mean: [123.675, 116.28, 103.53]
      prob: 0.5
    # Randomly crop the image and modify bounding box.
    # Operators:
    # 1. Scale the image width and height.
    # 2. Crop the image according to a random sample.
    # 3. Rescale the bounding box.
    # 4. Determine if the new bbox is satisfied in the new image.
    - !CropImage
      # Recrop image if there are no bbox in output cropped image.
      avoid_no_bbox: true
      batch_sampler:
        - [1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0]
        - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0]
        - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 1.0]
        - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 1.0]
        - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 1.0]
        - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 1.0]
        - [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]
      # Whether all bboxes should satisfy IoU constraints.
      satisfy_all: false
    # Interpolate image to target_size with random interpolate method:
    # cv2.INTER_NEAREST,
    # cv2.INTER_LINEAR,
    # cv2.INTER_AREA,
    # cv2.INTER_CUBIC,
    # cv2.INTER_LANCZOS4,
    - !RandomInterpImage
      max_size: 0
      target_size: 608
    # Flip the image and bounding box.
    # Operators:
    # 1. Flip the image numpy.
    # 2. Transform the bboxes' x coordinates.
    #    (Must judge whether the coordinates are normalized!)
    # 3. Transform the segmentations' x coordinates.
    #    (Must judge whether the coordinates are normalized!)
    - !RandomFlipImage
      is_mask_flip: false
      is_normalized: true
      prob: 0.5
    # Normalize the image.
    # Operators:
    # 1. (optional) Scale the image to [0,1]
    # 2. Each pixel minus mean and is divided by std
    - !NormalizeImage
      is_channel_first: false
      is_scale: true
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]
    # Change data layout to [C, H, W].
    - !Permute
      channel_first: true
      to_bgr: false
  # List of batch transformations to use.
  batch_transforms:
    # Randomly reshape images in each mini-batch to different shapes.
    - !RandomShape
      sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
  # YOLOv3 reads gtbox into zero padded tensor with max box number as 50.
  num_max_boxes: 50
  # YOLOv3 reads gtlabel without regarding background as label 0.
  with_background: false
  # Number of samples, -1 represents all samples. -1 by default.
  samples: -1
  # Whether samples should be shuffled, true by default.
  shuffle: true
  # Whether to drop the last images when they are fewer than a batch.
  drop_last: true
  # Whether to use multi-process reader in training.
  use_process: true
  # Number of workers for the multi-process reader.
  num_workers: 8
  # Buffer size for reader.
  bufsize: 128
  # Mixup image epoch number.
  mixup_epoch: 250

# Data feed module for evaluation
YoloEvalFeed:
  batch_size: 8
  dataset:
    dataset_dir: dataset/coco
    annotation: annotations/instances_val2017.json
    image_dir: val2017
  batch_transforms: []
  fields: [image, im_size, im_id, gt_box, gt_label, is_difficult]
  image_shape: [3, 608, 608]
  sample_transforms:
    - !DecodeImage
      to_rgb: true
      with_mixup: false
    # Rescale image to the specified target size, capped at max_size if
    # max_size != 0.
    # If target_size is a list, a scale is selected randomly as the specified
    # target size.
    - !ResizeImage
      interp: 2  # 2 for cv2.INTER_CUBIC
      max_size: 0
      target_size: 608
      use_cv2: true
    - !NormalizeImage
      is_channel_first: false
      is_scale: true
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]
    - !Permute
      channel_first: true
      to_bgr: false
  num_max_boxes: 50
  samples: -1
  shuffle: false
  drop_last: false
  # Use multi-thread reader in evaluation mode.
  use_process: false
  # Thread number for multi-thread reader.
  num_workers: 8
  with_background: false

# Data feed module for test
YoloTestFeed:
  batch_size: 1
  dataset:
    annotation: dataset/coco/annotations/instances_val2017.json
  batch_transforms: []
  fields: [image, im_size, im_id]
  sample_transforms:
    - !DecodeImage
      to_rgb: true
      with_mixup: false
    - !ResizeImage
      interp: 2
      max_size: 0
      target_size: 608
      use_cv2: true
    - !NormalizeImage
      is_channel_first: false
      is_scale: true
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]
    - !Permute
      channel_first: true
      to_bgr: false
  num_max_boxes: 50
  samples: -1
  shuffle: false
  drop_last: false
  # Use multi-thread reader in test mode.
  use_process: false
  num_workers: 8
  with_background: false