Commit e26408d9 authored by Guanghua Yu, committed by wangguanzhong

[PaddleDetection] add SSD config_demo (#3176)

* add_ssd_config_demo
Parent 77716bf0
# Detection architecture; its name is also the prefix of the data feed modules.
architecture: SSD
# Data feed modules.
# Data feed used in training.
train_feed: SSDTrainFeed
# Data feed used in evaluation.
eval_feed: SSDEvalFeed
# Data feed used in inference.
test_feed: SSDTestFeed
# Whether to use GPU, true by default.
use_gpu: true
# Maximum number of training iterations.
max_iters: 400000
# Snapshot period. If training and evaluation run together, the model is evaluated at every snapshot_iter. 10000 by default.
snapshot_iter: 10000
# Number of iterations over which the displayed log is smoothed, 20 by default.
log_smooth_window: 20
# Training logs are printed once every log_iter iterations.
log_iter: 20
# Evaluation method; COCO and VOC are available.
metric: COCO
# The URL or path of pretrained weights used to initialize training.
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_caffe_pretrained.tar
# The directory to save models.
save_dir: output
# The path of final model for evaluation and test.
weights: output/ssd_vgg16_300/model_final
# Number of classes, 81 for COCO and 21 for VOC.
num_classes: 81
# SSD architecture, see https://arxiv.org/abs/1512.02325
SSD:
# backbone instance, defined below.
backbone: VGG
# `MultiBoxHead` instance, defined below.
multi_box_head: MultiBoxHead
# fluid.layers.detection_output, the detection output layer for SSD.
# This operation obtains the detection results by performing two steps:
# 1. Decode the input bounding-box predictions according to the prior boxes.
# 2. Get the final results by applying multi-class non-maximum suppression (NMS).
# Note that this operation does not clip the final output boxes to the image
# window (the sketch after this section shows the equivalent fluid call).
output_decoder:
# The index of the background label; boxes with this label are ignored.
# If set to -1, all categories are considered.
background_label: 0
# Number of total bboxes to be kept per image after NMS.
keep_top_k: 200
# The parameter for adaptive NMS.
nms_eta: 1.0
# The threshold to be used in NMS.
nms_threshold: 0.45
# Maximum number of detections kept per class based on confidence,
# after detections below score_threshold have been filtered out.
nms_top_k: 400
# Threshold to filter out bounding boxes with low confidence scores.
# If not provided, all boxes are considered.
score_threshold: 0.01
# VGG backbone, see https://arxiv.org/abs/1409.1556
VGG:
# The VGG net depth (16 or 19).
depth: 16
# Whether extra blocks should be added after the backbone.
with_extra_blocks: true
# Parameters of each extra block (see the sketch after this section):
# [in_channel, out_channel, padding_size, stride_size, filter_size]
extra_block_filters:
- [256, 512, 1, 2, 3]
- [128, 256, 1, 2, 3]
- [128, 256, 0, 1, 3]
- [128, 256, 0, 1, 3]
# List of initial scales for L2 normalization; -1 skips normalization for that input.
normalizations: [20., -1, -1, -1, -1, -1]
# fluid.layers.multi_box_head, generates prior boxes for the SSD algorithm.
# Prior boxes are generated for each input in the inputs list according to
# the other parameters. Each position of an input produces N prior boxes,
# where N is determined by the count of min_sizes, max_sizes and
# aspect_ratios. Box sizes lie in the (min_size, max_size) interval and are
# generated in sequence according to the aspect_ratios.
MultiBoxHead:
# The base image size, from which min_size and max_size are derived via min_ratio and max_ratio (see the sketch after this section).
base_size: 300
# The aspect ratios of generated prior boxes; the lengths of inputs and aspect_ratios must be equal.
aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]]
# The min ratio of generated prior boxes, in percent of base_size.
min_ratio: 15
# The max ratio of generated prior boxes, in percent of base_size.
max_ratio: 90
# If len(inputs) <= 2, min_sizes must be set up, and the length of min_sizes
# should equal the length of inputs. Default: None.
min_sizes: [30.0, 60.0, 111.0, 162.0, 213.0, 264.0]
# If len(inputs) <= 2, max_sizes must be set up, and the length of max_sizes
# should equal the length of inputs. Default: None.
max_sizes: [60.0, 111.0, 162.0, 213.0, 264.0, 315.0]
# If step_w and step_h are the same, step_w and step_h can be replaced by steps.
steps: [8, 16, 32, 64, 100, 300]
# Prior boxes center offset. Default: 0.5
offset: 0.5
# Whether to flip aspect ratios. Default: false.
flip: true
# The kernel size of conv2d. Default: 1.
kernel_size: 3
# The padding of conv2d. Default:0.
pad: 1
# Learning rate configuration
LearningRate:
# Base learning rate, 0.01 by default
base_lr: 0.001
# Learning rate schedulers, PiecewiseDecay and LinearWarmup by default
schedulers:
# fluid.layers.piecewise_decay
# values takes precedence; if values is null, the learning rate is multiplied by gamma at each milestone
- !PiecewiseDecay
gamma: 0.1
milestones: [280000, 360000]
# fluid.layers.linear_lr_warmup
# The starting learning rate equals base_lr * start_factor (a sketch of the combined schedule follows this section)
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
# Optimizer module
OptimizerBuilder:
# fluid.optimizer. Training a neural network is in essence an optimization
# problem: with forward computation and back-propagation, the optimizer uses
# the back-propagated gradients to update the network parameters.
optimizer:
# The Momentum optimizer adds momentum to plain SGD, reducing gradient
# noise during stochastic gradient descent (see the sketch after this section).
momentum: 0.9
type: Momentum
# fluid.regularizer
regularizer:
# Implements L2 weight-decay regularization.
# A small L2 factor helps prevent overfitting the training data.
factor: 0.0005
type: L2
# Data feed module for training
SSDTrainFeed:
# Batch size per device
batch_size: 16
# list of batch transformations to use
batch_transforms: []
# The data buffer size
bufsize: 10
# Dataset module
dataset:
# Dataset directory
dataset_dir: dataset/coco
# Annotation file path
annotation: annotations/instances_train2017.json
# Directory where image files are stored
image_dir: train2017
# Drop last batch if size is uneven, false by default
drop_last: true
# List of data fields needed
fields: [image, gt_box, gt_label]
# list of image dims
image_shape: [3, 300, 300]
# number of worker processes (or threads)
num_workers: 8
# List of sample transformations to use
sample_transforms:
# Transform the image data to numpy format.
- !DecodeImage
# whether to convert BGR to RGB
to_rgb: true # default: true
# whether or not to mixup the image and gt_bbox/gt_score
with_mixup: false # default: false
# Transform the bounding box's coordinates to [0, 1].
- !NormalizeBox {}
# Randomly modify image brightness, contrast, saturation and hue, and optionally reorder channels.
- !RandomDistort
# brightness_lower / brightness_upper (float): the brightness factor is
# sampled between brightness_lower and brightness_upper
brightness_lower: 0.875
brightness_upper: 1.125
# brightness_prob (float): the probability of changing brightness
brightness_prob: 0.5
# contrast_lower / contrast_upper (float): the contrast factor is
# sampled between contrast_lower and contrast_upper
contrast_lower: 0.5
contrast_upper: 1.5
# contrast_prob (float): the probability of changing contrast
contrast_prob: 0.5
# count (int): the number of distortion kinds to apply
count: 4
# hue_lower/ hue_upper (float): the hue between hue_lower and hue_upper
hue_lower: -18
hue_upper: 18
# hue_prob (float): the probability of changing hue
hue_prob: 0.5
# is_order (bool): whether to apply the distortions in a fixed order
is_order: true
# saturation_lower / saturation_upper (float): the saturation factor is
# sampled between saturation_lower and saturation_upper
saturation_lower: 0.5
saturation_upper: 1.5
# saturation_prob (float): the probability of changing saturation
saturation_prob: 0.5
# Expand the image and modify the bounding boxes.
# Operators:
# 1. Scale the image width and height.
# 2. Construct a new image with the new height and width.
# 3. Fill the new image with the mean.
# 4. Paste the original image into the new image.
# 5. Rescale the bounding boxes.
# 6. Check whether each new bbox is still valid in the new image.
# (a numpy sketch of this box arithmetic appears after this feed section)
- !ExpandImage
# max_ratio (float): the maximum expansion ratio
max_ratio: 4
# mean (list): the pixel mean
mean: [104, 117, 123]
# prob (float): the probability of expanding image
prob: 0.5
# Crop the image and modify the bounding boxes.
# Operators:
# 1. Scale the image width and height.
# 2. Crop the image according to a random sample.
# 3. Rescale the bounding boxes.
# 4. Check whether each new bbox is still valid in the new image.
- !CropImage
# avoid_no_bbox (bool): whether to avoid the situation
# where no box remains after cropping.
avoid_no_bbox: false
# batch_sampler (list): multiple sets of parameters for cropping. Each entry
# follows the usual SSD sampler layout (an assumption based on the SSD paper's
# sampler, not stated in this file): [max_sample, max_trial, min_scale,
# max_scale, min_aspect_ratio, max_aspect_ratio, min_overlap, max_overlap].
batch_sampler:
- [1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 0.0]
- [1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0]
# satisfy_all (bool): whether all boxes must satisfy the overlap constraint.
satisfy_all: false
# Rescale the image to the specified target size, capped at max_size if max_size != 0.
# If target_size is a list, a scale is randomly selected from it as the target size.
- !ResizeImage
# Resize method, cv2.INTER_LINEAR(1) by default
interp: 1
# max_size (int): the max size of image
max_size: 0
# target_size (int|list): the target size of image's short side,
# multi-scale training is adopted when type is list.
target_size: 300
# use_cv2 (bool): whether to use cv2 (true) or PIL (false) interpolation
use_cv2: false
# Flip the image and bounding boxes.
# Operators:
# 1. Flip the image array.
# 2. Transform the bboxes' x coordinates (must check whether the coordinates are normalized).
# 3. Transform the segmentations' x coordinates (must check whether the coordinates are normalized).
- !RandomFlipImage
# is_mask_flip (bool): whether to flip the segmentation
is_mask_flip: false
# is_normalized (bool): whether the bbox coordinates are scaled to [0, 1]
is_normalized: true
# prob (float): the probability of flipping image
prob: 0.5
# Change the channel order of the image.
- !Permute
# channel_first (bool): whether to output [C, H, W] instead of [H, W, C], true by default
channel_first: true
# to_bgr (bool): whether to convert RGB to BGR
to_bgr: true
# Normalize the image.
# Operators:
# 1. (optional) Scale the pixel values to [0, 1].
# 2. Subtract the mean from each pixel and divide by the std.
- !NormalizeImage
# is_channel_first (bool): whether the image is [C, H, W] rather than [H, W, C], true by default
is_channel_first: true
# is_scale (bool): whether to divide pixel values by 255, true by default
is_scale: false
# mean (list): the pixel mean
mean: [104, 117, 123]
# std (list): the pixel standard deviation
std: [1, 1, 1]
# Number of samples, -1 represents all samples. -1 by default
samples: -1
# Whether samples should be shuffled, true by default
shuffle: true
# Whether to use multi-process data loading, false by default
use_process: true
# Data feed module for evaluation
SSDEvalFeed:
# Batch size per device
batch_size: 32
# list of batch transformations to use
batch_transforms: []
# The data buffer size
bufsize: 10
# Dataset module
dataset:
# Dataset directory
dataset_dir: dataset/coco
# Annotation file path
annotation: annotations/instances_val2017.json
# Directory where image files are stored
image_dir: val2017
# Drop last batch if size is uneven, false by default
drop_last: true
# List of data fields needed
fields: [image, im_shape, im_id, gt_box, gt_label, is_difficult]
# list of image dims
image_shape: [3, 300, 300]
# number of worker processes (or threads)
num_workers: 8
# List of sample transformations to use
sample_transforms:
# Transform the image data to numpy format.
- !DecodeImage
# whether to convert BGR to RGB
to_rgb: true # default: true
# whether or not to mixup the image and gt_bbox/gt_score
with_mixup: false # default: false
# Transform the bounding box's coordinates to [0, 1].
- !NormalizeBox {}
# Rescale the image to the specified target size, capped at max_size if max_size != 0.
# If target_size is a list, a scale is randomly selected from it as the target size.
- !ResizeImage
# Resize method, cv2.INTER_LINEAR(1) by default
interp: 1
# max_size (int): the max size of image
max_size: 0
# target_size (int|list): the target size of image's short side,
# multi-scale training is adopted when type is list.
target_size: 300
# use_cv2 (bool): whether to use cv2 (true) or PIL (false) interpolation
use_cv2: false
- !Permute
# channel_first (bool): whether to output [C, H, W] instead of [H, W, C], true by default
channel_first: true
# to_bgr (bool): whether to convert RGB to BGR
to_bgr: true
# Normalize the image.
# Operators:
# 1. (optional) Scale the pixel values to [0, 1].
# 2. Subtract the mean from each pixel and divide by the std.
- !NormalizeImage
# is_channel_first (bool): whether the image is [C, H, W] rather than [H, W, C], true by default
is_channel_first: true
# is_scale (bool): whether to divide pixel values by 255, true by default
is_scale: false
# mean (list): the pixel mean
mean: [104, 117, 123]
# std (list): the pixel standard deviation
std: [1, 1, 1]
# Number of samples, -1 represents all samples. -1 by default
samples: -1
# Whether samples should be shuffled, true by default
shuffle: false
# Whether to use multi-process data loading, false by default
use_process: false
# Data feed module for test
SSDTestFeed:
# Batch size per device
batch_size: 1
# list of batch transformations to use
batch_transforms: []
# The data buffer size
bufsize: 10
# Dataset module
dataset:
# Annotation file path
annotation: dataset/coco/annotations/instances_val2017.json
# Drop last batch if size is uneven, false by default
drop_last: false
# List of data fields needed
fields: [image, im_id]
# list of image dims
image_shape: [3, 300, 300]
# number of worker processes (or threads)
num_workers: 8
# List of sample transformations to use
sample_transforms:
# Transform the image data to numpy format.
- !DecodeImage
# whether to convert BGR to RGB
to_rgb: true # default: true
# whether or not to mixup the image and gt_bbox/gt_score
with_mixup: false # default: false
# Rescale the image to the specified target size, capped at max_size if max_size != 0.
# If target_size is a list, a scale is randomly selected from it as the target size.
- !ResizeImage
# Resize method, cv2.INTER_LINEAR(1) by default
interp: 1
# max_size (int): the max size of image
max_size: 0
# target_size (int|list): the target size of image's short side,
# multi-scale training is adopted when type is list.
target_size: 300
# use_cv2 (bool): whether to use cv2 (true) or PIL (false) interpolation
use_cv2: false
- !Permute
# channel_first (bool): whether to output [C, H, W] instead of [H, W, C], true by default
channel_first: true
# to_bgr (bool): whether to convert RGB to BGR
to_bgr: true
# Normalize the image.
# Operators:
# 1. (optional) Scale the pixel values to [0, 1].
# 2. Subtract the mean from each pixel and divide by the std.
- !NormalizeImage
# is_channel_first (bool): whether the image is [C, H, W] rather than [H, W, C], true by default
is_channel_first: true
# is_scale (bool): whether to divide pixel values by 255, true by default
is_scale: false
# mean (list): the pixel mean
mean: [104, 117, 123]
# std (list): the pixel standard deviation
std: [1, 1, 1]
# Number of samples, -1 represents all samples. -1 by default
samples: -1
# Whether samples should be shuffled, true by default
shuffle: false
# Whether to use multi-process data loading, false by default
use_process: false
@@ -82,6 +82,7 @@ class DecodeImage(BaseOperator):
         Args:
             to_rgb (bool): whether to convert BGR to RGB
+            with_mixup (bool): whether or not to mixup image and gt_bbox/gt_score
         """
         super(DecodeImage, self).__init__()
@@ -459,7 +460,7 @@ class ExpandImage(BaseOperator):
     def __init__(self, max_ratio, prob, mean=[127.5, 127.5, 127.5]):
         """
         Args:
-            ratio (float): the ratio of expanding
+            max_ratio (float): the ratio of expanding
             prob (float): the probability of expanding image
             mean (list): the pixel mean
         """
@@ -19,7 +19,7 @@ from __future__ import print_function
 from paddle import fluid
 from ppdet.core.workspace import register
-from ppdet.modeling.ops import SSDOutputDecoder, SSDMetric
+from ppdet.modeling.ops import SSDOutputDecoder

 __all__ = ['SSD']
@@ -33,30 +33,25 @@ class SSD(object):
         backbone (object): backbone instance
         multi_box_head (object): `MultiBoxHead` instance
         output_decoder (object): `SSDOutputDecoder` instance
-        metric (object): `SSDMetric` instance for training
         num_classes (int): number of output classes
     """

     __category__ = 'architecture'
-    __inject__ = ['backbone', 'multi_box_head', 'output_decoder', 'metric']
+    __inject__ = ['backbone', 'multi_box_head', 'output_decoder']
     __shared__ = ['num_classes']

     def __init__(self,
                  backbone,
                  multi_box_head='MultiBoxHead',
                  output_decoder=SSDOutputDecoder().__dict__,
-                 metric=SSDMetric().__dict__,
                  num_classes=21):
         super(SSD, self).__init__()
         self.backbone = backbone
         self.multi_box_head = multi_box_head
         self.num_classes = num_classes
         self.output_decoder = output_decoder
-        self.metric = metric
         if isinstance(output_decoder, dict):
             self.output_decoder = SSDOutputDecoder(**output_decoder)
-        if isinstance(metric, dict):
-            self.metric = SSDMetric(**metric)

     def build(self, feed_vars, mode='train'):
         im = feed_vars['image']
@@ -23,8 +23,7 @@ from ppdet.core.workspace import register, serializable
 __all__ = [
     'AnchorGenerator', 'RPNTargetAssign', 'GenerateProposals', 'MultiClassNMS',
     'BBoxAssigner', 'MaskAssigner', 'RoIAlign', 'RoIPool', 'MultiBoxHead',
-    'SSDOutputDecoder', 'SSDMetric', 'RetinaTargetAssign',
-    'RetinaOutputDecoder', 'ConvNorm'
+    'SSDOutputDecoder', 'RetinaTargetAssign', 'RetinaOutputDecoder', 'ConvNorm'
 ]
@@ -303,22 +302,6 @@ class SSDOutputDecoder(object):
         self.nms_eta = nms_eta


-@register
-@serializable
-class SSDMetric(object):
-    __op__ = fluid.metrics.DetectionMAP
-    __append_doc__ = True
-
-    def __init__(self,
-                 overlap_threshold=0.5,
-                 evaluate_difficult=False,
-                 ap_version='integral'):
-        super(SSDMetric, self).__init__()
-        self.overlap_threshold = overlap_threshold
-        self.evaluate_difficult = evaluate_difficult
-        self.ap_version = ap_version
-
-
 @register
 @serializable
 class RetinaTargetAssign(object):