提交 743dd8d9 编写于 作者: littletomatodonkey

add esfpn model

上级 7c8fc03b
# Faster R-CNN with ESFPN (Enhanced Single FPN) neck, ResNet50-vc backbone,
# 2x COCO schedule. Nesting below reconstructed to standard PaddleDetection
# config layout (top-level keys, 2-space indent).
architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
test_feed: FasterRCNNTestFeed
eval_feed: FasterRCNNEvalFeed
fusebn: false
save_dir: output
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vc_oidv4_pretrained.tar
log_smooth_window: 20
snapshot_iter: 10000
max_iters: 360000
use_gpu: yes
metric: COCO
weights: https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_vc_esfpn_2x.tar

# Architecture component wiring: ESFPN neck with its dedicated single-map
# RPN head and RoI extractor.
FasterRCNN:
  backbone: ResNet
  fpn: ESFPN
  rpn_head: ESFPNRPNHead
  roi_extractor: ESFPNRoIAlign
  bbox_head: BBoxHead
  bbox_assigner: BBoxAssigner

ResNet:
  depth: 50
  feature_maps: [2, 3, 4, 5]
  freeze_at: 2
  norm_type: affine_channel
  norm_decay: true
  variant: c

ESFPN:
  max_level: 4
  min_level: 2
  num_chan: 256
  # Scales for the three output maps (1/16, 1/8, 1/4), coarse to fine.
  spatial_scale: [0.0625, 0.125, 0.25]

ESFPNRPNHead:
  anchor_generator:
    anchor_sizes: [32, 64, 128, 256, 512]
    aspect_ratios: [0.5, 1.0, 2.0]
    stride: [16.0, 16.0]
    variance: [1.0, 1.0, 1.0, 1.0]
  rpn_target_assign:
    rpn_batch_size_per_im: 256
    rpn_fg_fraction: 0.5
    rpn_negative_overlap: 0.3
    rpn_positive_overlap: 0.7
    rpn_straddle_thresh: 0.0
    use_random: true
  test_proposal:
    eta: 1.0
    min_size: 0.0
    nms_thresh: 0.7
    post_nms_top_n: 1000
    pre_nms_top_n: 6000
  train_proposal:
    eta: 1.0
    min_size: 0.0
    nms_thresh: 0.7
    post_nms_top_n: 2000
    pre_nms_top_n: 12000

ESFPNRoIAlign:
  box_resolution: 14
  spatial_scale: 0.25
  sampling_ratio: 2

BBoxAssigner:
  batch_size_per_im: 512
  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
  bg_thresh_hi: 0.5
  bg_thresh_lo: 0.0
  fg_fraction: 0.25
  fg_thresh: 0.5
  num_classes: 81

BBoxHead:
  head: TwoFCHead
  nms:
    keep_top_k: 100
    nms_threshold: 0.5
    score_threshold: 0.05
  num_classes: 81

TwoFCHead:
  num_chan: 1024

LearningRate:
  base_lr: 0.005
  schedulers:
  - !PiecewiseDecay
    gamma: 0.1
    milestones:
    - 240000
    - 320000
    values: null
  - !LinearWarmup
    start_factor: 0.3333333333333333
    steps: 500

OptimizerBuilder:
  optimizer:
    momentum: 0.9
    type: Momentum
  regularizer:
    factor: 0.0001
    type: L2

FasterRCNNTrainFeed:
  batch_size: 1
  batch_transforms:
  - !PadBatch
    pad_to_stride: 32
  dataset:
    dataset_dir: dataset/coco
    annotation: annotations/instances_train2017.json
    image_dir: train2017
  drop_last: false
  num_workers: 2
  shuffle: true

FasterRCNNTestFeed:
  batch_size: 1
  batch_transforms:
  - !PadBatch
    pad_to_stride: 32
  dataset:
    # NOTE(review): train feed uses `dataset/coco` but test/eval use
    # `data/coco` — presumably one of them is a typo; verify against the
    # actual dataset layout before training.
    dataset_dir: data/coco
    annotation: annotations/instances_val2017.json
    image_dir: val2017
  drop_last: false

FasterRCNNEvalFeed:
  batch_size: 1
  batch_transforms:
  - !PadBatch
    pad_to_stride: 0
  dataset:
    dataset_dir: data/coco
    annotation: annotations/instances_val2017.json
    image_dir: val2017
......@@ -437,3 +437,59 @@ class FPNRPNHead(RPNHead):
anchors = fluid.layers.concat(anchors)
anchor_var = fluid.layers.concat(anchor_vars)
return rpn_cls, rpn_bbox, anchors, anchor_var
@register
class ESFPNRPNHead(RPNHead):
    """Enhanced Single FPN RPN head.

    Runs the region proposal network on a single ESFPN output map — the
    first entry of the feature dict (P4) — instead of one RPN per
    pyramid level.

    Args:
        anchor_generator (object): `AnchorGenerator` instance
        rpn_target_assign (object): `RPNTargetAssign` instance
        train_proposal (object): `GenerateProposals` instance for training
        test_proposal (object): `GenerateProposals` instance for testing
    """
    __inject__ = [
        'anchor_generator', 'rpn_target_assign', 'train_proposal',
        'test_proposal'
    ]

    def __init__(self,
                 anchor_generator=AnchorGenerator().__dict__,
                 rpn_target_assign=RPNTargetAssign().__dict__,
                 train_proposal=GenerateProposals(12000, 2000).__dict__,
                 test_proposal=GenerateProposals().__dict__):
        # All configuration is handled by the base RPNHead; this subclass
        # only overrides how proposals are generated.
        super(ESFPNRPNHead, self).__init__(anchor_generator, rpn_target_assign,
                                           train_proposal, test_proposal)

    def get_proposals(self, body_feats, im_info, mode='train'):
        """Generate proposals from a single ESFPN feature map.

        Args:
            body_feats (dict): ordered feature maps from the neck; only
                the first entry is consumed.
            im_info (Variable): image info of shape [N, 3] holding
                (height, width, scale).
            mode (str): 'train' selects the training proposal op,
                anything else the test op.

        Returns:
            rpn_rois (Variable): proposals with shape (rois_num, 4).
        """
        # Single-map RPN: take the first feature the neck produced
        # (for ESFPN this is P4) rather than iterating all levels.
        single_feat = next(iter(body_feats.values()))
        cls_logits, bbox_deltas = self._get_output(single_feat)
        cls_prob = fluid.layers.sigmoid(
            cls_logits, name='rpn_cls_score_prob')
        if mode == 'train':
            generate_fn = self.train_proposal
        else:
            generate_fn = self.test_proposal
        # GenerateProposals also returns per-RoI scores, which are unused.
        proposals, _ = generate_fn(
            scores=cls_prob,
            bbox_deltas=bbox_deltas,
            im_info=im_info,
            anchors=self.anchor,
            variances=self.anchor_var)
        return proposals
\ No newline at end of file
......@@ -25,7 +25,7 @@ from paddle.fluid.regularizer import L2Decay
from ppdet.core.workspace import register
__all__ = ['FPN']
__all__ = ['FPN', 'ESFPN']
@register
......@@ -174,3 +174,117 @@ class FPN(object):
self.spatial_scale.insert(0, self.spatial_scale[0] * 0.5)
res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list])
return res_dict, self.spatial_scale
@register
class ESFPN(object):
    """
    Enhanced Single Feature Pyramid Network.

    Reduces the three coarsest backbone stages (res5, res4, res3) to
    `num_chan` channels, then fuses them top-down (P4 <- res5+res4,
    P3 <- P4+res3, P2 <- P3+res2) with a 3x3 smoothing conv after each
    addition.

    Args:
        num_chan (int): number of feature channels
        min_level (int): lowest level of the backbone feature map to use
        max_level (int): highest level of the backbone feature map to use
        spatial_scale (list): feature map scaling factor
        has_extra_convs (bool): whether has extra convolutions in higher
            levels (NOTE(review): stored but never read in this class)
    """

    def __init__(self,
                 num_chan=256,
                 min_level=2,
                 max_level=4,
                 spatial_scale=[1. / 16., 1. / 8., 1. / 4.],
                 has_extra_convs=False):
        # NOTE(review): mutable default for `spatial_scale` — safe only as
        # long as callers never mutate the stored list.
        self.num_chan = num_chan
        self.min_level = min_level
        self.max_level = max_level
        self.spatial_scale = spatial_scale
        self.has_extra_convs = has_extra_convs
        return

    def _conv_reduce(self, input, name, num_filters=256, filter_size=1, padding=0):
        '''Channel-reduction conv; weight/bias names are derived from `name`
        so they must stay stable for checkpoint compatibility.'''
        out = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=1,
            padding=padding,
            act=None,
            param_attr=ParamAttr(name=name + "_reduce_weights"),
            bias_attr=ParamAttr(name=name + "_reduce_biases"),
            name=name + '.reduce.output.1')
        return out

    def _fpn_upsampling(self, input, shape_var):
        '''Bilinearly resize `input` to the (H, W) carried in `shape_var`
        (a shape tensor of the target feature map).'''
        # Slice [2:4] of an NCHW shape tensor -> (H, W) of the target map.
        shape_hw = fluid.layers.slice(shape_var, axes=[0], starts=[2], ends=[4])
        out_shape_ = shape_hw
        out_shape = fluid.layers.cast(out_shape_, dtype='int32')
        out_shape.stop_gradient = True
        # `actual_shape` takes precedence over `scale=2.`; the scale only
        # serves as a static-graph size hint.
        output = fluid.layers.resize_bilinear(input, scale=2., actual_shape=out_shape)
        return output

    def _add_es_topdown_lateral(self, conv_upper, conv_down, output_level):
        '''
        Fuse one top-down step: upsample the upper-level feature to the
        lower level's spatial size, add, then smooth with a 3x3 conv.

        conv_upper : upper level feature
        conv_down : low level feature
        output_level : output feature map level, same as conv_down

        Returns the smoothed feature and its layer name.
        '''
        down_shape_op = fluid.layers.shape(conv_down)
        up = self._fpn_upsampling(conv_upper, down_shape_op)
        # Element-wise add requires matching channel counts; both inputs
        # are expected to already have num_chan channels.
        output = fluid.layers.elementwise_add(up, conv_down)
        output_name = 'P{}.smooth.conv1_1.output.1'.format( output_level )
        output = fluid.layers.conv2d(
            input=output,
            num_filters=self.num_chan,
            filter_size=3,
            stride=1,
            padding=1,
            act=None,
            param_attr=ParamAttr(name="P{}_smooth_weights".format( output_level )),
            bias_attr=ParamAttr("P{}_smooth_biases".format( output_level )),
            name=output_name)
        return output, output_name

    def conv_smooth_reduce( self, conv, level ):
        '''
        Two-step reduction of a backbone stage: a 3x3 conv halving the
        channel count, then a 1x1 conv down to num_chan channels.
        '''
        ch_med = conv.shape[1] // 2
        conv_st1 = self._conv_reduce(conv,
                                     "res{}_r1".format(level),
                                     num_filters = ch_med,
                                     filter_size=3,
                                     padding=1)
        conv_st2 = self._conv_reduce(conv_st1,
                                     "res{}".format(level),
                                     num_filters = self.num_chan,
                                     filter_size=1,
                                     padding=0)
        return conv_st2

    def get_output(self, body_dict):
        """Build the P4/P3/P2 pyramid from backbone features.

        Returns an OrderedDict keyed coarse-to-fine (P4, P3, P2) and the
        configured spatial scales.
        """
        # Backbone dict is ordered res2..res5; reverse to res5..res2.
        body_name_list = list(body_dict.keys())[::-1]
        res5_name, res4_name, res3_name, res2_name = body_name_list
        res2 = body_dict[res2_name]
        res3 = body_dict[res3_name]
        res4 = body_dict[res4_name]
        res5 = body_dict[res5_name]
        # Reduce the three coarsest stages to num_chan channels.
        res5_reduce = self.conv_smooth_reduce( res5, 5 )
        res4_reduce = self.conv_smooth_reduce( res4, 4 )
        res3_reduce = self.conv_smooth_reduce( res3, 3 )
        # Top-down fusion. res2 is used without reduction — assumes the C2
        # feature already has num_chan channels (TODO confirm for
        # backbones other than ResNet50 with num_chan=256).
        P4, P4_name = self._add_es_topdown_lateral( res5_reduce, res4_reduce, 4 )
        P3, P3_name = self._add_es_topdown_lateral( P4, res3_reduce, 3 )
        P2, P2_name = self._add_es_topdown_lateral( P3, res2, 2 )
        fpn_name_list = [P4_name, P3_name, P2_name]
        fpn_dict = { P4_name:P4, P3_name:P3, P2_name:P2 }
        res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list])
        return res_dict, self.spatial_scale
\ No newline at end of file
......@@ -95,3 +95,50 @@ class FPNRoIAlign(object):
roi_feat = fluid.layers.lod_reset(roi_feat_, rois)
return roi_feat
@register
class ESFPNRoIAlign(object):
    """
    RoI align pooling for ESFPN feature maps.

    Unlike FPNRoIAlign, every RoI is pooled from a single feature map:
    the last entry of the head-input dict, which for ESFPN's
    coarse-to-fine ordering (P4, P3, P2) is the finest map, P2.

    Args:
        sampling_ratio (int): number of sampling points per bin (0 lets
            the op derive it from the RoI size)
        spatial_scale (float): default feature-map scale; the per-call
            `spatial_scale` argument is what is actually used in __call__
        box_resolution (int): output height & width for box RoIs
        mask_resolution (int): output height & width for mask RoIs
    """

    def __init__(self,
                 sampling_ratio=0,
                 spatial_scale=1. / 4,
                 box_resolution=14,
                 mask_resolution=14):
        super(ESFPNRoIAlign, self).__init__()
        self.sampling_ratio = sampling_ratio
        self.spatial_scale = spatial_scale
        self.box_resolution = box_resolution
        self.mask_resolution = mask_resolution

    def __call__(self, head_inputs, rois, spatial_scale, is_mask=False):
        """
        Adopt RoI align onto a single feature map (P2) to get RoI features.

        Args:
            head_inputs (dict): ordered ESFPN outputs; only the last
                entry is used.
            rois (Variable): proposals to pool.
            spatial_scale (list): per-level scales; the last one
                corresponds to the map actually used.
            is_mask (bool): use mask_resolution instead of box_resolution.

        Returns:
            roi_feat(Variable): RoI features with shape of [M, C, R, R],
                where M is the number of RoIs and R is RoI resolution.
        """
        # Last dict entry == finest ESFPN map; keep the parameter
        # `spatial_scale` unshadowed by binding its last element to a
        # fresh local.
        name_list = list(head_inputs.keys())
        faster_rcnn_head_name = name_list[-1]
        head_input = head_inputs[faster_rcnn_head_name]
        scale = spatial_scale[-1]
        # Was `is_mask and self.mask_resolution or self.box_resolution`:
        # that idiom silently falls through to box_resolution whenever
        # mask_resolution is falsy (e.g. 0). Use a real conditional.
        resolution = self.mask_resolution if is_mask else self.box_resolution
        roi_feat = fluid.layers.roi_align(
            input=head_input,
            rois=rois,
            pooled_height=resolution,
            pooled_width=resolution,
            spatial_scale=scale,
            sampling_ratio=self.sampling_ratio)
        return roi_feat
......@@ -138,7 +138,7 @@ def main():
freeze_bn = getattr(model.backbone, 'freeze_norm', False)
if FLAGS.resume_checkpoint:
checkpoint.load_checkpoint(exe, train_prog, FLAGS.resume_checkpoint)
elif cfg.pretrain_weights and freeze_bn:
elif cfg.pretrain_weights and freeze_bn and cfg["fusebn"]:
checkpoint.load_and_fusebn(exe, train_prog, cfg.pretrain_weights)
elif cfg.pretrain_weights:
checkpoint.load_pretrain(exe, train_prog, cfg.pretrain_weights)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册