提交 109c5c2d 编写于 作者: Y Yuan Gao 提交者: wangguanzhong

add group normalization head to cascade mask rcnn (#3237)

* add cascade gn

* add cascade mask gn config

* update configs
上级 dca4a16d
...@@ -86,14 +86,14 @@ MaskAssigner: ...@@ -86,14 +86,14 @@ MaskAssigner:
resolution: 28 resolution: 28
CascadeBBoxHead: CascadeBBoxHead:
head: FC6FC7Head head: CascadeTwoFCHead
nms: nms:
keep_top_k: 100 keep_top_k: 100
nms_threshold: 0.5 nms_threshold: 0.5
score_threshold: 0.05 score_threshold: 0.05
FC6FC7Head: CascadeTwoFCHead:
num_chan: 1024 mlp_dim: 1024
LearningRate: LearningRate:
base_lr: 0.01 base_lr: 0.01
......
...@@ -77,14 +77,14 @@ CascadeBBoxAssigner: ...@@ -77,14 +77,14 @@ CascadeBBoxAssigner:
fg_fraction: 0.25 fg_fraction: 0.25
CascadeBBoxHead: CascadeBBoxHead:
head: FC6FC7Head head: CascadeTwoFCHead
nms: nms:
keep_top_k: 100 keep_top_k: 100
nms_threshold: 0.5 nms_threshold: 0.5
score_threshold: 0.05 score_threshold: 0.05
FC6FC7Head: CascadeTwoFCHead:
num_chan: 1024 mlp_dim: 1024
LearningRate: LearningRate:
base_lr: 0.02 base_lr: 0.02
......
...@@ -79,14 +79,14 @@ CascadeBBoxAssigner: ...@@ -79,14 +79,14 @@ CascadeBBoxAssigner:
fg_fraction: 0.25 fg_fraction: 0.25
CascadeBBoxHead: CascadeBBoxHead:
head: FC6FC7Head head: CascadeTwoFCHead
nms: nms:
keep_top_k: 100 keep_top_k: 100
nms_threshold: 0.5 nms_threshold: 0.5
score_threshold: 0.05 score_threshold: 0.05
FC6FC7Head: CascadeTwoFCHead:
num_chan: 1024 mlp_dim: 1024
LearningRate: LearningRate:
base_lr: 0.02 base_lr: 0.02
......
...@@ -79,14 +79,14 @@ CascadeBBoxAssigner: ...@@ -79,14 +79,14 @@ CascadeBBoxAssigner:
fg_fraction: 0.25 fg_fraction: 0.25
CascadeBBoxHead: CascadeBBoxHead:
head: FC6FC7Head head: CascadeTwoFCHead
nms: nms:
keep_top_k: 100 keep_top_k: 100
nms_threshold: 0.5 nms_threshold: 0.5
score_threshold: 0.05 score_threshold: 0.05
FC6FC7Head: CascadeTwoFCHead:
num_chan: 1024 mlp_dim: 1024
LearningRate: LearningRate:
base_lr: 0.02 base_lr: 0.02
......
...@@ -81,14 +81,14 @@ CascadeBBoxAssigner: ...@@ -81,14 +81,14 @@ CascadeBBoxAssigner:
fg_fraction: 0.25 fg_fraction: 0.25
CascadeBBoxHead: CascadeBBoxHead:
head: FC6FC7Head head: CascadeTwoFCHead
nms: nms:
keep_top_k: 100 keep_top_k: 100
nms_threshold: 0.5 nms_threshold: 0.5
score_threshold: 0.05 score_threshold: 0.05
FC6FC7Head: CascadeTwoFCHead:
num_chan: 1024 mlp_dim: 1024
LearningRate: LearningRate:
base_lr: 0.02 base_lr: 0.02
......
# Run-level settings for a CascadeMaskRCNN model; the output weights path
# below names this variant cascade_mask_rcnn_r50_fpn_gn_2x.
architecture: CascadeMaskRCNN
# Names of the feed sections (defined later in this file) used for
# training, evaluation and testing.
train_feed: MaskRCNNTrainFeed
eval_feed: MaskRCNNEvalFeed
test_feed: MaskRCNNTestFeed
# Iteration budget and snapshot interval (exact semantics are defined by the
# training script, not by this file).
max_iters: 180000
snapshot_iter: 10000
use_gpu: true
log_smooth_window: 20
save_dir: output
# ImageNet-pretrained ResNet50 weights fetched from this URL.
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar
weights: output/cascade_mask_rcnn_r50_fpn_gn_2x/model_final/
metric: COCO
# NOTE(review): 81 presumably means 80 COCO categories plus background -- confirm.
num_classes: 81
# Selects, by name, the component used for each slot of the CascadeMaskRCNN
# architecture. Every component named here has its own section in this file.
CascadeMaskRCNN:
backbone: ResNet
fpn: FPN
rpn_head: FPNRPNHead
roi_extractor: FPNRoIAlign
bbox_head: CascadeBBoxHead
bbox_assigner: CascadeBBoxAssigner
mask_head: MaskHead
mask_assigner: MaskAssigner
# Backbone settings: ResNet with depth 50, exposing feature maps 2 through 5.
# The backbone uses norm_type affine_channel, while the FPN, mask head and
# box head sections of this file use gn.
ResNet:
depth: 50
feature_maps: [2, 3, 4, 5]
freeze_at: 2
norm_type: affine_channel
# FPN settings: levels 2 to 6 with 256 channels.
# spatial_scale lists four values (1/32, 1/16, 1/8, 1/4).
# NOTE(review): norm_type gn presumably selects group normalization -- confirm
# against the FPN implementation.
FPN:
max_level: 6
min_level: 2
num_chan: 256
spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
norm_type: gn
# RPN head settings, in four groups: anchor_generator, rpn_target_assign,
# train_proposal and test_proposal.
FPNRPNHead:
anchor_generator:
aspect_ratios: [0.5, 1.0, 2.0]
variance: [1.0, 1.0, 1.0, 1.0]
anchor_start_size: 32
# Level range and channel count match the values given in the FPN section.
max_level: 6
min_level: 2
num_chan: 256
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_negative_overlap: 0.3
rpn_positive_overlap: 0.7
rpn_straddle_thresh: 0.0
# pre_nms_top_n / post_nms_top_n are 2000 for training and 1000 for testing;
# min_size and nms_thresh are the same in both.
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 2000
post_nms_top_n: 2000
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 1000
post_nms_top_n: 1000
# RoI feature extractor settings.
# NOTE(review): the key 'canconical_level' is spelled this way here; it
# presumably has to match the parameter name in the FPNRoIAlign
# implementation -- verify before renaming it to 'canonical_level'.
FPNRoIAlign:
canconical_level: 4
canonical_size: 224
max_level: 5
min_level: 2
sampling_ratio: 2
# Separate output resolutions for the box branch (7) and the mask branch (14).
box_resolution: 7
mask_resolution: 14
# Mask head settings: 4 convs with conv_dim 256, norm_type gn.
# resolution 28 matches the MaskAssigner resolution in this file.
MaskHead:
dilation: 1
conv_dim: 256
num_convs: 4
resolution: 28
norm_type: gn
# Box sampling settings for the cascade head.
# NOTE(review): the three-element lists presumably hold one value per cascade
# stage (thresholds 0.5/0.6/0.7, regression weights 10/20/30) -- confirm
# against CascadeBBoxAssigner.
CascadeBBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [10, 20, 30]
bg_thresh_hi: [0.5, 0.6, 0.7]
bg_thresh_lo: [0.0, 0.0, 0.0]
fg_fraction: 0.25
fg_thresh: [0.5, 0.6, 0.7]
# Mask target settings; resolution 28 matches MaskHead.resolution in this file.
MaskAssigner:
resolution: 28
# Box head settings: the feature head is CascadeXConvNormHead (the other
# cascade configs touched by this commit use CascadeTwoFCHead), followed by
# the NMS settings below.
CascadeBBoxHead:
head: CascadeXConvNormHead
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
# Conv-based box head: only norm_type is set here; any other constructor
# arguments are left at the defaults defined in the implementation.
CascadeXConvNormHead:
norm_type: gn
# Learning-rate settings: base_lr 0.02 with two schedulers, PiecewiseDecay
# (gamma 0.1 at milestones 120000 and 160000) and LinearWarmup
# (500 steps, start factor 1/3).
LearningRate:
base_lr: 0.02
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [120000, 160000]
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
# Optimizer settings: Momentum optimizer (momentum 0.9) with an L2
# regularizer of factor 0.0001.
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
# Training feed: batch size 2, reading the train2017 images and annotations
# under dataset/coco.
MaskRCNNTrainFeed:
batch_size: 2
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
# PadBatch with pad_to_stride 32; 32 is also the inverse of the smallest FPN
# spatial_scale (0.03125) in this file.
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
# Evaluation feed: batch size 1, reading the val2017 images and annotations
# under dataset/coco.
MaskRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
# Same PadBatch setting as the training feed.
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
# Test feed: batch size 1. Unlike the train/eval feeds, the dataset entry
# gives only an annotation path (no dataset_dir or image_dir).
MaskRCNNTestFeed:
batch_size: 1
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
# Same PadBatch setting as the training and evaluation feeds.
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
...@@ -35,6 +35,7 @@ def ConvNorm(input, ...@@ -35,6 +35,7 @@ def ConvNorm(input,
norm_type='affine_channel', norm_type='affine_channel',
norm_groups=32, norm_groups=32,
dilation=1, dilation=1,
lr_scale=1,
freeze_norm=False, freeze_norm=False,
act=None, act=None,
norm_name=None, norm_name=None,
...@@ -51,18 +52,20 @@ def ConvNorm(input, ...@@ -51,18 +52,20 @@ def ConvNorm(input,
groups=groups, groups=groups,
act=None, act=None,
param_attr=ParamAttr( param_attr=ParamAttr(
name=name + "_weights", initializer=initializer), name=name + "_weights",
initializer=initializer,
learning_rate=lr_scale),
bias_attr=False, bias_attr=False,
name=name + '.conv2d.output.1') name=name + '.conv2d.output.1')
norm_lr = 0. if freeze_norm else 1. norm_lr = 0. if freeze_norm else 1.
pattr = ParamAttr( pattr = ParamAttr(
name=norm_name + '_scale', name=norm_name + '_scale',
learning_rate=norm_lr, learning_rate=norm_lr * lr_scale,
regularizer=L2Decay(norm_decay)) regularizer=L2Decay(norm_decay))
battr = ParamAttr( battr = ParamAttr(
name=norm_name + '_offset', name=norm_name + '_offset',
learning_rate=norm_lr, learning_rate=norm_lr * lr_scale,
regularizer=L2Decay(norm_decay)) regularizer=L2Decay(norm_decay))
if norm_type in ['bn', 'sync_bn']: if norm_type in ['bn', 'sync_bn']:
......
...@@ -19,8 +19,10 @@ import paddle.fluid as fluid ...@@ -19,8 +19,10 @@ import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Normal, Xavier from paddle.fluid.initializer import Normal, Xavier
from paddle.fluid.regularizer import L2Decay from paddle.fluid.regularizer import L2Decay
from paddle.fluid.initializer import MSRA
from ppdet.modeling.ops import MultiClassNMS from ppdet.modeling.ops import MultiClassNMS
from ppdet.modeling.ops import ConvNorm
from ppdet.core.workspace import register from ppdet.core.workspace import register
__all__ = ['CascadeBBoxHead'] __all__ = ['CascadeBBoxHead']
...@@ -50,7 +52,7 @@ class CascadeBBoxHead(object): ...@@ -50,7 +52,7 @@ class CascadeBBoxHead(object):
def get_output(self, def get_output(self,
roi_feat, roi_feat,
cls_agnostic_bbox_reg=2, cls_agnostic_bbox_reg=2,
wb_scalar=2.0, wb_scalar=1.0,
name=''): name=''):
""" """
Get bbox head output. Get bbox head output.
...@@ -77,7 +79,7 @@ class CascadeBBoxHead(object): ...@@ -77,7 +79,7 @@ class CascadeBBoxHead(object):
learning_rate=wb_scalar), learning_rate=wb_scalar),
bias_attr=ParamAttr( bias_attr=ParamAttr(
name='cls_score%s_b' % name, name='cls_score%s_b' % name,
learning_rate=wb_scalar, learning_rate=wb_scalar * 2,
regularizer=L2Decay(0.))) regularizer=L2Decay(0.)))
bbox_pred = fluid.layers.fc(input=head_feat, bbox_pred = fluid.layers.fc(input=head_feat,
size=4 * cls_agnostic_bbox_reg, size=4 * cls_agnostic_bbox_reg,
...@@ -90,7 +92,7 @@ class CascadeBBoxHead(object): ...@@ -90,7 +92,7 @@ class CascadeBBoxHead(object):
learning_rate=wb_scalar), learning_rate=wb_scalar),
bias_attr=ParamAttr( bias_attr=ParamAttr(
name='bbox_pred%s_b' % name, name='bbox_pred%s_b' % name,
learning_rate=wb_scalar, learning_rate=wb_scalar * 2,
regularizer=L2Decay(0.))) regularizer=L2Decay(0.)))
return cls_score, bbox_pred return cls_score, bbox_pred
...@@ -177,7 +179,7 @@ class CascadeBBoxHead(object): ...@@ -177,7 +179,7 @@ class CascadeBBoxHead(object):
for i in range(repreat_num): for i in range(repreat_num):
# cls score # cls score
if i < 2: if i < 2:
cls_score = self._head_share( cls_score, _ = self.get_output(
roi_feat_list[-1], # roi_feat_3 roi_feat_list[-1], # roi_feat_3
name='_' + str(i + 1) if i > 0 else '') name='_' + str(i + 1) if i > 0 else '')
else: else:
...@@ -216,66 +218,82 @@ class CascadeBBoxHead(object): ...@@ -216,66 +218,82 @@ class CascadeBBoxHead(object):
pred_result = self.nms(bboxes=box_out, scores=boxes_cls_prob_mean) pred_result = self.nms(bboxes=box_out, scores=boxes_cls_prob_mean)
return {"bbox": pred_result} return {"bbox": pred_result}
def _head_share(self, roi_feat, wb_scalar=2.0, name=''):
# FC6 FC7 @register
fan = roi_feat.shape[1] * roi_feat.shape[2] * roi_feat.shape[3] class CascadeXConvNormHead(object):
fc6 = fluid.layers.fc(input=roi_feat, """
size=self.head.num_chan, RCNN head with several convolution layers
act='relu',
name='fc6' + name, Args:
param_attr=ParamAttr( num_conv (int): num of convolution layers for the rcnn head
name='fc6%s_w' % name, conv_dim (int): num of filters for the conv layers
initializer=Xavier(fan_out=fan), mlp_dim (int): num of filters for the fc layers
learning_rate=wb_scalar, ), """
bias_attr=ParamAttr( __shared__ = ['norm_type', 'freeze_norm']
name='fc6%s_b' % name,
learning_rate=2.0, def __init__(self,
regularizer=L2Decay(0.))) num_conv=4,
fc7 = fluid.layers.fc(input=fc6, conv_dim=256,
size=self.head.num_chan, mlp_dim=1024,
act='relu', norm_type=None,
name='fc7' + name, freeze_norm=False):
param_attr=ParamAttr( super(CascadeXConvNormHead, self).__init__()
name='fc7%s_w' % name, self.conv_dim = conv_dim
initializer=Xavier(), self.mlp_dim = mlp_dim
learning_rate=wb_scalar, ), self.num_conv = num_conv
bias_attr=ParamAttr( self.norm_type = norm_type
name='fc7%s_b' % name, self.freeze_norm = freeze_norm
learning_rate=2.0,
regularizer=L2Decay(0.))) def __call__(self, roi_feat, wb_scalar=1.0, name=''):
cls_score = fluid.layers.fc(input=fc7, conv = roi_feat
size=self.num_classes, fan = self.conv_dim * 3 * 3
act=None, initializer = MSRA(uniform=False, fan_in=fan)
name='cls_score' + name, for i in range(self.num_conv):
name = 'bbox_head_conv' + str(i)
conv = ConvNorm(
conv,
self.conv_dim,
3,
act='relu',
initializer=initializer,
norm_type=self.norm_type,
freeze_norm=self.freeze_norm,
lr_scale=wb_scalar,
name=name,
norm_name=name)
fan = conv.shape[1] * conv.shape[2] * conv.shape[3]
head_heat = fluid.layers.fc(input=conv,
size=self.mlp_dim,
act='relu',
name='fc6' + name,
param_attr=ParamAttr( param_attr=ParamAttr(
name='cls_score%s_w' % name, name='fc6%s_w' % name,
initializer=Normal( initializer=Xavier(fan_out=fan),
loc=0.0, scale=0.01), learning_rate=wb_scalar),
learning_rate=wb_scalar, ),
bias_attr=ParamAttr( bias_attr=ParamAttr(
name='cls_score%s_b' % name, name='fc6%s_b' % name,
learning_rate=2.0, regularizer=L2Decay(0.),
regularizer=L2Decay(0.))) learning_rate=wb_scalar * 2))
return cls_score return head_heat
@register @register
class FC6FC7Head(object): class CascadeTwoFCHead(object):
""" """
Cascade RCNN head with two Fully Connected layers Cascade RCNN head with two Fully Connected layers
Args: Args:
num_chan (int): num of filters for the fc layers mlp_dim (int): num of filters for the fc layers
""" """
def __init__(self, num_chan): def __init__(self, mlp_dim):
super(FC6FC7Head, self).__init__() super(CascadeTwoFCHead, self).__init__()
self.num_chan = num_chan self.mlp_dim = mlp_dim
def __call__(self, roi_feat, wb_scalar=1.0, name=''): def __call__(self, roi_feat, wb_scalar=1.0, name=''):
fan = roi_feat.shape[1] * roi_feat.shape[2] * roi_feat.shape[3] fan = roi_feat.shape[1] * roi_feat.shape[2] * roi_feat.shape[3]
fc6 = fluid.layers.fc(input=roi_feat, fc6 = fluid.layers.fc(input=roi_feat,
size=self.num_chan, size=self.mlp_dim,
act='relu', act='relu',
name='fc6' + name, name='fc6' + name,
param_attr=ParamAttr( param_attr=ParamAttr(
...@@ -284,10 +302,10 @@ class FC6FC7Head(object): ...@@ -284,10 +302,10 @@ class FC6FC7Head(object):
learning_rate=wb_scalar), learning_rate=wb_scalar),
bias_attr=ParamAttr( bias_attr=ParamAttr(
name='fc6%s_b' % name, name='fc6%s_b' % name,
learning_rate=wb_scalar, learning_rate=wb_scalar * 2,
regularizer=L2Decay(0.))) regularizer=L2Decay(0.)))
head_feat = fluid.layers.fc(input=fc6, head_feat = fluid.layers.fc(input=fc6,
size=self.num_chan, size=self.mlp_dim,
act='relu', act='relu',
name='fc7' + name, name='fc7' + name,
param_attr=ParamAttr( param_attr=ParamAttr(
...@@ -296,6 +314,6 @@ class FC6FC7Head(object): ...@@ -296,6 +314,6 @@ class FC6FC7Head(object):
learning_rate=wb_scalar), learning_rate=wb_scalar),
bias_attr=ParamAttr( bias_attr=ParamAttr(
name='fc7%s_b' % name, name='fc7%s_b' % name,
learning_rate=wb_scalar, learning_rate=wb_scalar * 2,
regularizer=L2Decay(0.))) regularizer=L2Decay(0.)))
return head_feat return head_feat
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册