提交 109c5c2d 编写于 作者: Y Yuan Gao 提交者: wangguanzhong

add group normalization head to cascade mask rcnn (#3237)

* add cascade gn

* add cascade mask gn config

* update configs
上级 dca4a16d
......@@ -86,14 +86,14 @@ MaskAssigner:
resolution: 28
CascadeBBoxHead:
head: FC6FC7Head
head: CascadeTwoFCHead
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
FC6FC7Head:
num_chan: 1024
CascadeTwoFCHead:
mlp_dim: 1024
LearningRate:
base_lr: 0.01
......
......@@ -77,14 +77,14 @@ CascadeBBoxAssigner:
fg_fraction: 0.25
CascadeBBoxHead:
head: FC6FC7Head
head: CascadeTwoFCHead
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
FC6FC7Head:
num_chan: 1024
CascadeTwoFCHead:
mlp_dim: 1024
LearningRate:
base_lr: 0.02
......
......@@ -79,14 +79,14 @@ CascadeBBoxAssigner:
fg_fraction: 0.25
CascadeBBoxHead:
head: FC6FC7Head
head: CascadeTwoFCHead
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
FC6FC7Head:
num_chan: 1024
CascadeTwoFCHead:
mlp_dim: 1024
LearningRate:
base_lr: 0.02
......
......@@ -79,14 +79,14 @@ CascadeBBoxAssigner:
fg_fraction: 0.25
CascadeBBoxHead:
head: FC6FC7Head
head: CascadeTwoFCHead
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
FC6FC7Head:
num_chan: 1024
CascadeTwoFCHead:
mlp_dim: 1024
LearningRate:
base_lr: 0.02
......
......@@ -81,14 +81,14 @@ CascadeBBoxAssigner:
fg_fraction: 0.25
CascadeBBoxHead:
head: FC6FC7Head
head: CascadeTwoFCHead
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
FC6FC7Head:
num_chan: 1024
CascadeTwoFCHead:
mlp_dim: 1024
LearningRate:
base_lr: 0.02
......
# Global run settings for the Cascade Mask R-CNN config whose output path below is
# named cascade_mask_rcnn_r50_fpn_gn_2x.
# NOTE(review): nesting indentation appears to have been lost in this rendering; keys
# listed under a `Name:` header presumably belong to that section — confirm against
# the original file.
architecture: CascadeMaskRCNN
# Names of the train / eval / test data-feed sections defined later in this config.
train_feed: MaskRCNNTrainFeed
eval_feed: MaskRCNNEvalFeed
test_feed: MaskRCNNTestFeed
# Total number of training iterations and the checkpoint (snapshot) interval.
max_iters: 180000
snapshot_iter: 10000
use_gpu: true
log_smooth_window: 20
save_dir: output
# Pretrained ResNet50 weights archive used as the starting point.
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar
# Location of the final trained model under save_dir.
weights: output/cascade_mask_rcnn_r50_fpn_gn_2x/model_final/
metric: COCO
# presumably 80 COCO categories plus one background class — TODO confirm
num_classes: 81
# Component wiring for the CascadeMaskRCNN architecture: each key names the
# config section that configures that part of the model.
CascadeMaskRCNN:
backbone: ResNet
fpn: FPN
rpn_head: FPNRPNHead
roi_extractor: FPNRoIAlign
bbox_head: CascadeBBoxHead
bbox_assigner: CascadeBBoxAssigner
mask_head: MaskHead
mask_assigner: MaskAssigner
# Backbone settings: ResNet with depth 50 and affine_channel normalization.
ResNet:
depth: 50
# presumably the ResNet stages whose outputs are passed on to the FPN — TODO confirm
feature_maps: [2, 3, 4, 5]
freeze_at: 2
norm_type: affine_channel
# Feature pyramid settings: levels 2 through 6, 256 channels per level.
FPN:
max_level: 6
min_level: 2
num_chan: 256
# Four scales: 1/32, 1/16, 1/8 and 1/4.
spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
# `gn` selects group normalization for the FPN, the variant this config adds.
norm_type: gn
# Region proposal head operating on the FPN levels 2 through 6.
FPNRPNHead:
# Anchor generation: three aspect ratios per location, unit variances.
anchor_generator:
aspect_ratios: [0.5, 1.0, 2.0]
variance: [1.0, 1.0, 1.0, 1.0]
anchor_start_size: 32
max_level: 6
min_level: 2
num_chan: 256
# RPN training-target assignment: 256 samples per image, half foreground;
# overlap thresholds are 0.7 (positive) and 0.3 (negative).
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_negative_overlap: 0.3
rpn_positive_overlap: 0.7
rpn_straddle_thresh: 0.0
# Proposal generation during training: 2000 candidates before and after NMS.
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 2000
post_nms_top_n: 2000
# Proposal generation during testing: 1000 candidates before and after NMS.
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 1000
post_nms_top_n: 1000
# RoI feature extraction over FPN levels 2 through 5.
FPNRoIAlign:
# NOTE(review): this key is spelled `canconical_level` (compare `canonical_size`
# on the next line); left untouched because the consuming code may expect this
# exact spelling — confirm before renaming.
canconical_level: 4
canonical_size: 224
max_level: 5
min_level: 2
sampling_ratio: 2
# Output resolution of RoI features for the box branch (7) and mask branch (14).
box_resolution: 7
mask_resolution: 14
# Mask branch: four convolutions with 256 filters, 28-pixel mask resolution,
# group normalization (`gn`).
MaskHead:
dilation: 1
conv_dim: 256
num_convs: 4
resolution: 28
norm_type: gn
# Box target assignment for the cascade head. The list-valued settings carry three
# entries — presumably one per cascade stage (TODO confirm) — with the foreground
# threshold rising from 0.5 to 0.7.
CascadeBBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [10, 20, 30]
bg_thresh_hi: [0.5, 0.6, 0.7]
bg_thresh_lo: [0.0, 0.0, 0.0]
fg_fraction: 0.25
fg_thresh: [0.5, 0.6, 0.7]
# Mask target resolution; same value as MaskHead.resolution in this config.
MaskAssigner:
resolution: 28
# Cascade box head: uses the convolution + normalization head introduced by this
# commit instead of the two-FC head used by the other cascade configs.
CascadeBBoxHead:
head: CascadeXConvNormHead
# Final detection NMS: keep at most 100 boxes, IoU threshold 0.5, score floor 0.05.
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
# Only the normalization type is set here; other constructor arguments are left
# at their defaults.
CascadeXConvNormHead:
norm_type: gn
# Learning-rate schedule: base rate 0.02, multiplied by gamma 0.1 at iterations
# 120000 and 160000, preceded by a 500-step linear warmup starting at 1/3 of the
# base rate.
LearningRate:
base_lr: 0.02
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [120000, 160000]
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
# Optimizer: Momentum (0.9) with L2 regularization of factor 0.0001.
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
# Training data feed: COCO train2017 images and instance annotations, 2 images per
# batch, batches padded to a stride of 32.
MaskRCNNTrainFeed:
batch_size: 2
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
# Evaluation data feed: COCO val2017 images and instance annotations, 1 image per batch.
MaskRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: dataset/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
# Test (inference) data feed: only an annotation file is given, as a full path;
# no dataset_dir or image_dir is set here.
MaskRCNNTestFeed:
batch_size: 1
dataset:
annotation: dataset/coco/annotations/instances_val2017.json
batch_transforms:
- !PadBatch
pad_to_stride: 32
num_workers: 2
......@@ -35,6 +35,7 @@ def ConvNorm(input,
norm_type='affine_channel',
norm_groups=32,
dilation=1,
lr_scale=1,
freeze_norm=False,
act=None,
norm_name=None,
......@@ -51,18 +52,20 @@ def ConvNorm(input,
groups=groups,
act=None,
param_attr=ParamAttr(
name=name + "_weights", initializer=initializer),
name=name + "_weights",
initializer=initializer,
learning_rate=lr_scale),
bias_attr=False,
name=name + '.conv2d.output.1')
norm_lr = 0. if freeze_norm else 1.
pattr = ParamAttr(
name=norm_name + '_scale',
learning_rate=norm_lr,
learning_rate=norm_lr * lr_scale,
regularizer=L2Decay(norm_decay))
battr = ParamAttr(
name=norm_name + '_offset',
learning_rate=norm_lr,
learning_rate=norm_lr * lr_scale,
regularizer=L2Decay(norm_decay))
if norm_type in ['bn', 'sync_bn']:
......
......@@ -19,8 +19,10 @@ import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Normal, Xavier
from paddle.fluid.regularizer import L2Decay
from paddle.fluid.initializer import MSRA
from ppdet.modeling.ops import MultiClassNMS
from ppdet.modeling.ops import ConvNorm
from ppdet.core.workspace import register
__all__ = ['CascadeBBoxHead']
......@@ -50,7 +52,7 @@ class CascadeBBoxHead(object):
def get_output(self,
roi_feat,
cls_agnostic_bbox_reg=2,
wb_scalar=2.0,
wb_scalar=1.0,
name=''):
"""
Get bbox head output.
......@@ -77,7 +79,7 @@ class CascadeBBoxHead(object):
learning_rate=wb_scalar),
bias_attr=ParamAttr(
name='cls_score%s_b' % name,
learning_rate=wb_scalar,
learning_rate=wb_scalar * 2,
regularizer=L2Decay(0.)))
bbox_pred = fluid.layers.fc(input=head_feat,
size=4 * cls_agnostic_bbox_reg,
......@@ -90,7 +92,7 @@ class CascadeBBoxHead(object):
learning_rate=wb_scalar),
bias_attr=ParamAttr(
name='bbox_pred%s_b' % name,
learning_rate=wb_scalar,
learning_rate=wb_scalar * 2,
regularizer=L2Decay(0.)))
return cls_score, bbox_pred
......@@ -177,7 +179,7 @@ class CascadeBBoxHead(object):
for i in range(repreat_num):
# cls score
if i < 2:
cls_score = self._head_share(
cls_score, _ = self.get_output(
roi_feat_list[-1], # roi_feat_3
name='_' + str(i + 1) if i > 0 else '')
else:
......@@ -216,66 +218,82 @@ class CascadeBBoxHead(object):
pred_result = self.nms(bboxes=box_out, scores=boxes_cls_prob_mean)
return {"bbox": pred_result}
def _head_share(self, roi_feat, wb_scalar=2.0, name=''):
# FC6 FC7
fan = roi_feat.shape[1] * roi_feat.shape[2] * roi_feat.shape[3]
fc6 = fluid.layers.fc(input=roi_feat,
size=self.head.num_chan,
act='relu',
name='fc6' + name,
param_attr=ParamAttr(
name='fc6%s_w' % name,
initializer=Xavier(fan_out=fan),
learning_rate=wb_scalar, ),
bias_attr=ParamAttr(
name='fc6%s_b' % name,
learning_rate=2.0,
regularizer=L2Decay(0.)))
fc7 = fluid.layers.fc(input=fc6,
size=self.head.num_chan,
act='relu',
name='fc7' + name,
param_attr=ParamAttr(
name='fc7%s_w' % name,
initializer=Xavier(),
learning_rate=wb_scalar, ),
bias_attr=ParamAttr(
name='fc7%s_b' % name,
learning_rate=2.0,
regularizer=L2Decay(0.)))
cls_score = fluid.layers.fc(input=fc7,
size=self.num_classes,
act=None,
name='cls_score' + name,
@register
class CascadeXConvNormHead(object):
"""
RCNN head with several convolution layers
Args:
num_conv (int): num of convolution layers for the rcnn head
conv_dim (int): num of filters for the conv layers
mlp_dim (int): num of filters for the fc layers
"""
__shared__ = ['norm_type', 'freeze_norm']
def __init__(self,
num_conv=4,
conv_dim=256,
mlp_dim=1024,
norm_type=None,
freeze_norm=False):
super(CascadeXConvNormHead, self).__init__()
self.conv_dim = conv_dim
self.mlp_dim = mlp_dim
self.num_conv = num_conv
self.norm_type = norm_type
self.freeze_norm = freeze_norm
def __call__(self, roi_feat, wb_scalar=1.0, name=''):
conv = roi_feat
fan = self.conv_dim * 3 * 3
initializer = MSRA(uniform=False, fan_in=fan)
for i in range(self.num_conv):
name = 'bbox_head_conv' + str(i)
conv = ConvNorm(
conv,
self.conv_dim,
3,
act='relu',
initializer=initializer,
norm_type=self.norm_type,
freeze_norm=self.freeze_norm,
lr_scale=wb_scalar,
name=name,
norm_name=name)
fan = conv.shape[1] * conv.shape[2] * conv.shape[3]
head_heat = fluid.layers.fc(input=conv,
size=self.mlp_dim,
act='relu',
name='fc6' + name,
param_attr=ParamAttr(
name='cls_score%s_w' % name,
initializer=Normal(
loc=0.0, scale=0.01),
learning_rate=wb_scalar, ),
name='fc6%s_w' % name,
initializer=Xavier(fan_out=fan),
learning_rate=wb_scalar),
bias_attr=ParamAttr(
name='cls_score%s_b' % name,
learning_rate=2.0,
regularizer=L2Decay(0.)))
return cls_score
name='fc6%s_b' % name,
regularizer=L2Decay(0.),
learning_rate=wb_scalar * 2))
return head_heat
@register
class FC6FC7Head(object):
class CascadeTwoFCHead(object):
"""
Cascade RCNN head with two Fully Connected layers
Cascade RCNN head with two fully connected layers
Args:
num_chan (int): num of filters for the fc layers
mlp_dim (int): num of filters for the fc layers
"""
def __init__(self, num_chan):
super(FC6FC7Head, self).__init__()
self.num_chan = num_chan
def __init__(self, mlp_dim):
super(CascadeTwoFCHead, self).__init__()
self.mlp_dim = mlp_dim
def __call__(self, roi_feat, wb_scalar=1.0, name=''):
fan = roi_feat.shape[1] * roi_feat.shape[2] * roi_feat.shape[3]
fc6 = fluid.layers.fc(input=roi_feat,
size=self.num_chan,
size=self.mlp_dim,
act='relu',
name='fc6' + name,
param_attr=ParamAttr(
......@@ -284,10 +302,10 @@ class FC6FC7Head(object):
learning_rate=wb_scalar),
bias_attr=ParamAttr(
name='fc6%s_b' % name,
learning_rate=wb_scalar,
learning_rate=wb_scalar * 2,
regularizer=L2Decay(0.)))
head_feat = fluid.layers.fc(input=fc6,
size=self.num_chan,
size=self.mlp_dim,
act='relu',
name='fc7' + name,
param_attr=ParamAttr(
......@@ -296,6 +314,6 @@ class FC6FC7Head(object):
learning_rate=wb_scalar),
bias_attr=ParamAttr(
name='fc7%s_b' % name,
learning_rate=wb_scalar,
learning_rate=wb_scalar * 2,
regularizer=L2Decay(0.)))
return head_feat
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册