diff --git a/configs/cascade_mask_rcnn_r50_fpn_1x.yml b/configs/cascade_mask_rcnn_r50_fpn_1x.yml index 9a7b7a8dce4d1b4e48ed794e8cc2a458cbf363b3..1d17f53c60e66061ab8a21d624f4191a15ee5a01 100644 --- a/configs/cascade_mask_rcnn_r50_fpn_1x.yml +++ b/configs/cascade_mask_rcnn_r50_fpn_1x.yml @@ -86,14 +86,14 @@ MaskAssigner: resolution: 28 CascadeBBoxHead: - head: FC6FC7Head + head: CascadeTwoFCHead nms: keep_top_k: 100 nms_threshold: 0.5 score_threshold: 0.05 -FC6FC7Head: - num_chan: 1024 +CascadeTwoFCHead: + mlp_dim: 1024 LearningRate: base_lr: 0.01 diff --git a/configs/cascade_rcnn_r50_fpn_1x.yml b/configs/cascade_rcnn_r50_fpn_1x.yml index f8830ae6b1fea5435295c714d03beab3283291ff..47c089c1d95a35aacdcc0766670dc984cb18dc32 100644 --- a/configs/cascade_rcnn_r50_fpn_1x.yml +++ b/configs/cascade_rcnn_r50_fpn_1x.yml @@ -77,14 +77,14 @@ CascadeBBoxAssigner: fg_fraction: 0.25 CascadeBBoxHead: - head: FC6FC7Head + head: CascadeTwoFCHead nms: keep_top_k: 100 nms_threshold: 0.5 score_threshold: 0.05 -FC6FC7Head: - num_chan: 1024 +CascadeTwoFCHead: + mlp_dim: 1024 LearningRate: base_lr: 0.02 diff --git a/configs/dcn/cascade_rcnn_dcn_r101_vd_fpn_1x.yml b/configs/dcn/cascade_rcnn_dcn_r101_vd_fpn_1x.yml index a491368395e366f3a29f7309b1a13e05b6559915..93373adb3a7f72b64ba45996ef61c2a9d3da8414 100644 --- a/configs/dcn/cascade_rcnn_dcn_r101_vd_fpn_1x.yml +++ b/configs/dcn/cascade_rcnn_dcn_r101_vd_fpn_1x.yml @@ -79,14 +79,14 @@ CascadeBBoxAssigner: fg_fraction: 0.25 CascadeBBoxHead: - head: FC6FC7Head + head: CascadeTwoFCHead nms: keep_top_k: 100 nms_threshold: 0.5 score_threshold: 0.05 -FC6FC7Head: - num_chan: 1024 +CascadeTwoFCHead: + mlp_dim: 1024 LearningRate: base_lr: 0.02 diff --git a/configs/dcn/cascade_rcnn_dcn_r50_fpn_1x.yml b/configs/dcn/cascade_rcnn_dcn_r50_fpn_1x.yml index 97697a3aa6c6551ba8847e11781603d5f810fdd1..4c74bd877b644659812bbcab960a4ce4600277ef 100644 --- a/configs/dcn/cascade_rcnn_dcn_r50_fpn_1x.yml +++ b/configs/dcn/cascade_rcnn_dcn_r50_fpn_1x.yml @@ -79,14 +79,14 
@@ CascadeBBoxAssigner: fg_fraction: 0.25 CascadeBBoxHead: - head: FC6FC7Head + head: CascadeTwoFCHead nms: keep_top_k: 100 nms_threshold: 0.5 score_threshold: 0.05 -FC6FC7Head: - num_chan: 1024 +CascadeTwoFCHead: + mlp_dim: 1024 LearningRate: base_lr: 0.02 diff --git a/configs/dcn/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x.yml b/configs/dcn/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x.yml index a3dddbbbbfa16cbe392a19e22f953fabd80c1203..dbbe2d8014d716e68ba481f4b66eb7fe50164356 100644 --- a/configs/dcn/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x.yml +++ b/configs/dcn/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x.yml @@ -81,14 +81,14 @@ CascadeBBoxAssigner: fg_fraction: 0.25 CascadeBBoxHead: - head: FC6FC7Head + head: CascadeTwoFCHead nms: keep_top_k: 100 nms_threshold: 0.5 score_threshold: 0.05 -FC6FC7Head: - num_chan: 1024 +CascadeTwoFCHead: + mlp_dim: 1024 LearningRate: base_lr: 0.02 diff --git a/configs/gn/cascade_mask_rcnn_r50_fpn_gn_2x.yml b/configs/gn/cascade_mask_rcnn_r50_fpn_gn_2x.yml new file mode 100644 index 0000000000000000000000000000000000000000..52c61ad4b57bf1464fe9e2816cec710563a9d707 --- /dev/null +++ b/configs/gn/cascade_mask_rcnn_r50_fpn_gn_2x.yml @@ -0,0 +1,147 @@ +architecture: CascadeMaskRCNN +train_feed: MaskRCNNTrainFeed +eval_feed: MaskRCNNEvalFeed +test_feed: MaskRCNNTestFeed +max_iters: 180000 +snapshot_iter: 10000 +use_gpu: true +log_smooth_window: 20 +save_dir: output +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar +weights: output/cascade_mask_rcnn_r50_fpn_gn_2x/model_final/ +metric: COCO +num_classes: 81 + +CascadeMaskRCNN: + backbone: ResNet + fpn: FPN + rpn_head: FPNRPNHead + roi_extractor: FPNRoIAlign + bbox_head: CascadeBBoxHead + bbox_assigner: CascadeBBoxAssigner + mask_head: MaskHead + mask_assigner: MaskAssigner + +ResNet: + depth: 50 + feature_maps: [2, 3, 4, 5] + freeze_at: 2 + norm_type: affine_channel + +FPN: + max_level: 6 + min_level: 2 + num_chan: 256 + spatial_scale: [0.03125, 0.0625, 0.125, 0.25] 
+ norm_type: gn + +FPNRPNHead: + anchor_generator: + aspect_ratios: [0.5, 1.0, 2.0] + variance: [1.0, 1.0, 1.0, 1.0] + anchor_start_size: 32 + max_level: 6 + min_level: 2 + num_chan: 256 + rpn_target_assign: + rpn_batch_size_per_im: 256 + rpn_fg_fraction: 0.5 + rpn_negative_overlap: 0.3 + rpn_positive_overlap: 0.7 + rpn_straddle_thresh: 0.0 + train_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 2000 + post_nms_top_n: 2000 + test_proposal: + min_size: 0.0 + nms_thresh: 0.7 + pre_nms_top_n: 1000 + post_nms_top_n: 1000 + +FPNRoIAlign: + canconical_level: 4 + canonical_size: 224 + max_level: 5 + min_level: 2 + sampling_ratio: 2 + box_resolution: 7 + mask_resolution: 14 + +MaskHead: + dilation: 1 + conv_dim: 256 + num_convs: 4 + resolution: 28 + norm_type: gn + +CascadeBBoxAssigner: + batch_size_per_im: 512 + bbox_reg_weights: [10, 20, 30] + bg_thresh_hi: [0.5, 0.6, 0.7] + bg_thresh_lo: [0.0, 0.0, 0.0] + fg_fraction: 0.25 + fg_thresh: [0.5, 0.6, 0.7] + +MaskAssigner: + resolution: 28 + +CascadeBBoxHead: + head: CascadeXConvNormHead + nms: + keep_top_k: 100 + nms_threshold: 0.5 + score_threshold: 0.05 + +CascadeXConvNormHead: + norm_type: gn + +LearningRate: + base_lr: 0.02 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [120000, 160000] + - !LinearWarmup + start_factor: 0.3333333333333333 + steps: 500 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0001 + type: L2 + +MaskRCNNTrainFeed: + batch_size: 2 + dataset: + dataset_dir: dataset/coco + annotation: annotations/instances_train2017.json + image_dir: train2017 + batch_transforms: + - !PadBatch + pad_to_stride: 32 + num_workers: 2 + +MaskRCNNEvalFeed: + batch_size: 1 + dataset: + dataset_dir: dataset/coco + annotation: annotations/instances_val2017.json + image_dir: val2017 + batch_transforms: + - !PadBatch + pad_to_stride: 32 + num_workers: 2 + +MaskRCNNTestFeed: + batch_size: 1 + dataset: + annotation: 
dataset/coco/annotations/instances_val2017.json + batch_transforms: + - !PadBatch + pad_to_stride: 32 + num_workers: 2 diff --git a/ppdet/modeling/ops.py b/ppdet/modeling/ops.py index 677a7b56bcdcf6ccd4a21ed1d33d527f73a539cf..1312f86b49077536043ceb96cbca0f6a1c9b9ef3 100644 --- a/ppdet/modeling/ops.py +++ b/ppdet/modeling/ops.py @@ -35,6 +35,7 @@ def ConvNorm(input, norm_type='affine_channel', norm_groups=32, dilation=1, + lr_scale=1, freeze_norm=False, act=None, norm_name=None, @@ -51,18 +52,20 @@ def ConvNorm(input, groups=groups, act=None, param_attr=ParamAttr( - name=name + "_weights", initializer=initializer), + name=name + "_weights", + initializer=initializer, + learning_rate=lr_scale), bias_attr=False, name=name + '.conv2d.output.1') norm_lr = 0. if freeze_norm else 1. pattr = ParamAttr( name=norm_name + '_scale', - learning_rate=norm_lr, + learning_rate=norm_lr * lr_scale, regularizer=L2Decay(norm_decay)) battr = ParamAttr( name=norm_name + '_offset', - learning_rate=norm_lr, + learning_rate=norm_lr * lr_scale, regularizer=L2Decay(norm_decay)) if norm_type in ['bn', 'sync_bn']: diff --git a/ppdet/modeling/roi_heads/cascade_head.py b/ppdet/modeling/roi_heads/cascade_head.py index 7c4a3b8c7387c35c0605651eda4aa0e51c06dbf5..2068b1d8d73c2e4a98143ce38bfecfbee79a7c35 100644 --- a/ppdet/modeling/roi_heads/cascade_head.py +++ b/ppdet/modeling/roi_heads/cascade_head.py @@ -19,8 +19,10 @@ import paddle.fluid as fluid from paddle.fluid.param_attr import ParamAttr from paddle.fluid.initializer import Normal, Xavier from paddle.fluid.regularizer import L2Decay +from paddle.fluid.initializer import MSRA from ppdet.modeling.ops import MultiClassNMS +from ppdet.modeling.ops import ConvNorm from ppdet.core.workspace import register __all__ = ['CascadeBBoxHead'] @@ -50,7 +52,7 @@ class CascadeBBoxHead(object): def get_output(self, roi_feat, cls_agnostic_bbox_reg=2, - wb_scalar=2.0, + wb_scalar=1.0, name=''): """ Get bbox head output. 
@@ -77,7 +79,7 @@ class CascadeBBoxHead(object): learning_rate=wb_scalar), bias_attr=ParamAttr( name='cls_score%s_b' % name, - learning_rate=wb_scalar, + learning_rate=wb_scalar * 2, regularizer=L2Decay(0.))) bbox_pred = fluid.layers.fc(input=head_feat, size=4 * cls_agnostic_bbox_reg, @@ -90,7 +92,7 @@ class CascadeBBoxHead(object): learning_rate=wb_scalar), bias_attr=ParamAttr( name='bbox_pred%s_b' % name, - learning_rate=wb_scalar, + learning_rate=wb_scalar * 2, regularizer=L2Decay(0.))) return cls_score, bbox_pred @@ -177,7 +179,7 @@ class CascadeBBoxHead(object): for i in range(repreat_num): # cls score if i < 2: - cls_score = self._head_share( + cls_score, _ = self.get_output( roi_feat_list[-1], # roi_feat_3 name='_' + str(i + 1) if i > 0 else '') else: @@ -216,66 +218,82 @@ class CascadeBBoxHead(object): pred_result = self.nms(bboxes=box_out, scores=boxes_cls_prob_mean) return {"bbox": pred_result} - def _head_share(self, roi_feat, wb_scalar=2.0, name=''): - # FC6 FC7 - fan = roi_feat.shape[1] * roi_feat.shape[2] * roi_feat.shape[3] - fc6 = fluid.layers.fc(input=roi_feat, - size=self.head.num_chan, - act='relu', - name='fc6' + name, - param_attr=ParamAttr( - name='fc6%s_w' % name, - initializer=Xavier(fan_out=fan), - learning_rate=wb_scalar, ), - bias_attr=ParamAttr( - name='fc6%s_b' % name, - learning_rate=2.0, - regularizer=L2Decay(0.))) - fc7 = fluid.layers.fc(input=fc6, - size=self.head.num_chan, - act='relu', - name='fc7' + name, - param_attr=ParamAttr( - name='fc7%s_w' % name, - initializer=Xavier(), - learning_rate=wb_scalar, ), - bias_attr=ParamAttr( - name='fc7%s_b' % name, - learning_rate=2.0, - regularizer=L2Decay(0.))) - cls_score = fluid.layers.fc(input=fc7, - size=self.num_classes, - act=None, - name='cls_score' + name, + +@register +class CascadeXConvNormHead(object): + """ + RCNN head with several convolution layers + + Args: + num_conv (int): num of convolution layers for the rcnn head + conv_dim (int): num of filters for the conv layers + 
mlp_dim (int): num of filters for the fc layers + """ + __shared__ = ['norm_type', 'freeze_norm'] + + def __init__(self, + num_conv=4, + conv_dim=256, + mlp_dim=1024, + norm_type=None, + freeze_norm=False): + super(CascadeXConvNormHead, self).__init__() + self.conv_dim = conv_dim + self.mlp_dim = mlp_dim + self.num_conv = num_conv + self.norm_type = norm_type + self.freeze_norm = freeze_norm + + def __call__(self, roi_feat, wb_scalar=1.0, name=''): + conv = roi_feat + fan = self.conv_dim * 3 * 3 + initializer = MSRA(uniform=False, fan_in=fan) + for i in range(self.num_conv): + name = 'bbox_head_conv' + str(i) + conv = ConvNorm( + conv, + self.conv_dim, + 3, + act='relu', + initializer=initializer, + norm_type=self.norm_type, + freeze_norm=self.freeze_norm, + lr_scale=wb_scalar, + name=name, + norm_name=name) + fan = conv.shape[1] * conv.shape[2] * conv.shape[3] + head_heat = fluid.layers.fc(input=conv, + size=self.mlp_dim, + act='relu', + name='fc6' + name, param_attr=ParamAttr( - name='cls_score%s_w' % name, - initializer=Normal( - loc=0.0, scale=0.01), - learning_rate=wb_scalar, ), + name='fc6%s_w' % name, + initializer=Xavier(fan_out=fan), + learning_rate=wb_scalar), bias_attr=ParamAttr( - name='cls_score%s_b' % name, - learning_rate=2.0, - regularizer=L2Decay(0.))) - return cls_score + name='fc6%s_b' % name, + regularizer=L2Decay(0.), + learning_rate=wb_scalar * 2)) + return head_heat @register -class FC6FC7Head(object): +class CascadeTwoFCHead(object): """ - Cascade RCNN head with two Fully Connected layers + Cascade RCNN head with two Fully Connected layers Args: - num_chan (int): num of filters for the fc layers + mlp_dim (int): num of filters for the fc layers """ - def __init__(self, num_chan): - super(FC6FC7Head, self).__init__() - self.num_chan = num_chan + def __init__(self, mlp_dim): + super(CascadeTwoFCHead, self).__init__() + self.mlp_dim = mlp_dim def __call__(self, roi_feat, wb_scalar=1.0, name=''): fan = roi_feat.shape[1] * roi_feat.shape[2] * 
roi_feat.shape[3] fc6 = fluid.layers.fc(input=roi_feat, - size=self.num_chan, + size=self.mlp_dim, act='relu', name='fc6' + name, param_attr=ParamAttr( @@ -284,10 +302,10 @@ class FC6FC7Head(object): learning_rate=wb_scalar), bias_attr=ParamAttr( name='fc6%s_b' % name, - learning_rate=wb_scalar, + learning_rate=wb_scalar * 2, regularizer=L2Decay(0.))) head_feat = fluid.layers.fc(input=fc6, - size=self.num_chan, + size=self.mlp_dim, act='relu', name='fc7' + name, param_attr=ParamAttr( @@ -296,6 +314,6 @@ class FC6FC7Head(object): learning_rate=wb_scalar), bias_attr=ParamAttr( name='fc7%s_b' % name, - learning_rate=wb_scalar, + learning_rate=wb_scalar * 2, regularizer=L2Decay(0.))) return head_feat