From fb82692ab6bc49d07cd567a547250a44976c2c1d Mon Sep 17 00:00:00 2001 From: xinyingxinying <63766413+xinyingxinying@users.noreply.github.com> Date: Sun, 26 Apr 2020 21:02:48 +0800 Subject: [PATCH] Add dcn on fcos head and backbone (#562) * #add dcn on FCOS_head and backbone --- configs/anchor_free/README.md | 1 + configs/anchor_free/fcos_dcn_r50_fpn_1x.yml | 183 ++++++++++++++++++++ ppdet/modeling/anchor_heads/fcos_head.py | 10 +- ppdet/modeling/ops.py | 130 +++++++++++++- 4 files changed, 320 insertions(+), 4 deletions(-) create mode 100644 configs/anchor_free/fcos_dcn_r50_fpn_1x.yml diff --git a/configs/anchor_free/README.md b/configs/anchor_free/README.md index dcc6de223..0c27913dd 100644 --- a/configs/anchor_free/README.md +++ b/configs/anchor_free/README.md @@ -30,6 +30,7 @@ | CornerNet-Squeeze-dcn-mixup-cosine* | ResNet50-vd | 14 | [faster\_rcnn\_dcn\_r50\_vd\_fpn\_2x](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_dcn_r50_vd_fpn_2x.tar) | 38.2 | 40.05 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/cornernet_squeeze_dcn_r50_vd_fpn_mixup_cosine.pdparams) | | FCOS | ResNet50 | 2 | [ResNet50\_cos\_pretrained](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar) | 39.8 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/fcos_r50_fpn_1x.pdparams) | | FCOS+multiscale_train | ResNet50 | 2 | [ResNet50\_cos\_pretrained](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar) | 42.0 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/fcos_r50_fpn_multiscale_2x.pdparams) | +| FCOS+DCN | ResNet50 | 2 | [ResNet50\_cos\_pretrained](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar) | 44.4 | - | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/fcos_dcn_r50_fpn_1x.pdparams) | **注意:** diff --git a/configs/anchor_free/fcos_dcn_r50_fpn_1x.yml b/configs/anchor_free/fcos_dcn_r50_fpn_1x.yml new file mode 100644 index 000000000..ff46e744e 
--- /dev/null +++ b/configs/anchor_free/fcos_dcn_r50_fpn_1x.yml @@ -0,0 +1,183 @@ +architecture: FCOS +max_iters: 90000 +use_gpu: true +snapshot_iter: 5000 +log_smooth_window: 20 +log_iter: 20 +save_dir: output +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar +metric: COCO +weights: output/fcos_dcn_r50_fpn_1x/model_final +num_classes: 81 + +FCOS: + backbone: ResNet + fpn: FPN + fcos_head: FCOSHead + +ResNet: + norm_type: affine_channel + norm_decay: 0. + depth: 50 + feature_maps: [3, 4, 5] + freeze_at: 2 + dcn_v2_stages: [3, 4, 5] + +FPN: + min_level: 3 + max_level: 7 + num_chan: 256 + use_c5: false + spatial_scale: [0.03125, 0.0625, 0.125] + has_extra_convs: true + +FCOSHead: + num_classes: 81 + fpn_stride: [8, 16, 32, 64, 128] + num_convs: 4 + norm_type: "gn" + fcos_loss: FCOSLoss + norm_reg_targets: True + centerness_on_reg: True + use_dcn_in_tower: True + nms: MultiClassNMS + +MultiClassNMS: + score_threshold: 0.025 + nms_top_k: 1000 + keep_top_k: 100 + nms_threshold: 0.6 + background_label: -1 + +FCOSLoss: + loss_alpha: 0.25 + loss_gamma: 2.0 + iou_loss_type: "giou" + reg_weights: 1.0 + +LearningRate: + base_lr: 0.01 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: [60000, 80000] + - !LinearWarmup + start_factor: 0.3333333333333333 + steps: 500 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0001 + type: L2 + +TrainReader: + inputs_def: + fields: ['image', 'gt_bbox', 'gt_class', 'gt_score', 'im_info'] + dataset: + !COCODataSet + image_dir: train2017 + anno_path: annotations/instances_train2017.json + dataset_dir: dataset/coco + with_background: true + sample_transforms: + - !DecodeImage + to_rgb: true + - !RandomFlipImage + prob: 0.5 + - !NormalizeImage + is_channel_first: false + is_scale: true + mean: [0.485,0.456,0.406] + std: [0.229, 0.224,0.225] + - !ResizeImage + target_size: 800 + max_size: 1333 + interp: 1 + use_cv2: true + - !Permute + to_bgr: 
false + channel_first: true + batch_transforms: + - !PadBatch + pad_to_stride: 128 + use_padded_im_info: false + - !Gt2FCOSTarget + object_sizes_boundary: [64, 128, 256, 512] + center_sampling_radius: 1.5 + downsample_ratios: [8, 16, 32, 64, 128] + norm_reg_targets: True + batch_size: 2 + shuffle: true + worker_num: 16 + use_process: false + +EvalReader: + inputs_def: + fields: ['image', 'im_id', 'im_shape', 'im_info'] + dataset: + !COCODataSet + image_dir: val2017 + anno_path: annotations/instances_val2017.json + dataset_dir: dataset/coco + with_background: false + sample_transforms: + - !DecodeImage + to_rgb: true + with_mixup: false + - !NormalizeImage + is_channel_first: false + is_scale: true + mean: [0.485,0.456,0.406] + std: [0.229, 0.224,0.225] + - !ResizeImage + target_size: 800 + max_size: 1333 + interp: 1 + use_cv2: true + - !Permute + channel_first: true + to_bgr: false + batch_transforms: + - !PadBatch + pad_to_stride: 128 + use_padded_im_info: true + batch_size: 8 + shuffle: false + worker_num: 2 + use_process: false + +TestReader: + inputs_def: + # set image_shape if needed + fields: ['image', 'im_id', 'im_shape', 'im_info'] + dataset: + !ImageFolder + anno_path: annotations/instances_val2017.json + with_background: false + sample_transforms: + - !DecodeImage + to_rgb: true + with_mixup: false + - !NormalizeImage + is_channel_first: false + is_scale: true + mean: [0.485,0.456,0.406] + std: [0.229, 0.224,0.225] + - !ResizeImage + interp: 1 + max_size: 1333 + target_size: 800 + use_cv2: true + - !Permute + channel_first: true + to_bgr: false + batch_transforms: + - !PadBatch + pad_to_stride: 128 + use_padded_im_info: true + batch_size: 1 + shuffle: false + diff --git a/ppdet/modeling/anchor_heads/fcos_head.py b/ppdet/modeling/anchor_heads/fcos_head.py index de0b76410..9b5c4b3c0 100644 --- a/ppdet/modeling/anchor_heads/fcos_head.py +++ b/ppdet/modeling/anchor_heads/fcos_head.py @@ -22,7 +22,7 @@ import paddle.fluid as fluid from paddle.fluid.param_attr 
def _conv_offset(input, filter_size, stride, padding, act=None, name=None):
    """Build the conv layer that predicts deformable-conv offsets and mask.

    The layer emits ``filter_size * filter_size * 3`` channels; the caller
    splits them into offsets and a mask. Weights and bias are both
    initialised to zero, so the layer outputs zeros until it is trained.

    Args:
        input: feature map fed to ``fluid.layers.conv2d``.
        filter_size (int): kernel size, also used to derive the channel count.
        stride: stride forwarded to the convolution.
        padding: padding forwarded to the convolution.
        act: optional activation forwarded to the convolution.
        name (str): layer name; parameters are named ``name + ".w_0"`` and
            ``name + ".b_0"``.

    Returns:
        The output variable of the convolution.
    """
    out_channel = filter_size * filter_size * 3
    out = fluid.layers.conv2d(
        input,
        num_filters=out_channel,
        filter_size=filter_size,
        stride=stride,
        padding=padding,
        param_attr=ParamAttr(
            initializer=fluid.initializer.Constant(value=0),
            name=name + ".w_0"),
        bias_attr=ParamAttr(
            initializer=fluid.initializer.Constant(value=0),
            name=name + ".b_0"),
        act=act,
        name=name)
    return out


def DeformConvNorm(input,
                   num_filters,
                   filter_size,
                   stride=1,
                   groups=1,
                   norm_decay=0.,
                   norm_type='affine_channel',
                   norm_groups=32,
                   dilation=1,
                   lr_scale=1,
                   freeze_norm=False,
                   act=None,
                   norm_name=None,
                   initializer=None,
                   bias_attr=False,
                   name=None):
    """Deformable convolution followed by a normalization layer.

    An auxiliary convolution predicts ``3 * filter_size**2`` channels from
    ``input``. They are split into ``2 * filter_size**2`` offset channels and
    ``filter_size**2`` mask channels; the mask goes through a sigmoid and both
    are fed to ``fluid.layers.deformable_conv``. The result is normalized
    according to ``norm_type``, and ``act`` is applied by the norm layer.

    Args:
        input: input feature map.
        num_filters (int): output channels of the deformable convolution.
        filter_size (int): kernel size of both the offset conv and the
            deformable conv.
        stride (int): stride of both convolutions.
        groups (int): groups of the deformable convolution.
        norm_decay (float): L2 decay coefficient for the norm scale/offset.
        norm_type (str): one of 'bn', 'sync_bn', 'gn' or 'affine_channel'.
        norm_groups (int): number of groups, used only when norm_type is 'gn'.
        dilation (int): dilation of the deformable convolution.
        lr_scale (float): learning-rate multiplier for the conv weights and
            norm parameters (the conv bias uses ``lr_scale * 2``).
        freeze_norm (bool): if true, norm parameters get a zero learning rate
            and ``stop_gradient``; batch norm also uses global statistics.
        act: activation passed to the normalization layer.
        norm_name (str): prefix for the normalization parameter names.
        initializer: initializer for the deformable convolution weights.
        bias_attr (bool): whether the deformable convolution has a bias.
        name (str): prefix for the convolution parameter names.

    Returns:
        The normalized (and optionally activated) output variable.

    Raises:
        TypeError: if ``name`` or ``norm_name`` is None.
        ValueError: if ``norm_type`` is not a supported value.
    """
    supported_norms = ('bn', 'sync_bn', 'gn', 'affine_channel')

    # Validate before touching the program: failing after layers were created
    # would leave half-built parameters behind, and an unknown norm_type used
    # to surface only as an UnboundLocalError on `out` at the very end.
    if name is None or norm_name is None:
        raise TypeError(
            "DeformConvNorm requires both `name` and `norm_name`; they are "
            "used as prefixes for every parameter name it creates")
    if norm_type not in supported_norms:
        raise ValueError(
            "unsupported norm_type {!r}, expected one of {}".format(
                norm_type, supported_norms))

    if bias_attr:
        bias_para = ParamAttr(
            name=name + "_bias",
            initializer=fluid.initializer.Constant(value=0),
            learning_rate=lr_scale * 2)
    else:
        bias_para = False

    offset_mask = _conv_offset(
        input=input,
        filter_size=filter_size,
        stride=stride,
        padding=(filter_size - 1) // 2,
        act=None,
        name=name + "_conv_offset")
    # Two offset channels and one mask channel per kernel position.
    offset_channel = filter_size**2 * 2
    mask_channel = filter_size**2
    offset, mask = fluid.layers.split(
        input=offset_mask,
        num_or_sections=[offset_channel, mask_channel],
        dim=1)
    mask = fluid.layers.sigmoid(mask)
    conv = fluid.layers.deformable_conv(
        input=input,
        offset=offset,
        mask=mask,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding=(filter_size - 1) // 2 * dilation,
        dilation=dilation,
        groups=groups,
        deformable_groups=1,
        im2col_step=1,
        param_attr=ParamAttr(
            name=name + "_weights",
            initializer=initializer,
            learning_rate=lr_scale),
        bias_attr=bias_para,
        name=name + ".conv2d.output.1")

    # A frozen norm keeps its parameters fixed by zeroing their learning rate.
    norm_lr = 0. if freeze_norm else 1.
    pattr = ParamAttr(
        name=norm_name + '_scale',
        learning_rate=norm_lr * lr_scale,
        regularizer=L2Decay(norm_decay))
    battr = ParamAttr(
        name=norm_name + '_offset',
        learning_rate=norm_lr * lr_scale,
        regularizer=L2Decay(norm_decay))

    if norm_type in ('bn', 'sync_bn'):
        out = fluid.layers.batch_norm(
            input=conv,
            act=act,
            name=norm_name + '.output.1',
            param_attr=pattr,
            bias_attr=battr,
            moving_mean_name=norm_name + '_mean',
            moving_variance_name=norm_name + '_variance',
            use_global_stats=bool(freeze_norm))
        scale = fluid.framework._get_var(pattr.name)
        bias = fluid.framework._get_var(battr.name)
    elif norm_type == 'gn':
        out = fluid.layers.group_norm(
            input=conv,
            act=act,
            name=norm_name + '.output.1',
            groups=norm_groups,
            param_attr=pattr,
            bias_attr=battr)
        scale = fluid.framework._get_var(pattr.name)
        bias = fluid.framework._get_var(battr.name)
    else:  # 'affine_channel', guaranteed by the validation above
        scale = fluid.layers.create_parameter(
            shape=[conv.shape[1]],
            dtype=conv.dtype,
            attr=pattr,
            default_initializer=fluid.initializer.Constant(1.))
        bias = fluid.layers.create_parameter(
            shape=[conv.shape[1]],
            dtype=conv.dtype,
            attr=battr,
            default_initializer=fluid.initializer.Constant(0.))
        out = fluid.layers.affine_channel(
            x=conv, scale=scale, bias=bias, act=act)

    if freeze_norm:
        scale.stop_gradient = True
        bias.stop_gradient = True
    return out