From fb82692ab6bc49d07cd567a547250a44976c2c1d Mon Sep 17 00:00:00 2001
From: xinyingxinying <63766413+xinyingxinying@users.noreply.github.com>
Date: Sun, 26 Apr 2020 21:02:48 +0800
Subject: [PATCH] Add dcn on fcos head and backbone (#562)

* Add DCN to the FCOS head and backbone
---
 configs/anchor_free/README.md               |   1 +
 configs/anchor_free/fcos_dcn_r50_fpn_1x.yml | 183 ++++++++++++++++++++
 ppdet/modeling/anchor_heads/fcos_head.py    |  10 +-
 ppdet/modeling/ops.py                       | 130 +++++++++++++-
 4 files changed, 320 insertions(+), 4 deletions(-)
 create mode 100644 configs/anchor_free/fcos_dcn_r50_fpn_1x.yml

diff --git a/configs/anchor_free/README.md b/configs/anchor_free/README.md
index dcc6de223..0c27913dd 100644
--- a/configs/anchor_free/README.md
+++ b/configs/anchor_free/README.md
@@ -30,6 +30,7 @@
 | CornerNet-Squeeze-dcn-mixup-cosine*    | ResNet50-vd    | 14  |    [faster\_rcnn\_dcn\_r50\_vd\_fpn\_2x](https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_dcn_r50_vd_fpn_2x.tar)    | 38.2    | 40.05      | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/cornernet_squeeze_dcn_r50_vd_fpn_mixup_cosine.pdparams) |
 | FCOS    | ResNet50    | 2  |    [ResNet50\_cos\_pretrained](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar)    | 39.8 | -      | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/fcos_r50_fpn_1x.pdparams) |
 | FCOS+multiscale_train    | ResNet50    | 2  |    [ResNet50\_cos\_pretrained](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar)    | 42.0 | -      | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/fcos_r50_fpn_multiscale_2x.pdparams) |
+| FCOS+DCN    | ResNet50    | 2  |    [ResNet50\_cos\_pretrained](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar)    | 44.4 | -      | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/fcos_dcn_r50_fpn_1x.pdparams) |
 
 **注意:**
 
diff --git a/configs/anchor_free/fcos_dcn_r50_fpn_1x.yml b/configs/anchor_free/fcos_dcn_r50_fpn_1x.yml
new file mode 100644
index 000000000..ff46e744e
--- /dev/null
+++ b/configs/anchor_free/fcos_dcn_r50_fpn_1x.yml
@@ -0,0 +1,183 @@
+architecture: FCOS
+max_iters: 90000
+use_gpu: true
+snapshot_iter: 5000
+log_smooth_window: 20
+log_iter: 20
+save_dir: output
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar
+metric: COCO
+weights: output/fcos_dcn_r50_fpn_1x/model_final
+num_classes: 81
+
+FCOS:
+  backbone: ResNet
+  fpn: FPN
+  fcos_head: FCOSHead
+
+ResNet:
+  norm_type: affine_channel
+  norm_decay: 0.
+  depth: 50
+  feature_maps: [3, 4, 5]
+  freeze_at: 2
+  dcn_v2_stages: [3, 4, 5]
+
+FPN:
+  min_level: 3
+  max_level: 7
+  num_chan: 256
+  use_c5: false
+  spatial_scale: [0.03125, 0.0625, 0.125]
+  has_extra_convs: true
+
+FCOSHead:
+  num_classes: 81
+  fpn_stride: [8, 16, 32, 64, 128]
+  num_convs: 4
+  norm_type: "gn"
+  fcos_loss: FCOSLoss
+  norm_reg_targets: True
+  centerness_on_reg: True
+  use_dcn_in_tower: True
+  nms: MultiClassNMS
+
+MultiClassNMS:
+  score_threshold: 0.025
+  nms_top_k: 1000
+  keep_top_k: 100
+  nms_threshold: 0.6
+  background_label: -1
+
+FCOSLoss:
+  loss_alpha: 0.25
+  loss_gamma: 2.0
+  iou_loss_type: "giou"
+  reg_weights: 1.0
+
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [60000, 80000]
+  - !LinearWarmup
+    start_factor: 0.3333333333333333
+    steps: 500
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+
+TrainReader:
+  inputs_def:
+    fields: ['image', 'gt_bbox', 'gt_class', 'gt_score', 'im_info']
+  dataset:
+    !COCODataSet
+    image_dir: train2017
+    anno_path: annotations/instances_train2017.json
+    dataset_dir: dataset/coco
+    with_background: true
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: true
+  - !RandomFlipImage
+    prob: 0.5
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: true
+    mean: [0.485,0.456,0.406]
+    std: [0.229, 0.224,0.225]
+  - !ResizeImage
+    target_size: 800
+    max_size: 1333
+    interp: 1
+    use_cv2: true
+  - !Permute
+    to_bgr: false
+    channel_first: true
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 128
+    use_padded_im_info: false
+  - !Gt2FCOSTarget
+    object_sizes_boundary: [64, 128, 256, 512]
+    center_sampling_radius: 1.5
+    downsample_ratios: [8, 16, 32, 64, 128]
+    norm_reg_targets: True
+  batch_size: 2
+  shuffle: true
+  worker_num: 16
+  use_process: false
+
+EvalReader:
+  inputs_def:
+    fields: ['image', 'im_id', 'im_shape', 'im_info']
+  dataset:
+    !COCODataSet
+    image_dir: val2017
+    anno_path: annotations/instances_val2017.json
+    dataset_dir: dataset/coco
+    with_background: false
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: true
+    with_mixup: false
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: true
+    mean: [0.485,0.456,0.406]
+    std: [0.229, 0.224,0.225]
+  - !ResizeImage
+    target_size: 800
+    max_size: 1333
+    interp: 1
+    use_cv2: true
+  - !Permute
+    channel_first: true
+    to_bgr: false
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 128
+    use_padded_im_info: true
+  batch_size: 8
+  shuffle: false
+  worker_num: 2
+  use_process: false
+
+TestReader:
+  inputs_def:
+    # set image_shape if needed
+    fields: ['image', 'im_id', 'im_shape', 'im_info']
+  dataset:
+    !ImageFolder
+    anno_path: annotations/instances_val2017.json
+    with_background: false
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: true
+    with_mixup: false
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: true
+    mean: [0.485,0.456,0.406]
+    std: [0.229, 0.224,0.225]
+  - !ResizeImage
+    interp: 1
+    max_size: 1333
+    target_size: 800
+    use_cv2: true
+  - !Permute
+    channel_first: true
+    to_bgr: false
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: 128
+    use_padded_im_info: true
+  batch_size: 1
+  shuffle: false
+  
diff --git a/ppdet/modeling/anchor_heads/fcos_head.py b/ppdet/modeling/anchor_heads/fcos_head.py
index de0b76410..9b5c4b3c0 100644
--- a/ppdet/modeling/anchor_heads/fcos_head.py
+++ b/ppdet/modeling/anchor_heads/fcos_head.py
@@ -22,7 +22,7 @@ import paddle.fluid as fluid
 from paddle.fluid.param_attr import ParamAttr
 from paddle.fluid.initializer import Normal, Constant, NumpyArrayInitializer
 from paddle.fluid.regularizer import L2Decay
-from ppdet.modeling.ops import ConvNorm
+from ppdet.modeling.ops import ConvNorm, DeformConvNorm
 from ppdet.modeling.ops import MultiClassNMS
 
 from ppdet.core.workspace import register
@@ -89,9 +89,13 @@ class FCOSHead(object):
         subnet_blob_cls = features
         subnet_blob_reg = features
         in_channles = features.shape[1]
+        if self.use_dcn_in_tower:
+            conv_norm = DeformConvNorm
+        else:
+            conv_norm = ConvNorm
         for lvl in range(0, self.num_convs):
             conv_cls_name = 'fcos_head_cls_tower_conv_{}'.format(lvl)
-            subnet_blob_cls = ConvNorm(
+            subnet_blob_cls = conv_norm(
                 input=subnet_blob_cls,
                 num_filters=in_channles,
                 filter_size=3,
@@ -104,7 +108,7 @@ class FCOSHead(object):
                 norm_name=conv_cls_name + "_norm",
                 name=conv_cls_name)
             conv_reg_name = 'fcos_head_reg_tower_conv_{}'.format(lvl)
-            subnet_blob_reg = ConvNorm(
+            subnet_blob_reg = conv_norm(
                 input=subnet_blob_reg,
                 num_filters=in_channles,
                 filter_size=3,
diff --git a/ppdet/modeling/ops.py b/ppdet/modeling/ops.py
index ca861cf18..55e815161 100644
--- a/ppdet/modeling/ops.py
+++ b/ppdet/modeling/ops.py
@@ -27,11 +27,139 @@ __all__ = [
     'AnchorGenerator', 'DropBlock', 'RPNTargetAssign', 'GenerateProposals',
     'MultiClassNMS', 'BBoxAssigner', 'MaskAssigner', 'RoIAlign', 'RoIPool',
     'MultiBoxHead', 'SSDLiteMultiBoxHead', 'SSDOutputDecoder',
-    'RetinaTargetAssign', 'RetinaOutputDecoder', 'ConvNorm',
+    'RetinaTargetAssign', 'RetinaOutputDecoder', 'ConvNorm', 'DeformConvNorm',
     'MultiClassSoftNMS', 'LibraBBoxAssigner'
 ]
 
 
+def _conv_offset(input, filter_size, stride, padding, act=None, name=None):  # plain conv2d producing the combined offset+mask map that DeformConvNorm splits
+    out_channel = filter_size * filter_size * 3  # 3 channels per kernel tap: 2*k*k for offsets + k*k for the mask
+    out = fluid.layers.conv2d(
+        input,
+        num_filters=out_channel,
+        filter_size=filter_size,
+        stride=stride,
+        padding=padding,
+        param_attr=ParamAttr(
+            initializer=fluid.initializer.Constant(value=0),  # zero weights ...
+            name=name + ".w_0"),  # name must be a str; the default None would raise TypeError here
+        bias_attr=ParamAttr(
+            initializer=fluid.initializer.Constant(value=0),  # ... and zero bias: pre-activation output is all zeros at initialization
+            name=name + ".b_0"),
+        act=act,
+        name=name)
+    return out
+
+
+def DeformConvNorm(input,  # deformable conv driven by learned offsets and a sigmoid mask, followed by bn/sync_bn/gn/affine_channel normalization
+                   num_filters,
+                   filter_size,
+                   stride=1,
+                   groups=1,
+                   norm_decay=0.,
+                   norm_type='affine_channel',
+                   norm_groups=32,
+                   dilation=1,
+                   lr_scale=1,
+                   freeze_norm=False,
+                   act=None,
+                   norm_name=None,
+                   initializer=None,
+                   bias_attr=False,
+                   name=None):
+    if bias_attr:  # used as a flag only: any truthy value adds a zero-initialized conv bias
+        bias_para = ParamAttr(
+            name=name + "_bias",
+            initializer=fluid.initializer.Constant(value=0),
+            learning_rate=lr_scale * 2)  # bias gets twice the learning-rate scale of the weights
+    else:
+        bias_para = False
+    offset_mask = _conv_offset(
+        input=input,
+        filter_size=filter_size,
+        stride=stride,
+        padding=(filter_size - 1) // 2,  # NOTE(review): padding here ignores dilation, unlike the deformable conv below
+        act=None,
+        name=name + "_conv_offset")  # name must be a str; the default None would raise TypeError here
+    offset_channel = filter_size**2 * 2  # first 2*k*k channels become the offset input
+    mask_channel = filter_size**2  # remaining k*k channels become the mask input
+    offset, mask = fluid.layers.split(
+        input=offset_mask,
+        num_or_sections=[offset_channel, mask_channel],
+        dim=1)
+    mask = fluid.layers.sigmoid(mask)  # squash the mask into (0, 1)
+    conv = fluid.layers.deformable_conv(
+        input=input,
+        offset=offset,
+        mask=mask,
+        num_filters=num_filters,
+        filter_size=filter_size,
+        stride=stride,
+        padding=(filter_size - 1) // 2 * dilation,
+        dilation=dilation,
+        groups=groups,
+        deformable_groups=1,
+        im2col_step=1,
+        param_attr=ParamAttr(
+            name=name + "_weights",
+            initializer=initializer,
+            learning_rate=lr_scale),
+        bias_attr=bias_para,
+        name=name + ".conv2d.output.1")
+
+    norm_lr = 0. if freeze_norm else 1.  # freezing zeroes the learning rate of the norm parameters
+    pattr = ParamAttr(
+        name=norm_name + '_scale',  # norm_name must be a str; the default None would raise TypeError here
+        learning_rate=norm_lr * lr_scale,
+        regularizer=L2Decay(norm_decay))
+    battr = ParamAttr(
+        name=norm_name + '_offset',
+        learning_rate=norm_lr * lr_scale,
+        regularizer=L2Decay(norm_decay))
+
+    if norm_type in ['bn', 'sync_bn']:
+        global_stats = True if freeze_norm else False  # a frozen batch norm is asked to use its global statistics
+        out = fluid.layers.batch_norm(
+            input=conv,
+            act=act,
+            name=norm_name + '.output.1',
+            param_attr=pattr,
+            bias_attr=battr,
+            moving_mean_name=norm_name + '_mean',
+            moving_variance_name=norm_name + '_variance',
+            use_global_stats=global_stats)
+        scale = fluid.framework._get_var(pattr.name)  # fetch the layer's parameters by name so they can be frozen below
+        bias = fluid.framework._get_var(battr.name)
+    elif norm_type == 'gn':
+        out = fluid.layers.group_norm(
+            input=conv,
+            act=act,
+            name=norm_name + '.output.1',
+            groups=norm_groups,
+            param_attr=pattr,
+            bias_attr=battr)
+        scale = fluid.framework._get_var(pattr.name)  # same by-name lookup as the bn branch
+        bias = fluid.framework._get_var(battr.name)
+    elif norm_type == 'affine_channel':  # parameters are created explicitly here: scale defaults to 1, bias to 0
+        scale = fluid.layers.create_parameter(
+            shape=[conv.shape[1]],
+            dtype=conv.dtype,
+            attr=pattr,
+            default_initializer=fluid.initializer.Constant(1.))
+        bias = fluid.layers.create_parameter(
+            shape=[conv.shape[1]],
+            dtype=conv.dtype,
+            attr=battr,
+            default_initializer=fluid.initializer.Constant(0.))
+        out = fluid.layers.affine_channel(
+            x=conv, scale=scale, bias=bias, act=act)
+
+    if freeze_norm:  # additionally block gradients for the frozen norm parameters
+        scale.stop_gradient = True
+        bias.stop_gradient = True
+    return out  # NOTE(review): no else branch above - an unrecognized norm_type leaves out/scale/bias unbound (UnboundLocalError)
+
+
 def ConvNorm(input,
              num_filters,
              filter_size,
-- 
GitLab