Commit 9fdcfbb1 (unverified), PaddlePaddle / PaddleDetection
Authored by Feng Ni on Apr 16, 2021; committed via GitHub on Apr 16, 2021.
remove name and norm_name in head and fpn (#2660)
Parent: 6ab3856f
Showing 14 changed files with 138 additions and 213 deletions (+138 -213).
configs/fcos/_base_/fcos_r50_fpn.yml    +0   -1
ppdet/modeling/backbones/darknet.py     +10  -24
ppdet/modeling/heads/bbox_head.py       +1   -3
ppdet/modeling/heads/fcos_head.py       +23  -15
ppdet/modeling/heads/mask_head.py       +9   -12
ppdet/modeling/heads/solov2_head.py     +12  -28
ppdet/modeling/heads/ssd_head.py        +7   -14
ppdet/modeling/heads/ttf_head.py        +23  -26
ppdet/modeling/heads/yolo_head.py       +0   -2
ppdet/modeling/layers.py                +32  -45
ppdet/modeling/necks/fpn.py             +3   -9
ppdet/modeling/necks/ttf_fpn.py         +12  -26
ppdet/modeling/necks/yolo_fpn.py        +6   -6
ppdet/modeling/ops.py                   +0   -2
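All fourteen diffs below share one theme: explicit name and norm_name arguments are dropped from layer constructors, and parameter naming is left to the framework's module hierarchy. As a hedged before/after sketch (not taken verbatim from the diff; the literal channel numbers are made up), a typical ConvNormLayer call site changes roughly like this:

    from ppdet.modeling.layers import ConvNormLayer

    # Before: call sites threaded hand-built parameter names through.
    # conv = ConvNormLayer(64, 64, filter_size=3, stride=1, norm_type='gn',
    #                      norm_name='head.conv0.gn', name='head.conv0')

    # After: names are derived automatically from attribute/sublayer paths.
    conv = ConvNormLayer(64, 64, filter_size=3, stride=1, norm_type='gn')

The same pattern repeats for ConvBNLayer, LiteConv, SepConvLayer, batch_norm, and the head and neck modules that call them.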
configs/fcos/_base_/fcos_r50_fpn.yml

@@ -47,7 +47,6 @@ FCOSPostProcess:
   decode:
     name: FCOSBox
     num_classes: 80
-    batch_size: 1
   nms:
     name: MultiClassNMS
     nms_top_k: 1000
ppdet/modeling/backbones/darknet.py

@@ -35,8 +35,8 @@ class ConvBNLayer(nn.Layer):
                  norm_type='bn',
                  norm_decay=0.,
                  act="leaky",
-                 name=None,
-                 data_format='NCHW'):
+                 data_format='NCHW',
+                 name=''):
@@ -50,7 +50,6 @@ class ConvBNLayer(nn.Layer):
             norm_type (str): batch norm type, default bn
             norm_decay (str): decay for weight and bias of batch norm layer, default 0.
             act (str): activation function type, default 'leaky', which means leaky_relu
-            name (str): layer name
             data_format (str): data format, NCHW or NHWC
         """
         super(ConvBNLayer, self).__init__()
@@ -68,7 +67,6 @@ class ConvBNLayer(nn.Layer):
             ch_out,
             norm_type=norm_type,
             norm_decay=norm_decay,
-            name=name,
             data_format=data_format)
         self.act = act
@@ -91,7 +89,6 @@ class DownSample(nn.Layer):
                  padding=1,
                  norm_type='bn',
                  norm_decay=0.,
-                 name=None,
                  data_format='NCHW'):
         """
         downsample layer
@@ -104,7 +101,6 @@ class DownSample(nn.Layer):
             padding (int): padding size, default 1
             norm_type (str): batch norm type, default bn
             norm_decay (str): decay for weight and bias of batch norm layer, default 0.
-            name (str): layer name
             data_format (str): data format, NCHW or NHWC
         """
@@ -118,8 +114,7 @@ class DownSample(nn.Layer):
             padding=padding,
             norm_type=norm_type,
             norm_decay=norm_decay,
-            data_format=data_format,
-            name=name)
+            data_format=data_format)
         self.ch_out = ch_out

     def forward(self, inputs):
@@ -133,7 +128,6 @@ class BasicBlock(nn.Layer):
                  ch_out,
                  norm_type='bn',
                  norm_decay=0.,
-                 name=None,
                  data_format='NCHW'):
         """
         BasicBlock layer of DarkNet
@@ -143,7 +137,6 @@ class BasicBlock(nn.Layer):
             ch_out (int): output channel
             norm_type (str): batch norm type, default bn
             norm_decay (str): decay for weight and bias of batch norm layer, default 0.
-            name (str): layer name
             data_format (str): data format, NCHW or NHWC
         """
@@ -157,8 +150,7 @@ class BasicBlock(nn.Layer):
             padding=0,
             norm_type=norm_type,
             norm_decay=norm_decay,
-            data_format=data_format,
-            name=name + '.0')
+            data_format=data_format)
         self.conv2 = ConvBNLayer(
             ch_in=ch_out,
             ch_out=ch_out * 2,
@@ -167,8 +159,7 @@ class BasicBlock(nn.Layer):
             padding=1,
             norm_type=norm_type,
             norm_decay=norm_decay,
-            data_format=data_format,
-            name=name + '.1')
+            data_format=data_format)

     def forward(self, inputs):
         conv1 = self.conv1(inputs)
@@ -205,8 +196,7 @@ class Blocks(nn.Layer):
             ch_out,
             norm_type=norm_type,
             norm_decay=norm_decay,
-            data_format=data_format,
-            name=name + '.0')
+            data_format=data_format)
         self.res_out_list = []
         for i in range(1, count):
             block_name = '{}.{}'.format(name, i)
@@ -217,8 +207,7 @@ class Blocks(nn.Layer):
                 ch_out,
                 norm_type=norm_type,
                 norm_decay=norm_decay,
-                data_format=data_format,
-                name=block_name))
+                data_format=data_format))
             self.res_out_list.append(res_out)
         self.ch_out = ch_out
@@ -272,16 +261,14 @@ class DarkNet(nn.Layer):
             padding=1,
             norm_type=norm_type,
             norm_decay=norm_decay,
-            data_format=data_format,
-            name='yolo_input')
+            data_format=data_format)

         self.downsample0 = DownSample(
             ch_in=32,
             ch_out=32 * 2,
             norm_type=norm_type,
             norm_decay=norm_decay,
-            data_format=data_format,
-            name='yolo_input.downsample')
+            data_format=data_format)

         self._out_channels = []
         self.darknet_conv_block_list = []
@@ -311,8 +298,7 @@ class DarkNet(nn.Layer):
                     ch_out=32 * (2**(i + 2)),
                     norm_type=norm_type,
                     norm_decay=norm_decay,
-                    data_format=data_format,
-                    name=down_name))
+                    data_format=data_format))
             self.downsample_list.append(downsample)

     def forward(self, inputs):
ppdet/modeling/heads/bbox_head.py

@@ -126,10 +126,8 @@ class XConvNormHead(nn.Layer):
                     filter_size=3,
                     stride=1,
                     norm_type=self.norm_type,
-                    norm_name=head_conv_name + '_norm',
                     freeze_norm=self.freeze_norm,
-                    initializer=initializer,
-                    name=head_conv_name))
+                    initializer=initializer))
             self.bbox_head_convs.append(head_conv)

         fan = conv_dim * resolution * resolution
ppdet/modeling/heads/fcos_head.py

@@ -28,6 +28,10 @@ from ppdet.modeling.layers import ConvNormLayer

 class ScaleReg(nn.Layer):
+    """
+    Parameter for scaling the regression outputs.
+    """
+
     def __init__(self):
         super(ScaleReg, self).__init__()
         self.scale_reg = self.create_parameter(
@@ -77,10 +81,8 @@ class FCOSFeat(nn.Layer):
                     stride=1,
                     norm_type=norm_type,
                     use_dcn=use_dcn,
-                    norm_name=cls_conv_name + '_norm',
                     bias_on=True,
-                    lr_scale=2.,
-                    name=cls_conv_name))
+                    lr_scale=2.))
             self.cls_subnet_convs.append(cls_conv)

             reg_conv_name = 'fcos_head_reg_tower_conv_{}'.format(i)
@@ -93,10 +95,8 @@ class FCOSFeat(nn.Layer):
                     stride=1,
                     norm_type=norm_type,
                     use_dcn=use_dcn,
-                    norm_name=reg_conv_name + '_norm',
                     bias_on=True,
-                    lr_scale=2.,
-                    name=reg_conv_name))
+                    lr_scale=2.))
             self.reg_subnet_convs.append(reg_conv)

     def forward(self, fpn_feat):
@@ -113,12 +113,13 @@ class FCOSHead(nn.Layer):
     """
     FCOSHead
     Args:
-        num_classes(int): Number of classes
-        fpn_stride(list): The stride of each FPN Layer
-        prior_prob(float): Used to set the bias init for the class prediction layer
-        fcos_loss(object): Instance of 'FCOSLoss'
-        norm_reg_targets(bool): Normalization the regression target if true
-        centerness_on_reg(bool): The prediction of centerness on regression or clssification branch
+        fcos_feat (object): Instance of 'FCOSFeat'
+        num_classes (int): Number of classes
+        fpn_stride (list): The stride of each FPN Layer
+        prior_prob (float): Used to set the bias init for the class prediction layer
+        fcos_loss (object): Instance of 'FCOSLoss'
+        norm_reg_targets (bool): Normalization the regression target if true
+        centerness_on_reg (bool): The prediction of centerness on regression or clssification branch
     """
     __inject__ = ['fcos_feat', 'fcos_loss']
     __shared__ = ['num_classes']
@@ -199,7 +200,15 @@ class FCOSHead(nn.Layer):
             scale_reg = self.add_sublayer(feat_name, ScaleReg())
             self.scales_regs.append(scale_reg)

-    def _compute_locatioins_by_level(self, fpn_stride, feature):
+    def _compute_locations_by_level(self, fpn_stride, feature):
+        """
+        Compute locations of anchor points of each FPN layer
+        Args:
+            fpn_stride (int): The stride of current FPN feature map
+            feature (Tensor): Tensor of current FPN feature map
+        Return:
+            Anchor points locations of current FPN feature map
+        """
         shape_fm = paddle.shape(feature)
         shape_fm.stop_gradient = True
         h, w = shape_fm[2], shape_fm[3]
@@ -247,8 +256,7 @@ class FCOSHead(nn.Layer):
         if not is_training:
             locations_list = []
             for fpn_stride, feature in zip(self.fpn_stride, fpn_feats):
-                location = self._compute_locatioins_by_level(fpn_stride, feature)
+                location = self._compute_locations_by_level(fpn_stride, feature)
                 locations_list.append(location)

             return locations_list, cls_logits_list, bboxes_reg_list, centerness_list
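Besides removing norm_name, this file fixes the _compute_locatioins_by_level misspelling and documents what the method computes. As a rough standalone sketch of that computation (assuming the usual FCOS convention of anchor points at stride-cell centers; the real method reads h and w from the feature tensor itself):

    import paddle

    def compute_locations(fpn_stride, h, w):
        # (x, y) anchor-point centers for one FPN level, shape [h * w, 2].
        shift_x = paddle.arange(0, w, dtype='float32') * fpn_stride
        shift_y = paddle.arange(0, h, dtype='float32') * fpn_stride
        shift_y, shift_x = paddle.meshgrid(shift_y, shift_x)
        # Row-major flatten, then offset to the center of each cell.
        return paddle.stack(
            [shift_x.flatten(), shift_y.flatten()], axis=1) + fpn_stride / 2.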
ppdet/modeling/heads/mask_head.py

@@ -63,22 +63,19 @@ class MaskFeat(nn.Layer):
                         filter_size=3,
                         stride=1,
                         norm_type=self.norm_type,
-                        norm_name=conv_name + '_norm',
-                        initializer=KaimingNormal(fan_in=fan_conv),
-                        name=conv_name))
+                        initializer=KaimingNormal(fan_in=fan_conv)))
                 mask_conv.add_sublayer(conv_name + 'act', nn.ReLU())
         else:
             for i in range(self.num_convs):
                 conv_name = 'mask_inter_feat_{}'.format(i + 1)
-                mask_conv.add_sublayer(
-                    conv_name,
-                    nn.Conv2D(
-                        in_channels=in_channel if i == 0 else out_channel,
-                        out_channels=out_channel,
-                        kernel_size=3,
-                        padding=1,
-                        weight_attr=paddle.ParamAttr(
-                            initializer=KaimingNormal(fan_in=fan_conv))))
+                conv = nn.Conv2D(
+                    in_channels=in_channel if i == 0 else out_channel,
+                    out_channels=out_channel,
+                    kernel_size=3,
+                    padding=1,
+                    weight_attr=paddle.ParamAttr(
+                        initializer=KaimingNormal(fan_in=fan_conv)))
+                mask_conv.add_sublayer(conv_name, conv)
                 mask_conv.add_sublayer(conv_name + 'act', nn.ReLU())
         mask_conv.add_sublayer(
             'conv5_mask',
ppdet/modeling/heads/solov2_head.py

@@ -75,9 +75,7 @@ class SOLOv2MaskHead(nn.Layer):
                         ch_out=self.mid_channels,
                         filter_size=3,
                         stride=1,
-                        norm_type='gn',
-                        norm_name=conv_feat_name + '.conv' + str(i) + '.gn',
-                        name=conv_feat_name + '.conv' + str(i)))
+                        norm_type='gn'))
                 self.add_sublayer('conv_pre_feat' + str(i), conv_pre_feat)
                 self.convs_all_levels.append(conv_pre_feat)
             else:
@@ -94,9 +92,7 @@ class SOLOv2MaskHead(nn.Layer):
                         ch_out=self.mid_channels,
                         filter_size=3,
                         stride=1,
-                        norm_type='gn',
-                        norm_name=conv_feat_name + '.conv' + str(j) + '.gn',
-                        name=conv_feat_name + '.conv' + str(j)))
+                        norm_type='gn'))
                     conv_pre_feat.add_sublayer(
                         conv_feat_name + '.conv' + str(j) + 'act', nn.ReLU())
                     conv_pre_feat.add_sublayer(
@@ -114,9 +110,7 @@ class SOLOv2MaskHead(nn.Layer):
                 ch_out=self.out_channels,
                 filter_size=1,
                 stride=1,
-                norm_type='gn',
-                norm_name=conv_pred_name + '.gn',
-                name=conv_pred_name))
+                norm_type='gn'))

     def forward(self, inputs):
         """
@@ -216,9 +210,7 @@ class SOLOv2Head(nn.Layer):
                     ch_out=self.seg_feat_channels,
                     filter_size=3,
                     stride=1,
-                    norm_type='gn',
-                    norm_name='bbox_head.kernel_convs.{}.gn'.format(i),
-                    name='bbox_head.kernel_convs.{}'.format(i)))
+                    norm_type='gn'))
             self.kernel_pred_convs.append(kernel_conv)
             ch_in = self.in_channels if i == 0 else self.seg_feat_channels
             cate_conv = self.add_sublayer(
@@ -228,9 +220,7 @@ class SOLOv2Head(nn.Layer):
                     ch_out=self.seg_feat_channels,
                     filter_size=3,
                     stride=1,
-                    norm_type='gn',
-                    norm_name='bbox_head.cate_convs.{}.gn'.format(i),
-                    name='bbox_head.cate_convs.{}'.format(i)))
+                    norm_type='gn'))
             self.cate_pred_convs.append(cate_conv)

         self.solo_kernel = self.add_sublayer(
@@ -241,11 +231,9 @@ class SOLOv2Head(nn.Layer):
                 kernel_size=3,
                 stride=1,
                 padding=1,
-                weight_attr=ParamAttr(
-                    name="bbox_head.solo_kernel.weight",
-                    initializer=Normal(
-                        mean=0., std=0.01)),
-                bias_attr=ParamAttr(name="bbox_head.solo_kernel.bias")))
+                weight_attr=ParamAttr(initializer=Normal(
+                    mean=0., std=0.01)),
+                bias_attr=True))
         self.solo_cate = self.add_sublayer(
             'bbox_head.solo_cate',
             nn.Conv2D(
@@ -254,14 +242,10 @@ class SOLOv2Head(nn.Layer):
                 kernel_size=3,
                 stride=1,
                 padding=1,
-                weight_attr=ParamAttr(
-                    name="bbox_head.solo_cate.weight",
-                    initializer=Normal(
-                        mean=0., std=0.01)),
-                bias_attr=ParamAttr(
-                    name="bbox_head.solo_cate.bias",
-                    initializer=Constant(
-                        value=float(-np.log((1 - 0.01) / 0.01))))))
+                weight_attr=ParamAttr(initializer=Normal(
+                    mean=0., std=0.01)),
+                bias_attr=ParamAttr(initializer=Constant(
+                    value=float(-np.log((1 - 0.01) / 0.01))))))

     def _points_nms(self, heat, kernel_size=2):
         hmax = F.max_pool2d(heat, kernel_size=kernel_size, stride=1, padding=1)
ppdet/modeling/heads/ssd_head.py

@@ -28,8 +28,7 @@ class SepConvLayer(nn.Layer):
                  out_channels,
                  kernel_size=3,
                  padding=1,
-                 conv_decay=0,
-                 name=None):
+                 conv_decay=0):
         super(SepConvLayer, self).__init__()
         self.dw_conv = nn.Conv2D(
             in_channels=in_channels,
@@ -38,16 +37,13 @@ class SepConvLayer(nn.Layer):
             stride=1,
             padding=padding,
             groups=in_channels,
-            weight_attr=ParamAttr(
-                name=name + "_dw_weights", regularizer=L2Decay(conv_decay)),
+            weight_attr=ParamAttr(regularizer=L2Decay(conv_decay)),
             bias_attr=False)

         self.bn = nn.BatchNorm2D(
             in_channels,
-            weight_attr=ParamAttr(
-                name=name + "_bn_scale", regularizer=L2Decay(0.)),
-            bias_attr=ParamAttr(
-                name=name + "_bn_offset", regularizer=L2Decay(0.)))
+            weight_attr=ParamAttr(regularizer=L2Decay(0.)),
+            bias_attr=ParamAttr(regularizer=L2Decay(0.)))

         self.pw_conv = nn.Conv2D(
             in_channels=in_channels,
@@ -55,8 +51,7 @@ class SepConvLayer(nn.Layer):
             kernel_size=1,
             stride=1,
             padding=0,
-            weight_attr=ParamAttr(
-                name=name + "_pw_weights", regularizer=L2Decay(conv_decay)),
+            weight_attr=ParamAttr(regularizer=L2Decay(conv_decay)),
             bias_attr=False)

     def forward(self, x):
@@ -125,8 +120,7 @@ class SSDHead(nn.Layer):
                         out_channels=num_prior * 4,
                         kernel_size=kernel_size,
                         padding=padding,
-                        conv_decay=conv_decay,
-                        name=box_conv_name))
+                        conv_decay=conv_decay))
             self.box_convs.append(box_conv)

             score_conv_name = "scores{}".format(i)
@@ -146,8 +140,7 @@ class SSDHead(nn.Layer):
                         out_channels=num_prior * self.num_classes,
                         kernel_size=kernel_size,
                         padding=padding,
-                        conv_decay=conv_decay,
-                        name=score_conv_name))
+                        conv_decay=conv_decay))
             self.score_convs.append(score_conv)

     @classmethod
ppdet/modeling/heads/ttf_head.py

@@ -61,8 +61,7 @@ class HMHead(nn.Layer):
                     LiteConv(
                         in_channels=ch_in if i == 0 else ch_out,
                         out_channels=ch_out,
-                        norm_type=norm_type,
-                        name=lite_name))
+                        norm_type=norm_type))
                 head_conv.add_sublayer(lite_name + '.act', nn.ReLU6())
             else:
                 if dcn_head:
@@ -85,19 +84,17 @@ class HMHead(nn.Layer):
                         bias_attr=ParamAttr(
                             learning_rate=2., regularizer=L2Decay(0.))))
                 head_conv.add_sublayer(name + '.act', nn.ReLU())
-        self.feat = self.add_sublayer('hm_feat', head_conv)
+        self.feat = head_conv
         bias_init = float(-np.log((1 - 0.01) / 0.01))
-        self.head = self.add_sublayer(
-            'hm_head',
-            nn.Conv2D(
-                in_channels=ch_out,
-                out_channels=num_classes,
-                kernel_size=1,
-                weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
-                bias_attr=ParamAttr(
-                    learning_rate=2.,
-                    regularizer=L2Decay(0.),
-                    initializer=Constant(bias_init))))
+        self.head = nn.Conv2D(
+            in_channels=ch_out,
+            out_channels=num_classes,
+            kernel_size=1,
+            weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
+            bias_attr=ParamAttr(
+                learning_rate=2.,
+                regularizer=L2Decay(0.),
+                initializer=Constant(bias_init)))

     def forward(self, feat):
         out = self.feat(feat)
@@ -139,8 +136,7 @@ class WHHead(nn.Layer):
                     LiteConv(
                         in_channels=ch_in if i == 0 else ch_out,
                         out_channels=ch_out,
-                        norm_type=norm_type,
-                        name=lite_name))
+                        norm_type=norm_type))
                 head_conv.add_sublayer(lite_name + '.act', nn.ReLU6())
             else:
                 if dcn_head:
@@ -164,16 +160,14 @@ class WHHead(nn.Layer):
                             learning_rate=2., regularizer=L2Decay(0.))))
                 head_conv.add_sublayer(name + '.act', nn.ReLU())

-        self.feat = self.add_sublayer('wh_feat', head_conv)
-        self.head = self.add_sublayer(
-            'wh_head',
-            nn.Conv2D(
-                in_channels=ch_out,
-                out_channels=4,
-                kernel_size=1,
-                weight_attr=ParamAttr(initializer=Normal(0, 0.001)),
-                bias_attr=ParamAttr(
-                    learning_rate=2., regularizer=L2Decay(0.))))
+        self.feat = head_conv
+        self.head = nn.Conv2D(
+            in_channels=ch_out,
+            out_channels=4,
+            kernel_size=1,
+            weight_attr=ParamAttr(initializer=Normal(0, 0.001)),
+            bias_attr=ParamAttr(
+                learning_rate=2., regularizer=L2Decay(0.)))

     def forward(self, feat):
         out = self.feat(feat)
@@ -249,6 +243,9 @@ class TTFHead(nn.Layer):
         return hm, wh

     def filter_box_by_weight(self, pred, target, weight):
+        """
+        Filter out boxes where ttf_reg_weight is 0, only keep positive samples.
+        """
         index = paddle.nonzero(weight > 0)
         index.stop_gradient = True
         weight = paddle.gather_nd(weight, index)
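Dropping self.feat = self.add_sublayer('hm_feat', head_conv) in favor of plain attribute assignment is safe because paddle.nn.Layer, like torch.nn.Module, registers any layer assigned to an attribute as a sublayer; only the key prefix in state_dict() changes. A minimal sketch of that behavior (a toy module, not code from this commit):

    import paddle.nn as nn

    class Tiny(nn.Layer):
        def __init__(self):
            super(Tiny, self).__init__()
            # Plain attribute assignment is enough to register the sublayer.
            self.feat = nn.Conv2D(3, 8, 1)

    print(list(Tiny().state_dict().keys()))
    # expected keys along the lines of ['feat.weight', 'feat.bias']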
ppdet/modeling/heads/yolo_head.py

@@ -4,7 +4,6 @@ import paddle.nn.functional as F
 from paddle import ParamAttr
 from paddle.regularizer import L2Decay
 from ppdet.core.workspace import register
-from ..backbones.darknet import ConvBNLayer

 def _de_sigmoid(x, eps=1e-7):
@@ -70,7 +69,6 @@ class YOLOv3Head(nn.Layer):
                 padding=0,
                 data_format=data_format,
                 bias_attr=ParamAttr(regularizer=L2Decay(0.)))
             conv.skip_quant = True
             yolo_output = self.add_sublayer(name, conv)
             self.yolo_outputs.append(yolo_output)
ppdet/modeling/layers.py

@@ -114,21 +114,17 @@ class ConvNormLayer(nn.Layer):
                  norm_decay=0.,
                  norm_groups=32,
                  use_dcn=False,
-                 norm_name=None,
                  bias_on=False,
                  lr_scale=1.,
                  freeze_norm=False,
                  initializer=Normal(
-                     mean=0., std=0.01),
-                 name=None):
+                     mean=0., std=0.01)):
         super(ConvNormLayer, self).__init__()
         assert norm_type in ['bn', 'sync_bn', 'gn']

         if bias_on:
             bias_attr = ParamAttr(
-                name=name + "_bias",
-                initializer=Constant(value=0.),
-                learning_rate=lr_scale)
+                initializer=Constant(value=0.), learning_rate=lr_scale)
         else:
             bias_attr = False
@@ -141,9 +137,7 @@ class ConvNormLayer(nn.Layer):
                 padding=(filter_size - 1) // 2,
                 groups=groups,
                 weight_attr=ParamAttr(
-                    name=name + "_weight",
-                    initializer=initializer,
-                    learning_rate=1.),
+                    initializer=initializer, learning_rate=1.),
                 bias_attr=bias_attr)
         else:
             # in FCOS-DCN head, specifically need learning_rate and regularizer
@@ -155,23 +149,16 @@ class ConvNormLayer(nn.Layer):
                 padding=(filter_size - 1) // 2,
                 groups=groups,
                 weight_attr=ParamAttr(
-                    name=name + "_weight",
-                    initializer=initializer,
-                    learning_rate=1.),
+                    initializer=initializer, learning_rate=1.),
                 bias_attr=True,
                 lr_scale=2.,
-                regularizer=L2Decay(norm_decay),
-                name=name)
+                regularizer=L2Decay(norm_decay))

         norm_lr = 0. if freeze_norm else 1.
         param_attr = ParamAttr(
-            name=norm_name + "_scale",
-            learning_rate=norm_lr,
-            regularizer=L2Decay(norm_decay))
+            learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
         bias_attr = ParamAttr(
-            name=norm_name + "_offset",
-            learning_rate=norm_lr,
-            regularizer=L2Decay(norm_decay))
+            learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
         if norm_type == 'bn':
             self.norm = nn.BatchNorm2D(
                 ch_out, weight_attr=param_attr, bias_attr=bias_attr)
@@ -208,27 +195,21 @@ class LiteConv(nn.Layer):
             stride=stride,
             groups=in_channels,
             norm_type=norm_type,
-            initializer=XavierUniform(),
-            norm_name=name + '.conv1.norm',
-            name=name + '.conv1')
+            initializer=XavierUniform())
         conv2 = ConvNormLayer(
             in_channels,
             out_channels,
             filter_size=1,
             stride=stride,
             norm_type=norm_type,
-            initializer=XavierUniform(),
-            norm_name=name + '.conv2.norm',
-            name=name + '.conv2')
+            initializer=XavierUniform())
         conv3 = ConvNormLayer(
             out_channels,
             out_channels,
             filter_size=1,
             stride=stride,
             norm_type=norm_type,
-            initializer=XavierUniform(),
-            norm_name=name + '.conv3.norm',
-            name=name + '.conv3')
+            initializer=XavierUniform())
         conv4 = ConvNormLayer(
             out_channels,
             out_channels,
@@ -236,9 +217,7 @@ class LiteConv(nn.Layer):
             stride=stride,
             groups=out_channels,
             norm_type=norm_type,
-            initializer=XavierUniform(),
-            norm_name=name + '.conv4.norm',
-            name=name + '.conv4')
+            initializer=XavierUniform())
         conv_list = [conv1, conv2, conv3, conv4]
         self.lite_conv.add_sublayer('conv1', conv1)
         self.lite_conv.add_sublayer('relu6_1', nn.ReLU6())
@@ -675,20 +654,20 @@ class AnchorGrid(object):
 @register
 @serializable
 class FCOSBox(object):
-    __shared__ = ['num_classes', 'batch_size']
+    __shared__ = ['num_classes']

-    def __init__(self, num_classes=80, batch_size=1):
+    def __init__(self, num_classes=80):
         super(FCOSBox, self).__init__()
         self.num_classes = num_classes
-        self.batch_size = batch_size

     def _merge_hw(self, inputs, ch_type="channel_first"):
         """
         Merge h and w of the feature map into one dimension.
         Args:
-            inputs (Variables): Feature map whose H and W will be merged into one dimension
-            ch_type (str): channel_first / channel_last
+            inputs (Tensor): Tensor of the input feature map
+            ch_type (str): "channel_first" or "channel_last" style
         Return:
-            new_shape (Variables): The new shape after h and w merged into one dimension
+            new_shape (Tensor): The new shape after h and w merged
         """
         shape_ = paddle.shape(inputs)
         bs, ch, hi, wi = shape_[0], shape_[1], shape_[2], shape_[3]
@@ -706,16 +685,18 @@ class FCOSBox(object):
     def _postprocessing_by_level(self, locations, box_cls, box_reg, box_ctn,
                                  scale_factor):
         """
         Postprocess each layer of the output with corresponding locations.
         Args:
-            locations (Variables): anchor points for current layer, [H*W, 2]
-            box_cls (Variables): categories prediction, [N, C, H, W], C is the number of classes
-            box_reg (Variables): bounding box prediction, [N, 4, H, W]
-            box_ctn (Variables): centerness prediction, [N, 1, H, W]
-            scale_factor (Variables): [h_scale, w_scale] for input images
+            locations (Tensor): anchor points for current layer, [H*W, 2]
+            box_cls (Tensor): categories prediction, [N, C, H, W],
+                C is the number of classes
+            box_reg (Tensor): bounding box prediction, [N, 4, H, W]
+            box_ctn (Tensor): centerness prediction, [N, 1, H, W]
+            scale_factor (Tensor): [h_scale, w_scale] for input images
         Return:
-            box_cls_ch_last (Variables): score for each category, in [N, C, M]
+            box_cls_ch_last (Tensor): score for each category, in [N, C, M],
                 C is the number of classes and M is the number of anchor points
-            box_reg_decoding (Variables): decoded bounding box, in [N, M, 4]
+            box_reg_decoding (Tensor): decoded bounding box, in [N, M, 4],
                 last dimension is [x1, y1, x2, y2]
         """
         act_shape_cls = self._merge_hw(box_cls)
@@ -771,12 +752,18 @@ class TTFBox(object):
         self.down_ratio = down_ratio

     def _simple_nms(self, heat, kernel=3):
+        """
+        Use maxpool to filter the max score, get local peaks.
+        """
         pad = (kernel - 1) // 2
         hmax = F.max_pool2d(heat, kernel, stride=1, padding=pad)
         keep = paddle.cast(hmax == heat, 'float32')
         return heat * keep

     def _topk(self, scores):
+        """
+        Select top k scores and decode to get xy coordinates.
+        """
         k = self.max_per_img
         shape_fm = paddle.shape(scores)
         shape_fm.stop_gradient = True
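The FCOSBox change pairs with the config diff at the top of this commit: batch_size leaves both __shared__ and __init__, so the batch_size: 1 line disappears from fcos_r50_fpn.yml. After the commit, construction reduces to (a sketch, assuming ppdet is importable):

    from ppdet.modeling.layers import FCOSBox

    fcos_box = FCOSBox(num_classes=80)  # batch_size is no longer accepted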
ppdet/modeling/necks/fpn.py

@@ -105,10 +105,8 @@ class FPN(nn.Layer):
                     stride=1,
                     norm_type=self.norm_type,
                     norm_decay=self.norm_decay,
-                    norm_name=lateral_name + '_norm',
                     freeze_norm=self.freeze_norm,
-                    initializer=XavierUniform(fan_out=in_c),
-                    name=lateral_name))
+                    initializer=XavierUniform(fan_out=in_c)))
             else:
                 lateral = self.add_sublayer(
                     lateral_name,
@@ -131,10 +129,8 @@ class FPN(nn.Layer):
                     stride=1,
                     norm_type=self.norm_type,
                     norm_decay=self.norm_decay,
-                    norm_name=fpn_name + '_norm',
                     freeze_norm=self.freeze_norm,
-                    initializer=XavierUniform(fan_out=fan),
-                    name=fpn_name))
+                    initializer=XavierUniform(fan_out=fan)))
             else:
                 fpn_conv = self.add_sublayer(
                     fpn_name,
@@ -166,10 +162,8 @@ class FPN(nn.Layer):
                     stride=2,
                     norm_type=self.norm_type,
                     norm_decay=self.norm_decay,
-                    norm_name=extra_fpn_name + '_norm',
                     freeze_norm=self.freeze_norm,
-                    initializer=XavierUniform(fan_out=fan),
-                    name=extra_fpn_name))
+                    initializer=XavierUniform(fan_out=fan)))
             else:
                 extra_fpn_conv = self.add_sublayer(
                     extra_fpn_name,
ppdet/modeling/necks/ttf_fpn.py

@@ -16,7 +16,6 @@ import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
-from paddle import ParamAttr
-from paddle.nn.initializer import Constant, Uniform, Normal
+from paddle.nn.initializer import Constant, Uniform, Normal, XavierUniform
+from paddle import ParamAttr
 from ppdet.core.workspace import register, serializable
@@ -28,11 +27,9 @@ from ..shape_spec import ShapeSpec
 __all__ = ['TTFFPN']

 class Upsample(nn.Layer):
-    def __init__(self, ch_in, ch_out, norm_type='bn', name=None):
+    def __init__(self, ch_in, ch_out, norm_type='bn'):
         super(Upsample, self).__init__()
         fan_in = ch_in * 3 * 3
         stdv = 1. / math.sqrt(fan_in)
@@ -49,7 +46,7 @@ class Upsample(nn.Layer):
                 regularizer=L2Decay(0.))

         self.bn = batch_norm(
-            ch_out, norm_type=norm_type, initializer=Constant(1.), name=name)
+            ch_out, norm_type=norm_type, initializer=Constant(1.))

     def forward(self, feat):
         dcn = self.dcn(feat)
@@ -60,7 +57,7 @@ class Upsample(nn.Layer):
 class DeConv(nn.Layer):
-    def __init__(self, ch_in, ch_out, norm_type='bn', name=None):
+    def __init__(self, ch_in, ch_out, norm_type='bn'):
         super(DeConv, self).__init__()
         self.deconv = nn.Sequential()
         conv1 = ConvNormLayer(
@@ -69,9 +66,7 @@ class DeConv(nn.Layer):
             stride=1,
             filter_size=1,
             norm_type=norm_type,
-            initializer=XavierUniform(),
-            norm_name=name + '.conv1.norm',
-            name=name + '.conv1')
+            initializer=XavierUniform())
         conv2 = nn.Conv2DTranspose(
             in_channels=ch_out,
             out_channels=ch_out,
@@ -81,17 +76,14 @@ class DeConv(nn.Layer):
             groups=ch_out,
             weight_attr=ParamAttr(initializer=XavierUniform()),
             bias_attr=False)
-        bn = batch_norm(
-            ch_out, norm_type=norm_type, norm_decay=0., name=name + '.bn')
+        bn = batch_norm(ch_out, norm_type=norm_type, norm_decay=0.)
         conv3 = ConvNormLayer(
             ch_in=ch_out,
             ch_out=ch_out,
             stride=1,
             filter_size=1,
             norm_type=norm_type,
-            initializer=XavierUniform(),
-            norm_name=name + '.conv3.norm',
-            name=name + '.conv3')
+            initializer=XavierUniform())
         self.deconv.add_sublayer('conv1', conv1)
         self.deconv.add_sublayer('relu6_1', nn.ReLU6())
@@ -106,12 +98,10 @@ class DeConv(nn.Layer):
 class LiteUpsample(nn.Layer):
-    def __init__(self, ch_in, ch_out, norm_type='bn', name=None):
+    def __init__(self, ch_in, ch_out, norm_type='bn'):
         super(LiteUpsample, self).__init__()
-        self.deconv = DeConv(
-            ch_in, ch_out, norm_type=norm_type, name=name + '.deconv')
-        self.conv = LiteConv(
-            ch_in, ch_out, norm_type=norm_type, name=name + '.liteconv')
+        self.deconv = DeConv(ch_in, ch_out, norm_type=norm_type)
+        self.conv = LiteConv(ch_in, ch_out, norm_type=norm_type)

     def forward(self, inputs):
         deconv_up = self.deconv(inputs)
@@ -142,8 +132,7 @@ class ShortCut(nn.Layer):
                         in_channels=in_channels,
                         out_channels=ch_out,
                         with_act=i < layer_num - 1,
-                        norm_type=norm_type,
-                        name=shortcut_name))
+                        norm_type=norm_type))
             else:
                 shortcut_conv.add_sublayer(
                     shortcut_name,
@@ -158,7 +147,7 @@ class ShortCut(nn.Layer):
                 if i < layer_num - 1:
                     shortcut_conv.add_sublayer(shortcut_name + '.act',
                                                nn.ReLU())
-        self.shortcut = self.add_sublayer('short', shortcut_conv)
+        self.shortcut = self.add_sublayer('shortcut', shortcut_conv)

     def forward(self, feat):
         out = self.shortcut(feat)
@@ -211,10 +200,7 @@ class TTFFPN(nn.Layer):
             upsample = self.add_sublayer(
                 'upsample.' + str(i),
                 upsample_module(
-                    in_c,
-                    out_c,
-                    norm_type=norm_type,
-                    name='deconv_layers.' + str(i)))
+                    in_c, out_c, norm_type=norm_type))
             self.upsample_list.append(upsample)
             if i < self.shortcut_len:
                 shortcut = self.add_sublayer(
ppdet/modeling/necks/yolo_fpn.py

@@ -25,9 +25,9 @@ from ..shape_spec import ShapeSpec
 __all__ = ['YOLOv3FPN', 'PPYOLOFPN']

-def add_coord(x):
+def add_coord(x, data_format):
     b = x.shape[0]
-    if self.data_format == 'NCHW':
+    if data_format == 'NCHW':
         h = x.shape[2]
         w = x.shape[3]
     else:
@@ -35,14 +35,14 @@ def add_coord(x):
         w = x.shape[2]

     gx = paddle.arange(w, dtype='float32') / (w - 1.) * 2.0 - 1.
-    if self.data_format == 'NCHW':
+    if data_format == 'NCHW':
         gx = gx.reshape([1, 1, 1, w]).expand([b, 1, h, w])
     else:
         gx = gx.reshape([1, 1, w, 1]).expand([b, h, w, 1])
     gx.stop_gradient = True

     gy = paddle.arange(h, dtype='float32') / (h - 1.) * 2.0 - 1.
-    if self.data_format == 'NCHW':
+    if data_format == 'NCHW':
         gy = gy.reshape([1, 1, h, 1]).expand([b, 1, h, w])
     else:
         gy = gy.reshape([1, h, 1, 1]).expand([b, h, w, 1])
@@ -237,7 +237,7 @@ class CoordConv(nn.Layer):
         self.data_format = data_format

     def forward(self, x):
-        gx, gy = add_coord(x)
+        gx, gy = add_coord(x, self.data_format)
         if self.data_format == 'NCHW':
             y = paddle.concat([x, gx, gy], axis=1)
         else:
@@ -509,7 +509,7 @@ class PPYOLOFPN(nn.Layer):
                  norm_type='bn',
                  data_format='NCHW',
                  coord_conv=False,
-                 conv_block_num=3,
+                 conv_block_num=2,
                  drop_block=False,
                  block_size=3,
                  keep_prob=0.9,
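The add_coord edit is a genuine bug fix, not just cleanup: the old module-level function referenced self.data_format even though self does not exist in a free function, which would raise NameError at call time. The fix threads data_format through as a parameter, and CoordConv.forward now passes it explicitly. A hypothetical call against the fixed helper (tensor shape chosen arbitrarily):

    import paddle
    from ppdet.modeling.necks.yolo_fpn import add_coord

    x = paddle.rand([2, 16, 8, 8])   # an NCHW feature map
    gx, gy = add_coord(x, 'NCHW')    # coordinate grids in [-1, 1]
    print(gx.shape, gy.shape)        # [2, 1, 8, 8] each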
ppdet/modeling/ops.py

@@ -53,9 +53,7 @@ def batch_norm(ch,
                norm_type='bn',
                norm_decay=0.,
                initializer=None,
-               name=None,
                data_format='NCHW'):
-    bn_name = name + '.bn'
     if norm_type == 'sync_bn':
         batch_norm = nn.SyncBatchNorm
     else: