diff --git a/configs/fcos/_base_/fcos_r50_fpn.yml b/configs/fcos/_base_/fcos_r50_fpn.yml
index 1124082ee32ded9e4ad74f2700918c030f2cad44..64a275d88023030b2299b0c3932b1c3fc9ce1e34 100644
--- a/configs/fcos/_base_/fcos_r50_fpn.yml
+++ b/configs/fcos/_base_/fcos_r50_fpn.yml
@@ -47,7 +47,6 @@ FCOSPostProcess:
   decode:
     name: FCOSBox
     num_classes: 80
-    batch_size: 1
   nms:
     name: MultiClassNMS
     nms_top_k: 1000
diff --git a/ppdet/modeling/backbones/darknet.py b/ppdet/modeling/backbones/darknet.py
index 13af903d4c9da549af7a7581571a9a16246ee095..8d3d07a25fc07f86ad5e32ea201f2a14b5e32476 100755
--- a/ppdet/modeling/backbones/darknet.py
+++ b/ppdet/modeling/backbones/darknet.py
@@ -35,8 +35,8 @@ class ConvBNLayer(nn.Layer):
                  norm_type='bn',
                  norm_decay=0.,
                  act="leaky",
-                 name=None,
-                 data_format='NCHW'):
+                 data_format='NCHW',
+                 name=''):
         """
         conv + bn + activation layer
 
@@ -50,7 +50,6 @@
             norm_type (str): batch norm type, default bn
             norm_decay (str): decay for weight and bias of batch norm layer, default 0.
             act (str): activation function type, default 'leaky', which means leaky_relu
-            name (str): layer name
             data_format (str): data format, NCHW or NHWC
         """
         super(ConvBNLayer, self).__init__()
@@ -68,7 +67,6 @@
             ch_out,
             norm_type=norm_type,
             norm_decay=norm_decay,
-            name=name,
             data_format=data_format)
         self.act = act
 
@@ -91,7 +89,6 @@
                  padding=1,
                  norm_type='bn',
                  norm_decay=0.,
-                 name=None,
                  data_format='NCHW'):
         """
         downsample layer
@@ -104,7 +101,6 @@
             padding (int): padding size, default 1
             norm_type (str): batch norm type, default bn
             norm_decay (str): decay for weight and bias of batch norm layer, default 0.
-            name (str): layer name
             data_format (str): data format, NCHW or NHWC
         """
 
@@ -118,8 +114,7 @@
             padding=padding,
             norm_type=norm_type,
             norm_decay=norm_decay,
-            data_format=data_format,
-            name=name)
+            data_format=data_format)
         self.ch_out = ch_out
 
     def forward(self, inputs):
@@ -133,7 +128,6 @@
                  ch_out,
                  norm_type='bn',
                  norm_decay=0.,
-                 name=None,
                  data_format='NCHW'):
         """
         BasicBlock layer of DarkNet
@@ -143,7 +137,6 @@
             ch_out (int): output channel
             norm_type (str): batch norm type, default bn
             norm_decay (str): decay for weight and bias of batch norm layer, default 0.
-            name (str): layer name
             data_format (str): data format, NCHW or NHWC
         """
 
@@ -157,8 +150,7 @@
             padding=0,
             norm_type=norm_type,
             norm_decay=norm_decay,
-            data_format=data_format,
-            name=name + '.0')
+            data_format=data_format)
         self.conv2 = ConvBNLayer(
             ch_in=ch_out,
             ch_out=ch_out * 2,
@@ -167,8 +159,7 @@
             padding=1,
             norm_type=norm_type,
             norm_decay=norm_decay,
-            data_format=data_format,
-            name=name + '.1')
+            data_format=data_format)
 
     def forward(self, inputs):
         conv1 = self.conv1(inputs)
@@ -205,8 +196,7 @@
             ch_out,
             norm_type=norm_type,
             norm_decay=norm_decay,
-            data_format=data_format,
-            name=name + '.0')
+            data_format=data_format)
         self.res_out_list = []
         for i in range(1, count):
             block_name = '{}.{}'.format(name, i)
@@ -217,8 +207,7 @@
                     ch_out,
                     norm_type=norm_type,
                     norm_decay=norm_decay,
-                    data_format=data_format,
-                    name=block_name))
+                    data_format=data_format))
             self.res_out_list.append(res_out)
         self.ch_out = ch_out
@@ -272,16 +261,14 @@
             padding=1,
             norm_type=norm_type,
             norm_decay=norm_decay,
-            data_format=data_format,
-            name='yolo_input')
+            data_format=data_format)
 
         self.downsample0 = DownSample(
             ch_in=32,
             ch_out=32 * 2,
             norm_type=norm_type,
             norm_decay=norm_decay,
-            data_format=data_format,
-            name='yolo_input.downsample')
+            data_format=data_format)
 
         self._out_channels = []
         self.darknet_conv_block_list = []
@@ -311,8 +298,7 @@
                     ch_out=32 * (2**(i + 2)),
                     norm_type=norm_type,
                     norm_decay=norm_decay,
-                    data_format=data_format,
-                    name=down_name))
+                    data_format=data_format))
             self.downsample_list.append(downsample)
 
     def forward(self, inputs):
diff --git a/ppdet/modeling/heads/bbox_head.py b/ppdet/modeling/heads/bbox_head.py
index 26b81d18ea6f0d0452c90dd14265718fe3c9d2f2..09796372ef81a911543374ff68b7bf16d7e64b53 100644
--- a/ppdet/modeling/heads/bbox_head.py
+++ b/ppdet/modeling/heads/bbox_head.py
@@ -126,10 +126,8 @@ class XConvNormHead(nn.Layer):
                     filter_size=3,
                     stride=1,
                     norm_type=self.norm_type,
-                    norm_name=head_conv_name + '_norm',
                     freeze_norm=self.freeze_norm,
-                    initializer=initializer,
-                    name=head_conv_name))
+                    initializer=initializer))
             self.bbox_head_convs.append(head_conv)
 
         fan = conv_dim * resolution * resolution
diff --git a/ppdet/modeling/heads/fcos_head.py b/ppdet/modeling/heads/fcos_head.py
index 1776d8c3810784df3d1052109c91d70fc5e4b675..3b8fd7f785d77ee8c18576cc4d7d71b44e86c509 100644
--- a/ppdet/modeling/heads/fcos_head.py
+++ b/ppdet/modeling/heads/fcos_head.py
@@ -28,6 +28,10 @@ from ppdet.modeling.layers import ConvNormLayer
 
 
 class ScaleReg(nn.Layer):
+    """
+    Parameter for scaling the regression outputs.
+ """ + def __init__(self): super(ScaleReg, self).__init__() self.scale_reg = self.create_parameter( @@ -77,10 +81,8 @@ class FCOSFeat(nn.Layer): stride=1, norm_type=norm_type, use_dcn=use_dcn, - norm_name=cls_conv_name + '_norm', bias_on=True, - lr_scale=2., - name=cls_conv_name)) + lr_scale=2.)) self.cls_subnet_convs.append(cls_conv) reg_conv_name = 'fcos_head_reg_tower_conv_{}'.format(i) @@ -93,10 +95,8 @@ class FCOSFeat(nn.Layer): stride=1, norm_type=norm_type, use_dcn=use_dcn, - norm_name=reg_conv_name + '_norm', bias_on=True, - lr_scale=2., - name=reg_conv_name)) + lr_scale=2.)) self.reg_subnet_convs.append(reg_conv) def forward(self, fpn_feat): @@ -113,12 +113,13 @@ class FCOSHead(nn.Layer): """ FCOSHead Args: - num_classes(int): Number of classes - fpn_stride(list): The stride of each FPN Layer - prior_prob(float): Used to set the bias init for the class prediction layer - fcos_loss(object): Instance of 'FCOSLoss' - norm_reg_targets(bool): Normalization the regression target if true - centerness_on_reg(bool): The prediction of centerness on regression or clssification branch + fcos_feat (object): Instance of 'FCOSFeat' + num_classes (int): Number of classes + fpn_stride (list): The stride of each FPN Layer + prior_prob (float): Used to set the bias init for the class prediction layer + fcos_loss (object): Instance of 'FCOSLoss' + norm_reg_targets (bool): Normalization the regression target if true + centerness_on_reg (bool): The prediction of centerness on regression or clssification branch """ __inject__ = ['fcos_feat', 'fcos_loss'] __shared__ = ['num_classes'] @@ -199,7 +200,15 @@ class FCOSHead(nn.Layer): scale_reg = self.add_sublayer(feat_name, ScaleReg()) self.scales_regs.append(scale_reg) - def _compute_locatioins_by_level(self, fpn_stride, feature): + def _compute_locations_by_level(self, fpn_stride, feature): + """ + Compute locations of anchor points of each FPN layer + Args: + fpn_stride (int): The stride of current FPN feature map + feature (Tensor): Tensor of current FPN feature map + Return: + Anchor points locations of current FPN feature map + """ shape_fm = paddle.shape(feature) shape_fm.stop_gradient = True h, w = shape_fm[2], shape_fm[3] @@ -247,8 +256,7 @@ class FCOSHead(nn.Layer): if not is_training: locations_list = [] for fpn_stride, feature in zip(self.fpn_stride, fpn_feats): - location = self._compute_locatioins_by_level(fpn_stride, - feature) + location = self._compute_locations_by_level(fpn_stride, feature) locations_list.append(location) return locations_list, cls_logits_list, bboxes_reg_list, centerness_list diff --git a/ppdet/modeling/heads/mask_head.py b/ppdet/modeling/heads/mask_head.py index eea70922a483e16cc379e394235b396307391e4c..e5df8d234e1696456eca945a7a732437a1917106 100644 --- a/ppdet/modeling/heads/mask_head.py +++ b/ppdet/modeling/heads/mask_head.py @@ -63,22 +63,19 @@ class MaskFeat(nn.Layer): filter_size=3, stride=1, norm_type=self.norm_type, - norm_name=conv_name + '_norm', - initializer=KaimingNormal(fan_in=fan_conv), - name=conv_name)) + initializer=KaimingNormal(fan_in=fan_conv))) mask_conv.add_sublayer(conv_name + 'act', nn.ReLU()) else: for i in range(self.num_convs): conv_name = 'mask_inter_feat_{}'.format(i + 1) - mask_conv.add_sublayer( - conv_name, - nn.Conv2D( - in_channels=in_channel if i == 0 else out_channel, - out_channels=out_channel, - kernel_size=3, - padding=1, - weight_attr=paddle.ParamAttr( - initializer=KaimingNormal(fan_in=fan_conv)))) + conv = nn.Conv2D( + in_channels=in_channel if i == 0 else out_channel, + 
out_channels=out_channel, + kernel_size=3, + padding=1, + weight_attr=paddle.ParamAttr( + initializer=KaimingNormal(fan_in=fan_conv))) + mask_conv.add_sublayer(conv_name, conv) mask_conv.add_sublayer(conv_name + 'act', nn.ReLU()) mask_conv.add_sublayer( 'conv5_mask', diff --git a/ppdet/modeling/heads/solov2_head.py b/ppdet/modeling/heads/solov2_head.py index d24b0b029fc3a5a15ee4831451c918f42b2a88f6..5f15461fa7fac5b2b8ba2b642fc8082fdaa15e53 100644 --- a/ppdet/modeling/heads/solov2_head.py +++ b/ppdet/modeling/heads/solov2_head.py @@ -75,9 +75,7 @@ class SOLOv2MaskHead(nn.Layer): ch_out=self.mid_channels, filter_size=3, stride=1, - norm_type='gn', - norm_name=conv_feat_name + '.conv' + str(i) + '.gn', - name=conv_feat_name + '.conv' + str(i))) + norm_type='gn')) self.add_sublayer('conv_pre_feat' + str(i), conv_pre_feat) self.convs_all_levels.append(conv_pre_feat) else: @@ -94,9 +92,7 @@ class SOLOv2MaskHead(nn.Layer): ch_out=self.mid_channels, filter_size=3, stride=1, - norm_type='gn', - norm_name=conv_feat_name + '.conv' + str(j) + '.gn', - name=conv_feat_name + '.conv' + str(j))) + norm_type='gn')) conv_pre_feat.add_sublayer( conv_feat_name + '.conv' + str(j) + 'act', nn.ReLU()) conv_pre_feat.add_sublayer( @@ -114,9 +110,7 @@ class SOLOv2MaskHead(nn.Layer): ch_out=self.out_channels, filter_size=1, stride=1, - norm_type='gn', - norm_name=conv_pred_name + '.gn', - name=conv_pred_name)) + norm_type='gn')) def forward(self, inputs): """ @@ -216,9 +210,7 @@ class SOLOv2Head(nn.Layer): ch_out=self.seg_feat_channels, filter_size=3, stride=1, - norm_type='gn', - norm_name='bbox_head.kernel_convs.{}.gn'.format(i), - name='bbox_head.kernel_convs.{}'.format(i))) + norm_type='gn')) self.kernel_pred_convs.append(kernel_conv) ch_in = self.in_channels if i == 0 else self.seg_feat_channels cate_conv = self.add_sublayer( @@ -228,9 +220,7 @@ class SOLOv2Head(nn.Layer): ch_out=self.seg_feat_channels, filter_size=3, stride=1, - norm_type='gn', - norm_name='bbox_head.cate_convs.{}.gn'.format(i), - name='bbox_head.cate_convs.{}'.format(i))) + norm_type='gn')) self.cate_pred_convs.append(cate_conv) self.solo_kernel = self.add_sublayer( @@ -241,11 +231,9 @@ class SOLOv2Head(nn.Layer): kernel_size=3, stride=1, padding=1, - weight_attr=ParamAttr( - name="bbox_head.solo_kernel.weight", - initializer=Normal( - mean=0., std=0.01)), - bias_attr=ParamAttr(name="bbox_head.solo_kernel.bias"))) + weight_attr=ParamAttr(initializer=Normal( + mean=0., std=0.01)), + bias_attr=True)) self.solo_cate = self.add_sublayer( 'bbox_head.solo_cate', nn.Conv2D( @@ -254,14 +242,10 @@ class SOLOv2Head(nn.Layer): kernel_size=3, stride=1, padding=1, - weight_attr=ParamAttr( - name="bbox_head.solo_cate.weight", - initializer=Normal( - mean=0., std=0.01)), - bias_attr=ParamAttr( - name="bbox_head.solo_cate.bias", - initializer=Constant( - value=float(-np.log((1 - 0.01) / 0.01)))))) + weight_attr=ParamAttr(initializer=Normal( + mean=0., std=0.01)), + bias_attr=ParamAttr(initializer=Constant( + value=float(-np.log((1 - 0.01) / 0.01)))))) def _points_nms(self, heat, kernel_size=2): hmax = F.max_pool2d(heat, kernel_size=kernel_size, stride=1, padding=1) diff --git a/ppdet/modeling/heads/ssd_head.py b/ppdet/modeling/heads/ssd_head.py index 8cbbe0a460441b5414fec5826f21699834c960c0..96ed5e424d659f96778b66fe95b2c799a1dfb92f 100644 --- a/ppdet/modeling/heads/ssd_head.py +++ b/ppdet/modeling/heads/ssd_head.py @@ -28,8 +28,7 @@ class SepConvLayer(nn.Layer): out_channels, kernel_size=3, padding=1, - conv_decay=0, - name=None): + conv_decay=0): 
         super(SepConvLayer, self).__init__()
         self.dw_conv = nn.Conv2D(
             in_channels=in_channels,
@@ -38,16 +37,13 @@
             stride=1,
             padding=padding,
             groups=in_channels,
-            weight_attr=ParamAttr(
-                name=name + "_dw_weights", regularizer=L2Decay(conv_decay)),
+            weight_attr=ParamAttr(regularizer=L2Decay(conv_decay)),
             bias_attr=False)
 
         self.bn = nn.BatchNorm2D(
             in_channels,
-            weight_attr=ParamAttr(
-                name=name + "_bn_scale", regularizer=L2Decay(0.)),
-            bias_attr=ParamAttr(
-                name=name + "_bn_offset", regularizer=L2Decay(0.)))
+            weight_attr=ParamAttr(regularizer=L2Decay(0.)),
+            bias_attr=ParamAttr(regularizer=L2Decay(0.)))
 
         self.pw_conv = nn.Conv2D(
             in_channels=in_channels,
@@ -55,8 +51,7 @@
             kernel_size=1,
             stride=1,
             padding=0,
-            weight_attr=ParamAttr(
-                name=name + "_pw_weights", regularizer=L2Decay(conv_decay)),
+            weight_attr=ParamAttr(regularizer=L2Decay(conv_decay)),
             bias_attr=False)
 
     def forward(self, x):
@@ -125,8 +120,7 @@
                         out_channels=num_prior * 4,
                         kernel_size=kernel_size,
                         padding=padding,
-                        conv_decay=conv_decay,
-                        name=box_conv_name))
+                        conv_decay=conv_decay))
             self.box_convs.append(box_conv)
 
             score_conv_name = "scores{}".format(i)
@@ -146,8 +140,7 @@
                         out_channels=num_prior * self.num_classes,
                         kernel_size=kernel_size,
                         padding=padding,
-                        conv_decay=conv_decay,
-                        name=score_conv_name))
+                        conv_decay=conv_decay))
             self.score_convs.append(score_conv)
 
     @classmethod
diff --git a/ppdet/modeling/heads/ttf_head.py b/ppdet/modeling/heads/ttf_head.py
index 2a676fc7f9bc8efc18c3c759836764b0d7cac62b..9e2eb6add8c4d0e4c7ea9a19a654d9d67de07e78 100644
--- a/ppdet/modeling/heads/ttf_head.py
+++ b/ppdet/modeling/heads/ttf_head.py
@@ -61,8 +61,7 @@
                     LiteConv(
                         in_channels=ch_in if i == 0 else ch_out,
                         out_channels=ch_out,
-                        norm_type=norm_type,
-                        name=lite_name))
+                        norm_type=norm_type))
                 head_conv.add_sublayer(lite_name + '.act', nn.ReLU6())
             else:
                 if dcn_head:
@@ -85,19 +84,17 @@
                         bias_attr=ParamAttr(
                             learning_rate=2., regularizer=L2Decay(0.))))
                 head_conv.add_sublayer(name + '.act', nn.ReLU())
-        self.feat = self.add_sublayer('hm_feat', head_conv)
+        self.feat = head_conv
         bias_init = float(-np.log((1 - 0.01) / 0.01))
-        self.head = self.add_sublayer(
-            'hm_head',
-            nn.Conv2D(
-                in_channels=ch_out,
-                out_channels=num_classes,
-                kernel_size=1,
-                weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
-                bias_attr=ParamAttr(
-                    learning_rate=2.,
-                    regularizer=L2Decay(0.),
-                    initializer=Constant(bias_init))))
+        self.head = nn.Conv2D(
+            in_channels=ch_out,
+            out_channels=num_classes,
+            kernel_size=1,
+            weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
+            bias_attr=ParamAttr(
+                learning_rate=2.,
+                regularizer=L2Decay(0.),
+                initializer=Constant(bias_init)))
 
     def forward(self, feat):
         out = self.feat(feat)
@@ -139,8 +136,7 @@
                     LiteConv(
                         in_channels=ch_in if i == 0 else ch_out,
                         out_channels=ch_out,
-                        norm_type=norm_type,
-                        name=lite_name))
+                        norm_type=norm_type))
                 head_conv.add_sublayer(lite_name + '.act', nn.ReLU6())
             else:
                 if dcn_head:
@@ -164,16 +160,14 @@
                             learning_rate=2., regularizer=L2Decay(0.))))
                 head_conv.add_sublayer(name + '.act', nn.ReLU())
-        self.feat = self.add_sublayer('wh_feat', head_conv)
-        self.head = self.add_sublayer(
-            'wh_head',
-            nn.Conv2D(
-                in_channels=ch_out,
-                out_channels=4,
-                kernel_size=1,
-                weight_attr=ParamAttr(initializer=Normal(0, 0.001)),
-                bias_attr=ParamAttr(
-                    learning_rate=2., regularizer=L2Decay(0.))))
+        self.feat = head_conv
+        self.head = nn.Conv2D(
+            in_channels=ch_out,
+            out_channels=4,
+            kernel_size=1,
+            weight_attr=ParamAttr(initializer=Normal(0, 0.001)),
+            bias_attr=ParamAttr(
+                learning_rate=2., regularizer=L2Decay(0.)))
 
     def forward(self, feat):
         out = self.feat(feat)
@@ -249,6 +243,9 @@
         return hm, wh
 
     def filter_box_by_weight(self, pred, target, weight):
+        """
+        Filter out boxes where ttf_reg_weight is 0, keeping only positive samples.
+        """
         index = paddle.nonzero(weight > 0)
         index.stop_gradient = True
         weight = paddle.gather_nd(weight, index)
diff --git a/ppdet/modeling/heads/yolo_head.py b/ppdet/modeling/heads/yolo_head.py
index 033089ab2fa889c9c6c0f80e1e1ed09079686be1..a0817747f68c04743afc6e7da20d1485a0fcc196 100644
--- a/ppdet/modeling/heads/yolo_head.py
+++ b/ppdet/modeling/heads/yolo_head.py
@@ -4,7 +4,6 @@ import paddle.nn.functional as F
 from paddle import ParamAttr
 from paddle.regularizer import L2Decay
 from ppdet.core.workspace import register
-from ..backbones.darknet import ConvBNLayer
 
 
 def _de_sigmoid(x, eps=1e-7):
@@ -70,7 +69,6 @@
                 padding=0,
                 data_format=data_format,
                 bias_attr=ParamAttr(regularizer=L2Decay(0.)))
-            conv.skip_quant = True
             yolo_output = self.add_sublayer(name, conv)
             self.yolo_outputs.append(yolo_output)
 
diff --git a/ppdet/modeling/layers.py b/ppdet/modeling/layers.py
index a4eb3b9c2510f1359d1b399efb5e11bc568d5771..5877b5f37566e9f2e58213e785e56bdea9d330f9 100644
--- a/ppdet/modeling/layers.py
+++ b/ppdet/modeling/layers.py
@@ -114,21 +114,17 @@ class ConvNormLayer(nn.Layer):
                  norm_decay=0.,
                  norm_groups=32,
                  use_dcn=False,
-                 norm_name=None,
                  bias_on=False,
                  lr_scale=1.,
                  freeze_norm=False,
                  initializer=Normal(
-                     mean=0., std=0.01),
-                 name=None):
+                     mean=0., std=0.01)):
         super(ConvNormLayer, self).__init__()
         assert norm_type in ['bn', 'sync_bn', 'gn']
 
         if bias_on:
             bias_attr = ParamAttr(
-                name=name + "_bias",
-                initializer=Constant(value=0.),
-                learning_rate=lr_scale)
+                initializer=Constant(value=0.), learning_rate=lr_scale)
         else:
             bias_attr = False
 
@@ -141,9 +137,7 @@
                 padding=(filter_size - 1) // 2,
                 groups=groups,
                 weight_attr=ParamAttr(
-                    name=name + "_weight",
-                    initializer=initializer,
-                    learning_rate=1.),
+                    initializer=initializer, learning_rate=1.),
                 bias_attr=bias_attr)
         else:
             # in FCOS-DCN head, specifically need learning_rate and regularizer
@@ -155,23 +149,16 @@
                 padding=(filter_size - 1) // 2,
                 groups=groups,
                 weight_attr=ParamAttr(
-                    name=name + "_weight",
-                    initializer=initializer,
-                    learning_rate=1.),
+                    initializer=initializer, learning_rate=1.),
                 bias_attr=True,
                 lr_scale=2.,
-                regularizer=L2Decay(norm_decay),
-                name=name)
+                regularizer=L2Decay(norm_decay))
 
         norm_lr = 0. if freeze_norm else 1.
         param_attr = ParamAttr(
-            name=norm_name + "_scale",
-            learning_rate=norm_lr,
-            regularizer=L2Decay(norm_decay))
+            learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
         bias_attr = ParamAttr(
-            name=norm_name + "_offset",
-            learning_rate=norm_lr,
-            regularizer=L2Decay(norm_decay))
+            learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
 
         if norm_type == 'bn':
             self.norm = nn.BatchNorm2D(
                 ch_out, weight_attr=param_attr, bias_attr=bias_attr)
@@ -208,27 +195,21 @@
             stride=stride,
             groups=in_channels,
             norm_type=norm_type,
-            initializer=XavierUniform(),
-            norm_name=name + '.conv1.norm',
-            name=name + '.conv1')
+            initializer=XavierUniform())
         conv2 = ConvNormLayer(
             in_channels,
             out_channels,
             filter_size=1,
             stride=stride,
             norm_type=norm_type,
-            initializer=XavierUniform(),
-            norm_name=name + '.conv2.norm',
-            name=name + '.conv2')
+            initializer=XavierUniform())
         conv3 = ConvNormLayer(
             out_channels,
             out_channels,
             filter_size=1,
             stride=stride,
             norm_type=norm_type,
-            initializer=XavierUniform(),
-            norm_name=name + '.conv3.norm',
-            name=name + '.conv3')
+            initializer=XavierUniform())
         conv4 = ConvNormLayer(
             out_channels,
             out_channels,
@@ -236,9 +217,7 @@
             stride=stride,
             groups=out_channels,
             norm_type=norm_type,
-            initializer=XavierUniform(),
-            norm_name=name + '.conv4.norm',
-            name=name + '.conv4')
+            initializer=XavierUniform())
         conv_list = [conv1, conv2, conv3, conv4]
         self.lite_conv.add_sublayer('conv1', conv1)
         self.lite_conv.add_sublayer('relu6_1', nn.ReLU6())
@@ -675,20 +654,20 @@ class AnchorGrid(object):
 
 @register
 @serializable
 class FCOSBox(object):
-    __shared__ = ['num_classes', 'batch_size']
+    __shared__ = ['num_classes']
 
-    def __init__(self, num_classes=80, batch_size=1):
+    def __init__(self, num_classes=80):
         super(FCOSBox, self).__init__()
         self.num_classes = num_classes
-        self.batch_size = batch_size
 
     def _merge_hw(self, inputs, ch_type="channel_first"):
         """
+        Merge h and w of the feature map into one dimension.
         Args:
-            inputs (Variables): Feature map whose H and W will be merged into one dimension
-            ch_type (str): channel_first / channel_last
+            inputs (Tensor): Tensor of the input feature map
+            ch_type (str): "channel_first" or "channel_last" style
         Return:
-            new_shape (Variables): The new shape after h and w merged into one dimension
+            new_shape (Tensor): The new shape after h and w merged
         """
         shape_ = paddle.shape(inputs)
         bs, ch, hi, wi = shape_[0], shape_[1], shape_[2], shape_[3]
@@ -706,16 +685,18 @@
     def _postprocessing_by_level(self, locations, box_cls, box_reg, box_ctn,
                                  scale_factor):
         """
+        Postprocess each layer of the output with corresponding locations.
         Args:
-            locations (Variables): anchor points for current layer, [H*W, 2]
-            box_cls (Variables): categories prediction, [N, C, H, W], C is the number of classes
-            box_reg (Variables): bounding box prediction, [N, 4, H, W]
-            box_ctn (Variables): centerness prediction, [N, 1, H, W]
-            scale_factor (Variables): [h_scale, w_scale] for input images
+            locations (Tensor): anchor points for current layer, [H*W, 2]
+            box_cls (Tensor): categories prediction, [N, C, H, W],
+                C is the number of classes
+            box_reg (Tensor): bounding box prediction, [N, 4, H, W]
+            box_ctn (Tensor): centerness prediction, [N, 1, H, W]
+            scale_factor (Tensor): [h_scale, w_scale] for input images
         Return:
-            box_cls_ch_last (Variables): score for each category, in [N, C, M]
+            box_cls_ch_last (Tensor): score for each category, in [N, C, M]
                 C is the number of classes and M is the number of anchor points
-            box_reg_decoding (Variables): decoded bounding box, in [N, M, 4]
+            box_reg_decoding (Tensor): decoded bounding box, in [N, M, 4]
                 last dimension is [x1, y1, x2, y2]
         """
         act_shape_cls = self._merge_hw(box_cls)
@@ -771,12 +752,18 @@
         self.down_ratio = down_ratio
 
     def _simple_nms(self, heat, kernel=3):
+        """
+        Use max pooling to filter out non-maximum scores and keep local peaks.
+        """
         pad = (kernel - 1) // 2
         hmax = F.max_pool2d(heat, kernel, stride=1, padding=pad)
         keep = paddle.cast(hmax == heat, 'float32')
         return heat * keep
 
     def _topk(self, scores):
+        """
+        Select top k scores and decode to get xy coordinates.
+        """
         k = self.max_per_img
         shape_fm = paddle.shape(scores)
         shape_fm.stop_gradient = True
diff --git a/ppdet/modeling/necks/fpn.py b/ppdet/modeling/necks/fpn.py
index 0b9f6a798bdc0d87630d96135f86cc8dc2802506..867b7dc451a85773a1e902232c260b47d08ece4a 100644
--- a/ppdet/modeling/necks/fpn.py
+++ b/ppdet/modeling/necks/fpn.py
@@ -105,10 +105,8 @@
                         stride=1,
                         norm_type=self.norm_type,
                         norm_decay=self.norm_decay,
-                        norm_name=lateral_name + '_norm',
                         freeze_norm=self.freeze_norm,
-                        initializer=XavierUniform(fan_out=in_c),
-                        name=lateral_name))
+                        initializer=XavierUniform(fan_out=in_c)))
             else:
                 lateral = self.add_sublayer(
                     lateral_name,
@@ -131,10 +129,8 @@
                         stride=1,
                         norm_type=self.norm_type,
                         norm_decay=self.norm_decay,
-                        norm_name=fpn_name + '_norm',
                         freeze_norm=self.freeze_norm,
-                        initializer=XavierUniform(fan_out=fan),
-                        name=fpn_name))
+                        initializer=XavierUniform(fan_out=fan)))
             else:
                 fpn_conv = self.add_sublayer(
                     fpn_name,
@@ -166,10 +162,8 @@
                             stride=2,
                             norm_type=self.norm_type,
                             norm_decay=self.norm_decay,
-                            norm_name=extra_fpn_name + '_norm',
                             freeze_norm=self.freeze_norm,
-                            initializer=XavierUniform(fan_out=fan),
-                            name=extra_fpn_name))
+                            initializer=XavierUniform(fan_out=fan)))
                 else:
                     extra_fpn_conv = self.add_sublayer(
                         extra_fpn_name,
diff --git a/ppdet/modeling/necks/ttf_fpn.py b/ppdet/modeling/necks/ttf_fpn.py
index ef83a8b838b0e571e1348bc2e9410e970743f81d..9c7f3924f0c2f611be5aab73cfd23921226e5eec 100644
--- a/ppdet/modeling/necks/ttf_fpn.py
+++ b/ppdet/modeling/necks/ttf_fpn.py
@@ -16,7 +16,6 @@ import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 from paddle import ParamAttr
-from paddle.nn.initializer import Constant, Uniform, Normal
 from paddle.nn.initializer import Constant, Uniform, Normal, XavierUniform
 from paddle import ParamAttr
 from ppdet.core.workspace import register, serializable
@@ -28,11 +27,9 @@ from ..shape_spec import ShapeSpec
 
 __all__ = ['TTFFPN']
 
-__all__ = ['TTFFPN']
-
 
 class Upsample(nn.Layer):
-    def __init__(self, ch_in, ch_out, norm_type='bn', name=None):
+    def __init__(self, ch_in, ch_out, norm_type='bn'):
         super(Upsample, self).__init__()
         fan_in = ch_in * 3 * 3
         stdv = 1. / math.sqrt(fan_in)
@@ -49,7 +46,7 @@
             regularizer=L2Decay(0.))
 
         self.bn = batch_norm(
-            ch_out, norm_type=norm_type, initializer=Constant(1.), name=name)
+            ch_out, norm_type=norm_type, initializer=Constant(1.))
 
     def forward(self, feat):
         dcn = self.dcn(feat)
@@ -60,7 +57,7 @@
 
 
 class DeConv(nn.Layer):
-    def __init__(self, ch_in, ch_out, norm_type='bn', name=None):
+    def __init__(self, ch_in, ch_out, norm_type='bn'):
         super(DeConv, self).__init__()
         self.deconv = nn.Sequential()
         conv1 = ConvNormLayer(
@@ -69,9 +66,7 @@
             stride=1,
             filter_size=1,
             norm_type=norm_type,
-            initializer=XavierUniform(),
-            norm_name=name + '.conv1.norm',
-            name=name + '.conv1')
+            initializer=XavierUniform())
         conv2 = nn.Conv2DTranspose(
             in_channels=ch_out,
             out_channels=ch_out,
@@ -81,17 +76,14 @@
             groups=ch_out,
             weight_attr=ParamAttr(initializer=XavierUniform()),
             bias_attr=False)
-        bn = batch_norm(
-            ch_out, norm_type=norm_type, norm_decay=0., name=name + '.bn')
+        bn = batch_norm(ch_out, norm_type=norm_type, norm_decay=0.)
         conv3 = ConvNormLayer(
             ch_in=ch_out,
             ch_out=ch_out,
             stride=1,
             filter_size=1,
             norm_type=norm_type,
-            initializer=XavierUniform(),
-            norm_name=name + '.conv3.norm',
-            name=name + '.conv3')
+            initializer=XavierUniform())
 
         self.deconv.add_sublayer('conv1', conv1)
         self.deconv.add_sublayer('relu6_1', nn.ReLU6())
@@ -106,12 +98,10 @@
 
 
 class LiteUpsample(nn.Layer):
-    def __init__(self, ch_in, ch_out, norm_type='bn', name=None):
+    def __init__(self, ch_in, ch_out, norm_type='bn'):
         super(LiteUpsample, self).__init__()
-        self.deconv = DeConv(
-            ch_in, ch_out, norm_type=norm_type, name=name + '.deconv')
-        self.conv = LiteConv(
-            ch_in, ch_out, norm_type=norm_type, name=name + '.liteconv')
+        self.deconv = DeConv(ch_in, ch_out, norm_type=norm_type)
+        self.conv = LiteConv(ch_in, ch_out, norm_type=norm_type)
 
     def forward(self, inputs):
         deconv_up = self.deconv(inputs)
@@ -142,8 +132,7 @@
                         in_channels=in_channels,
                         out_channels=ch_out,
                         with_act=i < layer_num - 1,
-                        norm_type=norm_type,
-                        name=shortcut_name))
+                        norm_type=norm_type))
             else:
                 shortcut_conv.add_sublayer(
                     shortcut_name,
@@ -158,7 +147,7 @@
             if i < layer_num - 1:
                 shortcut_conv.add_sublayer(shortcut_name + '.act', nn.ReLU())
 
-        self.shortcut = self.add_sublayer('short', shortcut_conv)
+        self.shortcut = self.add_sublayer('shortcut', shortcut_conv)
 
     def forward(self, feat):
         out = self.shortcut(feat)
@@ -211,10 +200,7 @@
             upsample = self.add_sublayer(
                 'upsample.' + str(i),
                 upsample_module(
-                    in_c,
-                    out_c,
-                    norm_type=norm_type,
-                    name='deconv_layers.' + str(i)))
+                    in_c, out_c, norm_type=norm_type))
             self.upsample_list.append(upsample)
             if i < self.shortcut_len:
                 shortcut = self.add_sublayer(
diff --git a/ppdet/modeling/necks/yolo_fpn.py b/ppdet/modeling/necks/yolo_fpn.py
index 873e43f0e068705cc5a5c05a294db6491d952d12..25458974aa21c10d4b3635aba05dccebd2dfd141 100644
--- a/ppdet/modeling/necks/yolo_fpn.py
+++ b/ppdet/modeling/necks/yolo_fpn.py
@@ -25,9 +25,9 @@ from ..shape_spec import ShapeSpec
 
 __all__ = ['YOLOv3FPN', 'PPYOLOFPN']
 
-def add_coord(x):
+def add_coord(x, data_format):
     b = x.shape[0]
-    if self.data_format == 'NCHW':
+    if data_format == 'NCHW':
         h = x.shape[2]
         w = x.shape[3]
     else:
@@ -35,14 +35,14 @@
         w = x.shape[2]
 
     gx = paddle.arange(w, dtype='float32') / (w - 1.) * 2.0 - 1.
-    if self.data_format == 'NCHW':
+    if data_format == 'NCHW':
         gx = gx.reshape([1, 1, 1, w]).expand([b, 1, h, w])
     else:
         gx = gx.reshape([1, 1, w, 1]).expand([b, h, w, 1])
     gx.stop_gradient = True
 
     gy = paddle.arange(h, dtype='float32') / (h - 1.) * 2.0 - 1.
-    if self.data_format == 'NCHW':
+    if data_format == 'NCHW':
         gy = gy.reshape([1, 1, h, 1]).expand([b, 1, h, w])
     else:
         gy = gy.reshape([1, h, 1, 1]).expand([b, h, w, 1])
@@ -237,7 +237,7 @@ class CoordConv(nn.Layer):
         self.data_format = data_format
 
     def forward(self, x):
-        gx, gy = add_coord(x)
+        gx, gy = add_coord(x, self.data_format)
         if self.data_format == 'NCHW':
             y = paddle.concat([x, gx, gy], axis=1)
         else:
@@ -509,7 +509,7 @@ class PPYOLOFPN(nn.Layer):
                  norm_type='bn',
                  data_format='NCHW',
                  coord_conv=False,
-                 conv_block_num=3,
+                 conv_block_num=2,
                  drop_block=False,
                  block_size=3,
                  keep_prob=0.9,
diff --git a/ppdet/modeling/ops.py b/ppdet/modeling/ops.py
index e2c193030f41f72a8de80ed9c5a74bf30b12525a..f190a489580e114d06b39bc10bd9868833ed5bec 100644
--- a/ppdet/modeling/ops.py
+++ b/ppdet/modeling/ops.py
@@ -53,9 +53,7 @@ def batch_norm(ch,
                norm_type='bn',
                norm_decay=0.,
                initializer=None,
-               name=None,
                data_format='NCHW'):
-    bn_name = name + '.bn'
     if norm_type == 'sync_bn':
         batch_norm = nn.SyncBatchNorm
     else: