diff --git a/configs/fcos/_base_/fcos_r50_fpn.yml b/configs/fcos/_base_/fcos_r50_fpn.yml
index 1124082ee32ded9e4ad74f2700918c030f2cad44..64a275d88023030b2299b0c3932b1c3fc9ce1e34 100644
--- a/configs/fcos/_base_/fcos_r50_fpn.yml
+++ b/configs/fcos/_base_/fcos_r50_fpn.yml
@@ -47,7 +47,6 @@ FCOSPostProcess:
   decode:
     name: FCOSBox
     num_classes: 80
-    batch_size: 1
   nms:
     name: MultiClassNMS
     nms_top_k: 1000
diff --git a/ppdet/modeling/backbones/darknet.py b/ppdet/modeling/backbones/darknet.py
index 13af903d4c9da549af7a7581571a9a16246ee095..8d3d07a25fc07f86ad5e32ea201f2a14b5e32476 100755
--- a/ppdet/modeling/backbones/darknet.py
+++ b/ppdet/modeling/backbones/darknet.py
@@ -35,8 +35,8 @@ class ConvBNLayer(nn.Layer):
                  norm_type='bn',
                  norm_decay=0.,
                  act="leaky",
-                 name=None,
-                 data_format='NCHW'):
+                 data_format='NCHW',
+                 name=''):
         """
         conv + bn + activation layer
 
@@ -50,7 +50,6 @@
             norm_type (str): batch norm type, default bn
             norm_decay (str): decay for weight and bias of batch norm layer, default 0.
             act (str): activation function type, default 'leaky', which means leaky_relu
-            name (str): layer name
             data_format (str): data format, NCHW or NHWC
         """
         super(ConvBNLayer, self).__init__()
@@ -68,7 +67,6 @@
             ch_out,
             norm_type=norm_type,
             norm_decay=norm_decay,
-            name=name,
             data_format=data_format)
         self.act = act
 
@@ -91,7 +89,6 @@
                  padding=1,
                  norm_type='bn',
                  norm_decay=0.,
-                 name=None,
                  data_format='NCHW'):
         """
         downsample layer
@@ -104,7 +101,6 @@
             padding (int): padding size, default 1
             norm_type (str): batch norm type, default bn
             norm_decay (str): decay for weight and bias of batch norm layer, default 0.
-            name (str): layer name
             data_format (str): data format, NCHW or NHWC
         """
 
@@ -118,8 +114,7 @@
             padding=padding,
             norm_type=norm_type,
             norm_decay=norm_decay,
-            data_format=data_format,
-            name=name)
+            data_format=data_format)
         self.ch_out = ch_out
 
     def forward(self, inputs):
@@ -133,7 +128,6 @@
                  ch_out,
                  norm_type='bn',
                  norm_decay=0.,
-                 name=None,
                  data_format='NCHW'):
         """
         BasicBlock layer of DarkNet
@@ -143,7 +137,6 @@
             ch_out (int): output channel
             norm_type (str): batch norm type, default bn
             norm_decay (str): decay for weight and bias of batch norm layer, default 0.
-            name (str): layer name
             data_format (str): data format, NCHW or NHWC
         """
 
@@ -157,8 +150,7 @@
             padding=0,
             norm_type=norm_type,
             norm_decay=norm_decay,
-            data_format=data_format,
-            name=name + '.0')
+            data_format=data_format)
         self.conv2 = ConvBNLayer(
             ch_in=ch_out,
             ch_out=ch_out * 2,
@@ -167,8 +159,7 @@
             padding=1,
             norm_type=norm_type,
             norm_decay=norm_decay,
-            data_format=data_format,
-            name=name + '.1')
+            data_format=data_format)
 
     def forward(self, inputs):
         conv1 = self.conv1(inputs)
@@ -205,8 +196,7 @@
             ch_out,
             norm_type=norm_type,
             norm_decay=norm_decay,
-            data_format=data_format,
-            name=name + '.0')
+            data_format=data_format)
         self.res_out_list = []
         for i in range(1, count):
             block_name = '{}.{}'.format(name, i)
@@ -217,8 +207,7 @@
                     ch_out,
                     norm_type=norm_type,
                     norm_decay=norm_decay,
-                    data_format=data_format,
-                    name=block_name))
+                    data_format=data_format))
             self.res_out_list.append(res_out)
         self.ch_out = ch_out
@@ -272,16 +261,14 @@
             padding=1,
             norm_type=norm_type,
             norm_decay=norm_decay,
-            data_format=data_format,
-            name='yolo_input')
+            data_format=data_format)
 
         self.downsample0 = DownSample(
             ch_in=32,
             ch_out=32 * 2,
             norm_type=norm_type,
             norm_decay=norm_decay,
-            data_format=data_format,
-            name='yolo_input.downsample')
+            data_format=data_format)
 
         self._out_channels = []
         self.darknet_conv_block_list = []
@@ -311,8 +298,7 @@
                     ch_out=32 * (2**(i + 2)),
                     norm_type=norm_type,
                     norm_decay=norm_decay,
-                    data_format=data_format,
-                    name=down_name))
+                    data_format=data_format))
             self.downsample_list.append(downsample)
 
     def forward(self, inputs):
diff --git a/ppdet/modeling/heads/bbox_head.py b/ppdet/modeling/heads/bbox_head.py
index 26b81d18ea6f0d0452c90dd14265718fe3c9d2f2..09796372ef81a911543374ff68b7bf16d7e64b53 100644
--- a/ppdet/modeling/heads/bbox_head.py
+++ b/ppdet/modeling/heads/bbox_head.py
@@ -126,10 +126,8 @@ class XConvNormHead(nn.Layer):
                     filter_size=3,
                     stride=1,
                     norm_type=self.norm_type,
-                    norm_name=head_conv_name + '_norm',
                     freeze_norm=self.freeze_norm,
-                    initializer=initializer,
-                    name=head_conv_name))
+                    initializer=initializer))
             self.bbox_head_convs.append(head_conv)
 
         fan = conv_dim * resolution * resolution
diff --git a/ppdet/modeling/heads/fcos_head.py b/ppdet/modeling/heads/fcos_head.py
index 1776d8c3810784df3d1052109c91d70fc5e4b675..3b8fd7f785d77ee8c18576cc4d7d71b44e86c509 100644
--- a/ppdet/modeling/heads/fcos_head.py
+++ b/ppdet/modeling/heads/fcos_head.py
@@ -28,6 +28,10 @@ from ppdet.modeling.layers import ConvNormLayer
 
 
 class ScaleReg(nn.Layer):
+    """
+    Parameter for scaling the regression outputs.
+ """ + def __init__(self): super(ScaleReg, self).__init__() self.scale_reg = self.create_parameter( @@ -77,10 +81,8 @@ class FCOSFeat(nn.Layer): stride=1, norm_type=norm_type, use_dcn=use_dcn, - norm_name=cls_conv_name + '_norm', bias_on=True, - lr_scale=2., - name=cls_conv_name)) + lr_scale=2.)) self.cls_subnet_convs.append(cls_conv) reg_conv_name = 'fcos_head_reg_tower_conv_{}'.format(i) @@ -93,10 +95,8 @@ class FCOSFeat(nn.Layer): stride=1, norm_type=norm_type, use_dcn=use_dcn, - norm_name=reg_conv_name + '_norm', bias_on=True, - lr_scale=2., - name=reg_conv_name)) + lr_scale=2.)) self.reg_subnet_convs.append(reg_conv) def forward(self, fpn_feat): @@ -113,12 +113,13 @@ class FCOSHead(nn.Layer): """ FCOSHead Args: - num_classes(int): Number of classes - fpn_stride(list): The stride of each FPN Layer - prior_prob(float): Used to set the bias init for the class prediction layer - fcos_loss(object): Instance of 'FCOSLoss' - norm_reg_targets(bool): Normalization the regression target if true - centerness_on_reg(bool): The prediction of centerness on regression or clssification branch + fcos_feat (object): Instance of 'FCOSFeat' + num_classes (int): Number of classes + fpn_stride (list): The stride of each FPN Layer + prior_prob (float): Used to set the bias init for the class prediction layer + fcos_loss (object): Instance of 'FCOSLoss' + norm_reg_targets (bool): Normalization the regression target if true + centerness_on_reg (bool): The prediction of centerness on regression or clssification branch """ __inject__ = ['fcos_feat', 'fcos_loss'] __shared__ = ['num_classes'] @@ -199,7 +200,15 @@ class FCOSHead(nn.Layer): scale_reg = self.add_sublayer(feat_name, ScaleReg()) self.scales_regs.append(scale_reg) - def _compute_locatioins_by_level(self, fpn_stride, feature): + def _compute_locations_by_level(self, fpn_stride, feature): + """ + Compute locations of anchor points of each FPN layer + Args: + fpn_stride (int): The stride of current FPN feature map + feature (Tensor): Tensor of current FPN feature map + Return: + Anchor points locations of current FPN feature map + """ shape_fm = paddle.shape(feature) shape_fm.stop_gradient = True h, w = shape_fm[2], shape_fm[3] @@ -247,8 +256,7 @@ class FCOSHead(nn.Layer): if not is_training: locations_list = [] for fpn_stride, feature in zip(self.fpn_stride, fpn_feats): - location = self._compute_locatioins_by_level(fpn_stride, - feature) + location = self._compute_locations_by_level(fpn_stride, feature) locations_list.append(location) return locations_list, cls_logits_list, bboxes_reg_list, centerness_list diff --git a/ppdet/modeling/heads/mask_head.py b/ppdet/modeling/heads/mask_head.py index eea70922a483e16cc379e394235b396307391e4c..e5df8d234e1696456eca945a7a732437a1917106 100644 --- a/ppdet/modeling/heads/mask_head.py +++ b/ppdet/modeling/heads/mask_head.py @@ -63,22 +63,19 @@ class MaskFeat(nn.Layer): filter_size=3, stride=1, norm_type=self.norm_type, - norm_name=conv_name + '_norm', - initializer=KaimingNormal(fan_in=fan_conv), - name=conv_name)) + initializer=KaimingNormal(fan_in=fan_conv))) mask_conv.add_sublayer(conv_name + 'act', nn.ReLU()) else: for i in range(self.num_convs): conv_name = 'mask_inter_feat_{}'.format(i + 1) - mask_conv.add_sublayer( - conv_name, - nn.Conv2D( - in_channels=in_channel if i == 0 else out_channel, - out_channels=out_channel, - kernel_size=3, - padding=1, - weight_attr=paddle.ParamAttr( - initializer=KaimingNormal(fan_in=fan_conv)))) + conv = nn.Conv2D( + in_channels=in_channel if i == 0 else out_channel, + 
out_channels=out_channel, + kernel_size=3, + padding=1, + weight_attr=paddle.ParamAttr( + initializer=KaimingNormal(fan_in=fan_conv))) + mask_conv.add_sublayer(conv_name, conv) mask_conv.add_sublayer(conv_name + 'act', nn.ReLU()) mask_conv.add_sublayer( 'conv5_mask', diff --git a/ppdet/modeling/heads/solov2_head.py b/ppdet/modeling/heads/solov2_head.py index d24b0b029fc3a5a15ee4831451c918f42b2a88f6..5f15461fa7fac5b2b8ba2b642fc8082fdaa15e53 100644 --- a/ppdet/modeling/heads/solov2_head.py +++ b/ppdet/modeling/heads/solov2_head.py @@ -75,9 +75,7 @@ class SOLOv2MaskHead(nn.Layer): ch_out=self.mid_channels, filter_size=3, stride=1, - norm_type='gn', - norm_name=conv_feat_name + '.conv' + str(i) + '.gn', - name=conv_feat_name + '.conv' + str(i))) + norm_type='gn')) self.add_sublayer('conv_pre_feat' + str(i), conv_pre_feat) self.convs_all_levels.append(conv_pre_feat) else: @@ -94,9 +92,7 @@ class SOLOv2MaskHead(nn.Layer): ch_out=self.mid_channels, filter_size=3, stride=1, - norm_type='gn', - norm_name=conv_feat_name + '.conv' + str(j) + '.gn', - name=conv_feat_name + '.conv' + str(j))) + norm_type='gn')) conv_pre_feat.add_sublayer( conv_feat_name + '.conv' + str(j) + 'act', nn.ReLU()) conv_pre_feat.add_sublayer( @@ -114,9 +110,7 @@ class SOLOv2MaskHead(nn.Layer): ch_out=self.out_channels, filter_size=1, stride=1, - norm_type='gn', - norm_name=conv_pred_name + '.gn', - name=conv_pred_name)) + norm_type='gn')) def forward(self, inputs): """ @@ -216,9 +210,7 @@ class SOLOv2Head(nn.Layer): ch_out=self.seg_feat_channels, filter_size=3, stride=1, - norm_type='gn', - norm_name='bbox_head.kernel_convs.{}.gn'.format(i), - name='bbox_head.kernel_convs.{}'.format(i))) + norm_type='gn')) self.kernel_pred_convs.append(kernel_conv) ch_in = self.in_channels if i == 0 else self.seg_feat_channels cate_conv = self.add_sublayer( @@ -228,9 +220,7 @@ class SOLOv2Head(nn.Layer): ch_out=self.seg_feat_channels, filter_size=3, stride=1, - norm_type='gn', - norm_name='bbox_head.cate_convs.{}.gn'.format(i), - name='bbox_head.cate_convs.{}'.format(i))) + norm_type='gn')) self.cate_pred_convs.append(cate_conv) self.solo_kernel = self.add_sublayer( @@ -241,11 +231,9 @@ class SOLOv2Head(nn.Layer): kernel_size=3, stride=1, padding=1, - weight_attr=ParamAttr( - name="bbox_head.solo_kernel.weight", - initializer=Normal( - mean=0., std=0.01)), - bias_attr=ParamAttr(name="bbox_head.solo_kernel.bias"))) + weight_attr=ParamAttr(initializer=Normal( + mean=0., std=0.01)), + bias_attr=True)) self.solo_cate = self.add_sublayer( 'bbox_head.solo_cate', nn.Conv2D( @@ -254,14 +242,10 @@ class SOLOv2Head(nn.Layer): kernel_size=3, stride=1, padding=1, - weight_attr=ParamAttr( - name="bbox_head.solo_cate.weight", - initializer=Normal( - mean=0., std=0.01)), - bias_attr=ParamAttr( - name="bbox_head.solo_cate.bias", - initializer=Constant( - value=float(-np.log((1 - 0.01) / 0.01)))))) + weight_attr=ParamAttr(initializer=Normal( + mean=0., std=0.01)), + bias_attr=ParamAttr(initializer=Constant( + value=float(-np.log((1 - 0.01) / 0.01)))))) def _points_nms(self, heat, kernel_size=2): hmax = F.max_pool2d(heat, kernel_size=kernel_size, stride=1, padding=1) diff --git a/ppdet/modeling/heads/ssd_head.py b/ppdet/modeling/heads/ssd_head.py index 8cbbe0a460441b5414fec5826f21699834c960c0..96ed5e424d659f96778b66fe95b2c799a1dfb92f 100644 --- a/ppdet/modeling/heads/ssd_head.py +++ b/ppdet/modeling/heads/ssd_head.py @@ -28,8 +28,7 @@ class SepConvLayer(nn.Layer): out_channels, kernel_size=3, padding=1, - conv_decay=0, - name=None): + conv_decay=0): 
         super(SepConvLayer, self).__init__()
         self.dw_conv = nn.Conv2D(
             in_channels=in_channels,
@@ -38,16 +37,13 @@
             stride=1,
             padding=padding,
             groups=in_channels,
-            weight_attr=ParamAttr(
-                name=name + "_dw_weights", regularizer=L2Decay(conv_decay)),
+            weight_attr=ParamAttr(regularizer=L2Decay(conv_decay)),
             bias_attr=False)
 
         self.bn = nn.BatchNorm2D(
             in_channels,
-            weight_attr=ParamAttr(
-                name=name + "_bn_scale", regularizer=L2Decay(0.)),
-            bias_attr=ParamAttr(
-                name=name + "_bn_offset", regularizer=L2Decay(0.)))
+            weight_attr=ParamAttr(regularizer=L2Decay(0.)),
+            bias_attr=ParamAttr(regularizer=L2Decay(0.)))
 
         self.pw_conv = nn.Conv2D(
             in_channels=in_channels,
@@ -55,8 +51,7 @@
             kernel_size=1,
             stride=1,
             padding=0,
-            weight_attr=ParamAttr(
-                name=name + "_pw_weights", regularizer=L2Decay(conv_decay)),
+            weight_attr=ParamAttr(regularizer=L2Decay(conv_decay)),
             bias_attr=False)
 
     def forward(self, x):
@@ -125,8 +120,7 @@
                         out_channels=num_prior * 4,
                         kernel_size=kernel_size,
                         padding=padding,
-                        conv_decay=conv_decay,
-                        name=box_conv_name))
+                        conv_decay=conv_decay))
             self.box_convs.append(box_conv)
 
             score_conv_name = "scores{}".format(i)
@@ -146,8 +140,7 @@
                         out_channels=num_prior * self.num_classes,
                         kernel_size=kernel_size,
                         padding=padding,
-                        conv_decay=conv_decay,
-                        name=score_conv_name))
+                        conv_decay=conv_decay))
             self.score_convs.append(score_conv)
 
     @classmethod
diff --git a/ppdet/modeling/heads/ttf_head.py b/ppdet/modeling/heads/ttf_head.py
index 2a676fc7f9bc8efc18c3c759836764b0d7cac62b..9e2eb6add8c4d0e4c7ea9a19a654d9d67de07e78 100644
--- a/ppdet/modeling/heads/ttf_head.py
+++ b/ppdet/modeling/heads/ttf_head.py
@@ -61,8 +61,7 @@
                     LiteConv(
                         in_channels=ch_in if i == 0 else ch_out,
                         out_channels=ch_out,
-                        norm_type=norm_type,
-                        name=lite_name))
+                        norm_type=norm_type))
                 head_conv.add_sublayer(lite_name + '.act', nn.ReLU6())
             else:
                 if dcn_head:
@@ -85,19 +84,17 @@
                         bias_attr=ParamAttr(
                             learning_rate=2., regularizer=L2Decay(0.))))
                 head_conv.add_sublayer(name + '.act', nn.ReLU())
-        self.feat = self.add_sublayer('hm_feat', head_conv)
+        self.feat = head_conv
         bias_init = float(-np.log((1 - 0.01) / 0.01))
-        self.head = self.add_sublayer(
-            'hm_head',
-            nn.Conv2D(
-                in_channels=ch_out,
-                out_channels=num_classes,
-                kernel_size=1,
-                weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
-                bias_attr=ParamAttr(
-                    learning_rate=2.,
-                    regularizer=L2Decay(0.),
-                    initializer=Constant(bias_init))))
+        self.head = nn.Conv2D(
+            in_channels=ch_out,
+            out_channels=num_classes,
+            kernel_size=1,
+            weight_attr=ParamAttr(initializer=Normal(0, 0.01)),
+            bias_attr=ParamAttr(
+                learning_rate=2.,
+                regularizer=L2Decay(0.),
+                initializer=Constant(bias_init)))
 
     def forward(self, feat):
         out = self.feat(feat)
@@ -139,8 +136,7 @@
                     LiteConv(
                         in_channels=ch_in if i == 0 else ch_out,
                         out_channels=ch_out,
-                        norm_type=norm_type,
-                        name=lite_name))
+                        norm_type=norm_type))
                 head_conv.add_sublayer(lite_name + '.act', nn.ReLU6())
             else:
                 if dcn_head:
@@ -164,16 +160,14 @@
                             learning_rate=2., regularizer=L2Decay(0.))))
                 head_conv.add_sublayer(name + '.act', nn.ReLU())
-        self.feat = self.add_sublayer('wh_feat', head_conv)
-        self.head = self.add_sublayer(
-            'wh_head',
-            nn.Conv2D(
-                in_channels=ch_out,
-                out_channels=4,
-                kernel_size=1,
-                weight_attr=ParamAttr(initializer=Normal(0, 0.001)),
-                bias_attr=ParamAttr(
-                    learning_rate=2., regularizer=L2Decay(0.))))
+        self.feat = head_conv
+        self.head = nn.Conv2D(
+            in_channels=ch_out,
+            out_channels=4,
+            kernel_size=1,
+            weight_attr=ParamAttr(initializer=Normal(0, 0.001)),
+            bias_attr=ParamAttr(
+                learning_rate=2., regularizer=L2Decay(0.)))
 
     def forward(self, feat):
         out = self.feat(feat)
@@ -249,6 +243,9 @@
         return hm, wh
 
     def filter_box_by_weight(self, pred, target, weight):
+        """
+        Filter out boxes where ttf_reg_weight is 0, keeping only positive samples.
+        """
         index = paddle.nonzero(weight > 0)
         index.stop_gradient = True
         weight = paddle.gather_nd(weight, index)
diff --git a/ppdet/modeling/heads/yolo_head.py b/ppdet/modeling/heads/yolo_head.py
index 033089ab2fa889c9c6c0f80e1e1ed09079686be1..a0817747f68c04743afc6e7da20d1485a0fcc196 100644
--- a/ppdet/modeling/heads/yolo_head.py
+++ b/ppdet/modeling/heads/yolo_head.py
@@ -4,7 +4,6 @@ import paddle.nn.functional as F
 from paddle import ParamAttr
 from paddle.regularizer import L2Decay
 from ppdet.core.workspace import register
-from ..backbones.darknet import ConvBNLayer
 
 
 def _de_sigmoid(x, eps=1e-7):
@@ -70,7 +69,6 @@
                 padding=0,
                 data_format=data_format,
                 bias_attr=ParamAttr(regularizer=L2Decay(0.)))
-            conv.skip_quant = True
             yolo_output = self.add_sublayer(name, conv)
             self.yolo_outputs.append(yolo_output)
 
diff --git a/ppdet/modeling/layers.py b/ppdet/modeling/layers.py
index a4eb3b9c2510f1359d1b399efb5e11bc568d5771..5877b5f37566e9f2e58213e785e56bdea9d330f9 100644
--- a/ppdet/modeling/layers.py
+++ b/ppdet/modeling/layers.py
@@ -114,21 +114,17 @@ class ConvNormLayer(nn.Layer):
                  norm_decay=0.,
                  norm_groups=32,
                  use_dcn=False,
-                 norm_name=None,
                  bias_on=False,
                  lr_scale=1.,
                  freeze_norm=False,
                  initializer=Normal(
-                     mean=0., std=0.01),
-                 name=None):
+                     mean=0., std=0.01)):
         super(ConvNormLayer, self).__init__()
         assert norm_type in ['bn', 'sync_bn', 'gn']
 
         if bias_on:
             bias_attr = ParamAttr(
-                name=name + "_bias",
-                initializer=Constant(value=0.),
-                learning_rate=lr_scale)
+                initializer=Constant(value=0.), learning_rate=lr_scale)
         else:
             bias_attr = False
 
@@ -141,9 +137,7 @@
                 padding=(filter_size - 1) // 2,
                 groups=groups,
                 weight_attr=ParamAttr(
-                    name=name + "_weight",
-                    initializer=initializer,
-                    learning_rate=1.),
+                    initializer=initializer, learning_rate=1.),
                 bias_attr=bias_attr)
         else:
             # in FCOS-DCN head, specifically need learning_rate and regularizer
@@ -155,23 +149,16 @@
                 padding=(filter_size - 1) // 2,
                 groups=groups,
                 weight_attr=ParamAttr(
-                    name=name + "_weight",
-                    initializer=initializer,
-                    learning_rate=1.),
+                    initializer=initializer, learning_rate=1.),
                 bias_attr=True,
                 lr_scale=2.,
-                regularizer=L2Decay(norm_decay),
-                name=name)
+                regularizer=L2Decay(norm_decay))
 
         norm_lr = 0. if freeze_norm else 1.
         param_attr = ParamAttr(
-            name=norm_name + "_scale",
-            learning_rate=norm_lr,
-            regularizer=L2Decay(norm_decay))
+            learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
         bias_attr = ParamAttr(
-            name=norm_name + "_offset",
-            learning_rate=norm_lr,
-            regularizer=L2Decay(norm_decay))
+            learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
 
         if norm_type == 'bn':
             self.norm = nn.BatchNorm2D(
                 ch_out, weight_attr=param_attr, bias_attr=bias_attr)
@@ -208,27 +195,21 @@
             stride=stride,
             groups=in_channels,
             norm_type=norm_type,
-            initializer=XavierUniform(),
-            norm_name=name + '.conv1.norm',
-            name=name + '.conv1')
+            initializer=XavierUniform())
         conv2 = ConvNormLayer(
             in_channels,
             out_channels,
             filter_size=1,
             stride=stride,
             norm_type=norm_type,
-            initializer=XavierUniform(),
-            norm_name=name + '.conv2.norm',
-            name=name + '.conv2')
+            initializer=XavierUniform())
         conv3 = ConvNormLayer(
             out_channels,
             out_channels,
             filter_size=1,
             stride=stride,
             norm_type=norm_type,
-            initializer=XavierUniform(),
-            norm_name=name + '.conv3.norm',
-            name=name + '.conv3')
+            initializer=XavierUniform())
         conv4 = ConvNormLayer(
             out_channels,
             out_channels,
@@ -236,9 +217,7 @@
             stride=stride,
             groups=out_channels,
             norm_type=norm_type,
-            initializer=XavierUniform(),
-            norm_name=name + '.conv4.norm',
-            name=name + '.conv4')
+            initializer=XavierUniform())
         conv_list = [conv1, conv2, conv3, conv4]
         self.lite_conv.add_sublayer('conv1', conv1)
         self.lite_conv.add_sublayer('relu6_1', nn.ReLU6())
@@ -675,20 +654,20 @@ class AnchorGrid(object):
 
 @register
 @serializable
 class FCOSBox(object):
-    __shared__ = ['num_classes', 'batch_size']
+    __shared__ = ['num_classes']
 
-    def __init__(self, num_classes=80, batch_size=1):
+    def __init__(self, num_classes=80):
         super(FCOSBox, self).__init__()
         self.num_classes = num_classes
-        self.batch_size = batch_size
 
     def _merge_hw(self, inputs, ch_type="channel_first"):
         """
+        Merge h and w of the feature map into one dimension.
         Args:
-            inputs (Variables): Feature map whose H and W will be merged into one dimension
-            ch_type (str): channel_first / channel_last
+            inputs (Tensor): Tensor of the input feature map
+            ch_type (str): "channel_first" or "channel_last" style
         Return:
-            new_shape (Variables): The new shape after h and w merged into one dimension
+            new_shape (Tensor): The new shape after h and w merged
         """
         shape_ = paddle.shape(inputs)
         bs, ch, hi, wi = shape_[0], shape_[1], shape_[2], shape_[3]
@@ -706,16 +685,18 @@
     def _postprocessing_by_level(self, locations, box_cls, box_reg, box_ctn,
                                  scale_factor):
         """
+        Postprocess each layer of the output with corresponding locations.
         Args:
-            locations (Variables): anchor points for current layer, [H*W, 2]
-            box_cls (Variables): categories prediction, [N, C, H, W], C is the number of classes
-            box_reg (Variables): bounding box prediction, [N, 4, H, W]
-            box_ctn (Variables): centerness prediction, [N, 1, H, W]
-            scale_factor (Variables): [h_scale, w_scale] for input images
+            locations (Tensor): anchor points for current layer, [H*W, 2]
+            box_cls (Tensor): categories prediction, [N, C, H, W],
+                C is the number of classes
+            box_reg (Tensor): bounding box prediction, [N, 4, H, W]
+            box_ctn (Tensor): centerness prediction, [N, 1, H, W]
+            scale_factor (Tensor): [h_scale, w_scale] for input images
         Return:
-            box_cls_ch_last (Variables): score for each category, in [N, C, M]
+            box_cls_ch_last (Tensor): score for each category, in [N, C, M]
                 C is the number of classes and M is the number of anchor points
-            box_reg_decoding (Variables): decoded bounding box, in [N, M, 4]
+            box_reg_decoding (Tensor): decoded bounding box, in [N, M, 4]
                 last dimension is [x1, y1, x2, y2]
         """
         act_shape_cls = self._merge_hw(box_cls)
@@ -771,12 +752,18 @@
         self.down_ratio = down_ratio
 
     def _simple_nms(self, heat, kernel=3):
+        """
+        Use max pooling to filter out non-maximum scores and keep local peaks.
+        """
         pad = (kernel - 1) // 2
         hmax = F.max_pool2d(heat, kernel, stride=1, padding=pad)
         keep = paddle.cast(hmax == heat, 'float32')
         return heat * keep
 
     def _topk(self, scores):
+        """
+        Select top k scores and decode to get xy coordinates.
+        """
         k = self.max_per_img
         shape_fm = paddle.shape(scores)
         shape_fm.stop_gradient = True
diff --git a/ppdet/modeling/necks/fpn.py b/ppdet/modeling/necks/fpn.py
index 0b9f6a798bdc0d87630d96135f86cc8dc2802506..867b7dc451a85773a1e902232c260b47d08ece4a 100644
--- a/ppdet/modeling/necks/fpn.py
+++ b/ppdet/modeling/necks/fpn.py
@@ -105,10 +105,8 @@
                         stride=1,
                         norm_type=self.norm_type,
                         norm_decay=self.norm_decay,
-                        norm_name=lateral_name + '_norm',
                         freeze_norm=self.freeze_norm,
-                        initializer=XavierUniform(fan_out=in_c),
-                        name=lateral_name))
+                        initializer=XavierUniform(fan_out=in_c)))
             else:
                 lateral = self.add_sublayer(
                     lateral_name,
@@ -131,10 +129,8 @@
                         stride=1,
                         norm_type=self.norm_type,
                         norm_decay=self.norm_decay,
-                        norm_name=fpn_name + '_norm',
                         freeze_norm=self.freeze_norm,
-                        initializer=XavierUniform(fan_out=fan),
-                        name=fpn_name))
+                        initializer=XavierUniform(fan_out=fan)))
             else:
                 fpn_conv = self.add_sublayer(
                     fpn_name,
@@ -166,10 +162,8 @@
                             stride=2,
                             norm_type=self.norm_type,
                             norm_decay=self.norm_decay,
-                            norm_name=extra_fpn_name + '_norm',
                             freeze_norm=self.freeze_norm,
-                            initializer=XavierUniform(fan_out=fan),
-                            name=extra_fpn_name))
+                            initializer=XavierUniform(fan_out=fan)))
                 else:
                     extra_fpn_conv = self.add_sublayer(
                         extra_fpn_name,
diff --git a/ppdet/modeling/necks/ttf_fpn.py b/ppdet/modeling/necks/ttf_fpn.py
index ef83a8b838b0e571e1348bc2e9410e970743f81d..9c7f3924f0c2f611be5aab73cfd23921226e5eec 100644
--- a/ppdet/modeling/necks/ttf_fpn.py
+++ b/ppdet/modeling/necks/ttf_fpn.py
@@ -16,7 +16,6 @@ import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 from paddle import ParamAttr
-from paddle.nn.initializer import Constant, Uniform, Normal
 from paddle.nn.initializer import Constant, Uniform, Normal, XavierUniform
 from paddle import ParamAttr
 from ppdet.core.workspace import register, serializable
@@ -28,11 +27,9 @@ from ..shape_spec import ShapeSpec
 
 __all__ = ['TTFFPN']
 
-__all__ = ['TTFFPN']
-
 
 class Upsample(nn.Layer):
-    def __init__(self, ch_in, ch_out, norm_type='bn', name=None):
+    def __init__(self, ch_in, ch_out, norm_type='bn'):
         super(Upsample, self).__init__()
         fan_in = ch_in * 3 * 3
         stdv = 1. / math.sqrt(fan_in)
@@ -49,7 +46,7 @@
             regularizer=L2Decay(0.))
 
         self.bn = batch_norm(
-            ch_out, norm_type=norm_type, initializer=Constant(1.), name=name)
+            ch_out, norm_type=norm_type, initializer=Constant(1.))
 
     def forward(self, feat):
         dcn = self.dcn(feat)
@@ -60,7 +57,7 @@
 
 
 class DeConv(nn.Layer):
-    def __init__(self, ch_in, ch_out, norm_type='bn', name=None):
+    def __init__(self, ch_in, ch_out, norm_type='bn'):
         super(DeConv, self).__init__()
         self.deconv = nn.Sequential()
         conv1 = ConvNormLayer(
@@ -69,9 +66,7 @@
             stride=1,
             filter_size=1,
             norm_type=norm_type,
-            initializer=XavierUniform(),
-            norm_name=name + '.conv1.norm',
-            name=name + '.conv1')
+            initializer=XavierUniform())
         conv2 = nn.Conv2DTranspose(
             in_channels=ch_out,
             out_channels=ch_out,
@@ -81,17 +76,14 @@
             groups=ch_out,
             weight_attr=ParamAttr(initializer=XavierUniform()),
             bias_attr=False)
-        bn = batch_norm(
-            ch_out, norm_type=norm_type, norm_decay=0., name=name + '.bn')
+        bn = batch_norm(ch_out, norm_type=norm_type, norm_decay=0.)
         conv3 = ConvNormLayer(
             ch_in=ch_out,
             ch_out=ch_out,
             stride=1,
             filter_size=1,
             norm_type=norm_type,
-            initializer=XavierUniform(),
-            norm_name=name + '.conv3.norm',
-            name=name + '.conv3')
+            initializer=XavierUniform())
 
         self.deconv.add_sublayer('conv1', conv1)
         self.deconv.add_sublayer('relu6_1', nn.ReLU6())
@@ -106,12 +98,10 @@
 
 
 class LiteUpsample(nn.Layer):
-    def __init__(self, ch_in, ch_out, norm_type='bn', name=None):
+    def __init__(self, ch_in, ch_out, norm_type='bn'):
         super(LiteUpsample, self).__init__()
-        self.deconv = DeConv(
-            ch_in, ch_out, norm_type=norm_type, name=name + '.deconv')
-        self.conv = LiteConv(
-            ch_in, ch_out, norm_type=norm_type, name=name + '.liteconv')
+        self.deconv = DeConv(ch_in, ch_out, norm_type=norm_type)
+        self.conv = LiteConv(ch_in, ch_out, norm_type=norm_type)
 
     def forward(self, inputs):
         deconv_up = self.deconv(inputs)
@@ -142,8 +132,7 @@
                         in_channels=in_channels,
                         out_channels=ch_out,
                         with_act=i < layer_num - 1,
-                        norm_type=norm_type,
-                        name=shortcut_name))
+                        norm_type=norm_type))
             else:
                 shortcut_conv.add_sublayer(
                     shortcut_name,
@@ -158,7 +147,7 @@
             if i < layer_num - 1:
                 shortcut_conv.add_sublayer(shortcut_name + '.act', nn.ReLU())
 
-        self.shortcut = self.add_sublayer('short', shortcut_conv)
+        self.shortcut = self.add_sublayer('shortcut', shortcut_conv)
 
     def forward(self, feat):
         out = self.shortcut(feat)
@@ -211,10 +200,7 @@
             upsample = self.add_sublayer(
                 'upsample.' + str(i),
                 upsample_module(
-                    in_c,
-                    out_c,
-                    norm_type=norm_type,
-                    name='deconv_layers.' + str(i)))
+                    in_c, out_c, norm_type=norm_type))
             self.upsample_list.append(upsample)
             if i < self.shortcut_len:
                 shortcut = self.add_sublayer(
diff --git a/ppdet/modeling/necks/yolo_fpn.py b/ppdet/modeling/necks/yolo_fpn.py
index 873e43f0e068705cc5a5c05a294db6491d952d12..25458974aa21c10d4b3635aba05dccebd2dfd141 100644
--- a/ppdet/modeling/necks/yolo_fpn.py
+++ b/ppdet/modeling/necks/yolo_fpn.py
@@ -25,9 +25,9 @@ from ..shape_spec import ShapeSpec
 
 __all__ = ['YOLOv3FPN', 'PPYOLOFPN']
 
-def add_coord(x):
+def add_coord(x, data_format):
     b = x.shape[0]
-    if self.data_format == 'NCHW':
+    if data_format == 'NCHW':
         h = x.shape[2]
         w = x.shape[3]
     else:
@@ -35,14 +35,14 @@
         w = x.shape[2]
 
     gx = paddle.arange(w, dtype='float32') / (w - 1.) * 2.0 - 1.
-    if self.data_format == 'NCHW':
+    if data_format == 'NCHW':
         gx = gx.reshape([1, 1, 1, w]).expand([b, 1, h, w])
     else:
         gx = gx.reshape([1, 1, w, 1]).expand([b, h, w, 1])
     gx.stop_gradient = True
 
     gy = paddle.arange(h, dtype='float32') / (h - 1.) * 2.0 - 1.
-    if self.data_format == 'NCHW':
+    if data_format == 'NCHW':
         gy = gy.reshape([1, 1, h, 1]).expand([b, 1, h, w])
     else:
         gy = gy.reshape([1, h, 1, 1]).expand([b, h, w, 1])
@@ -237,7 +237,7 @@ class CoordConv(nn.Layer):
         self.data_format = data_format
 
     def forward(self, x):
-        gx, gy = add_coord(x)
+        gx, gy = add_coord(x, self.data_format)
         if self.data_format == 'NCHW':
             y = paddle.concat([x, gx, gy], axis=1)
         else:
@@ -509,7 +509,7 @@ class PPYOLOFPN(nn.Layer):
                  norm_type='bn',
                  data_format='NCHW',
                  coord_conv=False,
-                 conv_block_num=3,
+                 conv_block_num=2,
                  drop_block=False,
                  block_size=3,
                  keep_prob=0.9,
diff --git a/ppdet/modeling/ops.py b/ppdet/modeling/ops.py
index e2c193030f41f72a8de80ed9c5a74bf30b12525a..f190a489580e114d06b39bc10bd9868833ed5bec 100644
--- a/ppdet/modeling/ops.py
+++ b/ppdet/modeling/ops.py
@@ -53,9 +53,7 @@ def batch_norm(ch,
                norm_type='bn',
                norm_decay=0.,
                initializer=None,
-               name=None,
                data_format='NCHW'):
-    bn_name = name + '.bn'
     if norm_type == 'sync_bn':
         batch_norm = nn.SyncBatchNorm
     else: