import paddle.fluid as fluid
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.fluid.regularizer import L2Decay
from ppdet.core.workspace import register
from ..backbone.darknet import ConvBNLayer


class YoloDetBlock(nn.Layer):
    """Stack of five ConvBNLayers followed by a separate 3x3 "tip" layer.

    The five stacked layers alternate filter sizes 1 and 3 and end with a
    1x1 layer producing ``channel`` outputs (the "route"). The tip layer is
    applied on top of the route and produces ``channel * 2`` outputs.

    Args:
        ch_in: number of input channels of the first layer.
        channel: base channel count; must be even.
        name: prefix used to build the ``name`` of every ConvBNLayer.
    """

    def __init__(self, ch_in, channel, name):
        super(YoloDetBlock, self).__init__()
        self.ch_in = ch_in
        self.channel = channel
        assert channel % 2 == 0, \
            "channel {} cannot be divided by 2".format(channel)

        # (sublayer name, input channels, output channels, filter size,
        #  suffix appended to `name`)
        conv_def = [
            ['conv0', ch_in, channel, 1, '.0.0'],
            ['conv1', channel, channel * 2, 3, '.0.1'],
            ['conv2', channel * 2, channel, 1, '.1.0'],
            ['conv3', channel, channel * 2, 3, '.1.1'],
            ['route', channel * 2, channel, 1, '.2'],
        ]

        self.conv_module = nn.Sequential()
        # Loop variables deliberately do not reuse the name `ch_in`, so the
        # constructor argument is not shadowed.
        for conv_name, in_ch, out_ch, filter_size, post_name in conv_def:
            self.conv_module.add_sublayer(
                conv_name,
                ConvBNLayer(
                    ch_in=in_ch,
                    ch_out=out_ch,
                    filter_size=filter_size,
                    padding=(filter_size - 1) // 2,
                    name=name + post_name))

        # The tip is kept outside `conv_module` because forward() has to
        # return the route (input of the tip) as well as the tip itself.
        self.tip = ConvBNLayer(
            ch_in=channel,
            ch_out=channel * 2,
            filter_size=3,
            padding=1,
            name=name + '.tip')

    def forward(self, inputs):
        """Return ``(route, tip)`` computed from ``inputs``."""
        route = self.conv_module(inputs)
        tip = self.tip(route)
        return route, tip


@register
class YOLOFeat(nn.Layer):
    """Builds one YoloDetBlock per level plus transition layers between them.

    Args:
        feat_in_list: input channel count passed to the YoloDetBlock of each
            level, indexed in processing order (forward() reverses
            ``body_feats`` first). NOTE(review): for levels > 0 the block
            input is the concatenation of the upsampled route and the body
            feature, so these values presumably already include the route
            channels -- confirm against the backbone configuration.
        num_levels: number of levels (blocks) to build.
    """
    __shared__ = ['num_levels']

    def __init__(self, feat_in_list=[1024, 768, 384], num_levels=3):
        super(YOLOFeat, self).__init__()
        # Store a copy: the default list object is shared by every call that
        # omits the argument, so keeping a reference would let a mutation of
        # one instance's attribute leak into other instances.
        self.feat_in_list = list(feat_in_list)
        self.yolo_blocks = []
        self.route_blocks = []
        self.num_levels = num_levels

        for i in range(self.num_levels):
            block_name = 'yolo_block.{}'.format(i)
            yolo_block = self.add_sublayer(
                block_name,
                YoloDetBlock(
                    self.feat_in_list[i],
                    channel=512 // (2**i),
                    name=block_name))
            self.yolo_blocks.append(yolo_block)

            # Every level except the last feeds its route into the next one.
            if i < self.num_levels - 1:
                transition_name = 'yolo_transition.{}'.format(i)
                route = self.add_sublayer(
                    transition_name,
                    ConvBNLayer(
                        ch_in=512 // (2**i),
                        ch_out=256 // (2**i),
                        filter_size=1,
                        stride=1,
                        padding=0,
                        name=transition_name))
                self.route_blocks.append(route)

    def forward(self, body_feats):
        """Return the list of tip features, one per level.

        ``body_feats`` is processed in reverse order. From the second level
        on, the transformed route of the previous level is concatenated with
        the current feature along axis 1 before the block is applied.

        Raises:
            AssertionError: if ``len(body_feats) != self.num_levels``.
        """
        assert len(body_feats) == self.num_levels, \
            "expected {} body feats, got {}".format(self.num_levels,
                                                    len(body_feats))
        body_feats = body_feats[::-1]
        yolo_feats = []
        for i, block in enumerate(body_feats):
            if i > 0:
                # `route` was produced at the end of the previous iteration.
                block = paddle.concat([route, block], axis=1)
            route, tip = self.yolo_blocks[i](block)
            yolo_feats.append(tip)

            if i < self.num_levels - 1:
                route = self.route_blocks[i](route)
                route = F.interpolate(route, scale_factor=2.)

        return yolo_feats


@register
class YOLOv3Head(nn.Layer):
    """Applies a 1x1 Conv2d per level on top of the ``yolo_feat`` outputs.

    Args:
        yolo_feat: injected layer called on ``body_feats`` in forward(); it
            must return one feature per level.
        num_classes: number of classes; used for the conv output width and
            passed to the loss as ``class_num``.
        anchor_per_position: multiplier for the conv output width
            (``anchor_per_position * (num_classes + 5)``).
        num_levels: number of output convolutions to build.
        use_fine_grained_loss: if true, loss() raises NotImplementedError.
        ignore_thresh: passed to ``yolov3_loss`` as ``ignore_thresh``.
        downsample: base value; level ``i`` uses ``downsample // 2**i`` as
            ``downsample_ratio`` in the loss.
        label_smooth: passed to ``yolov3_loss`` as ``use_label_smooth``.
    """
    __shared__ = ['num_classes', 'num_levels', 'use_fine_grained_loss']
    __inject__ = ['yolo_feat']

    def __init__(self,
                 yolo_feat,
                 num_classes=80,
                 anchor_per_position=3,
                 num_levels=3,
                 use_fine_grained_loss=False,
                 ignore_thresh=0.7,
                 downsample=32,
                 label_smooth=True):
        super(YOLOv3Head, self).__init__()
        self.num_classes = num_classes
        self.anchor_per_position = anchor_per_position
        self.yolo_feat = yolo_feat
        self.num_levels = num_levels
        self.use_fine_grained_loss = use_fine_grained_loss
        self.ignore_thresh = ignore_thresh
        self.downsample = downsample
        self.label_smooth = label_smooth

        # TODO: optim here -- derive the anchor count of each level from the
        # anchor masks instead of the fixed `anchor_per_position`.
        # The value does not depend on the level, so compute it once.
        num_filters = self.anchor_per_position * (self.num_classes + 5)

        self.yolo_out_list = []
        for i in range(num_levels):
            name = 'yolo_output.{}'.format(i)
            yolo_out = self.add_sublayer(
                name,
                nn.Conv2d(
                    in_channels=1024 // (2**i),
                    out_channels=num_filters,
                    kernel_size=1,
                    stride=1,
                    padding=0,
                    weight_attr=ParamAttr(name=name + '.conv.weights'),
                    bias_attr=ParamAttr(
                        name=name + '.conv.bias', regularizer=L2Decay(0.))))
            self.yolo_out_list.append(yolo_out)

    def forward(self, body_feats):
        """Return the list of per-level head outputs for ``body_feats``.

        Raises:
            AssertionError: if ``len(body_feats) != self.num_levels``.
        """
        assert len(body_feats) == self.num_levels, \
            "expected {} body feats, got {}".format(self.num_levels,
                                                    len(body_feats))
        yolo_feats = self.yolo_feat(body_feats)
        return [
            self.yolo_out_list[i](feat) for i, feat in enumerate(yolo_feats)
        ]

    def loss(self, inputs, head_out, anchors, anchor_masks, mask_anchors):
        """Return ``{'loss': total}`` summed over all head outputs.

        For every element of ``head_out`` the YOLOv3 loss is computed with
        ``fluid.layers.yolov3_loss`` and reduced with ``reduce_mean``; the
        per-level results are then added up.

        Args:
            inputs: mapping read at keys ``'gt_bbox'``, ``'gt_class'`` and
                ``'gt_score'``.
            head_out: per-level outputs, as returned by forward().
            anchors: passed unchanged to ``yolov3_loss``.
            anchor_masks: indexable per level; ``anchor_masks[i]`` is passed
                as ``anchor_mask`` for level ``i``.
            mask_anchors: not used by this method; kept in the signature for
                existing callers.

        Raises:
            NotImplementedError: if ``self.use_fine_grained_loss`` is true.
        """
        if self.use_fine_grained_loss:
            raise NotImplementedError

        yolo_losses = []
        for i, out in enumerate(head_out):
            loss = fluid.layers.yolov3_loss(
                x=out,
                gt_box=inputs['gt_bbox'],
                gt_label=inputs['gt_class'],
                gt_score=inputs['gt_score'],
                anchors=anchors,
                anchor_mask=anchor_masks[i],
                class_num=self.num_classes,
                ignore_thresh=self.ignore_thresh,
                downsample_ratio=self.downsample // 2**i,
                use_label_smooth=self.label_smooth,
                name='yolo_loss_' + str(i))
            loss = fluid.layers.reduce_mean(loss)
            yolo_losses.append(loss)
        return {'loss': sum(yolo_losses)}