未验证 提交 24f582fd 编写于 作者: F Feng Ni 提交者: GitHub

[doc] add fcos ttfnet coments (#2456)

* add fcos ttfnet coment, update modelzoo

* fix ttfnet coment
上级 3b02c71e
......@@ -12,9 +12,9 @@ FCOS (Fully Convolutional One-Stage Object Detection) is a fast anchor-free obje
| 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP | 下载 | 配置文件 |
| :-------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----------------------------------------------------: | :-----: |
| ResNet50-FPN | FCOS | 2 | 1x | ---- | 39.6 | [下载链接](https://paddledet.bj.bcebos.com/models/fcos_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/fcos/fcos_r50_fpn_1x_coco.yml) |
| ResNet50-FPN | FCOS+DCN | 2 | 1x | ---- | 44.3 | [下载链接](https://paddledet.bj.bcebos.com/models/fcos_dcn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/fcos/fcos_dcn_r50_fpn_1x_coco.yml) |
| ResNet50-FPN | FCOS+multiscale_train | 2 | 2x | ---- | 41.8 | [下载链接](https://paddledet.bj.bcebos.com/models/fcos_r50_fpn_multiscale_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/fcos/fcos_r50_fpn_multiscale_2x_coco.yml) |
| ResNet50-FPN | FCOS | 2 | 1x | ---- | 39.6 | [下载链接](https://paddledet.bj.bcebos.com/models/fcos_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/fcos/fcos_r50_fpn_1x_coco.yml) |
| ResNet50-FPN | FCOS+DCN | 2 | 1x | ---- | 44.3 | [下载链接](https://paddledet.bj.bcebos.com/models/fcos_dcn_r50_fpn_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/fcos/fcos_dcn_r50_fpn_1x_coco.yml) |
| ResNet50-FPN | FCOS+multiscale_train | 2 | 2x | ---- | 41.8 | [下载链接](https://paddledet.bj.bcebos.com/models/fcos_r50_fpn_multiscale_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/fcos/fcos_r50_fpn_multiscale_2x_coco.yml) |
**Notes:**
......
......@@ -47,7 +47,6 @@ FCOSPostProcess:
decode:
name: FCOSBox
num_classes: 80
batch_size: 1
nms:
name: MultiClassNMS
nms_top_k: 1000
......
......@@ -13,7 +13,7 @@ TTFNet是一种用于实时目标检测且对训练时间友好的网络,对Ce
| 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP | 下载 | 配置文件 |
| :-------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----------------------------------------------------: | :-----: |
| DarkNet53 | TTFNet | 12 | 1x | ---- | 33.5 | [下载链接](https://paddledet.bj.bcebos.com/models/ttfnet_darknet53_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ttfnet/ttfnet_darknet53_1x_coco.yml) |
| DarkNet53 | TTFNet | 12 | 1x | ---- | 33.5 | [下载链接](https://paddledet.bj.bcebos.com/models/ttfnet_darknet53_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ttfnet/ttfnet_darknet53_1x_coco.yml) |
## Citations
```
......
......@@ -25,6 +25,16 @@ __all__ = ['FCOS']
@register
class FCOS(BaseArch):
"""
FCOS network, see https://arxiv.org/abs/1904.01355
Args:
backbone (object): backbone instance
neck (object): 'FPN' instance
fcos_head (object): 'FCOSHead' instance
post_process (object): 'FCOSPostProcess' instance
"""
__category__ = 'architecture'
__inject__ = ['fcos_post_process']
......@@ -70,7 +80,7 @@ class FCOS(BaseArch):
loss = {}
tag_labels, tag_bboxes, tag_centerness = [], [], []
for i in range(len(self.fcos_head.fpn_stride)):
# reg_target, labels, scores, centerness
# labels, reg_target, centerness
k_lbl = 'labels{}'.format(i)
if k_lbl in self.inputs:
tag_labels.append(self.inputs[k_lbl])
......@@ -90,6 +100,6 @@ class FCOS(BaseArch):
return loss
def get_pred(self):
bboxes, bbox_num = self._forward()
output = {'bbox': bboxes, 'bbox_num': bbox_num}
bbox_pred, bbox_num = self._forward()
output = {'bbox': bbox_pred, 'bbox_num': bbox_num}
return output
......@@ -28,6 +28,10 @@ from ppdet.modeling.layers import ConvNormLayer
class ScaleReg(nn.Layer):
"""
Parameter for scaling the regression outputs.
"""
def __init__(self):
super(ScaleReg, self).__init__()
self.scale_reg = self.create_parameter(
......@@ -113,12 +117,13 @@ class FCOSHead(nn.Layer):
"""
FCOSHead
Args:
num_classes(int): Number of classes
fpn_stride(list): The stride of each FPN Layer
prior_prob(float): Used to set the bias init for the class prediction layer
fcos_loss(object): Instance of 'FCOSLoss'
norm_reg_targets(bool): Normalization the regression target if true
centerness_on_reg(bool): The prediction of centerness on regression or clssification branch
fcos_feat (object): Instance of 'FCOSFeat'
num_classes (int): Number of classes
fpn_stride (list): The stride of each FPN Layer
prior_prob (float): Used to set the bias init for the class prediction layer
fcos_loss (object): Instance of 'FCOSLoss'
norm_reg_targets (bool): Normalization the regression target if true
centerness_on_reg (bool): The prediction of centerness on regression or clssification branch
"""
__inject__ = ['fcos_feat', 'fcos_loss']
__shared__ = ['num_classes']
......@@ -199,7 +204,15 @@ class FCOSHead(nn.Layer):
scale_reg = self.add_sublayer(feat_name, ScaleReg())
self.scales_regs.append(scale_reg)
def _compute_locatioins_by_level(self, fpn_stride, feature):
def _compute_locations_by_level(self, fpn_stride, feature):
"""
Compute locations of anchor points of each FPN layer
Args:
fpn_stride (int): The stride of current FPN feature map
feature (Tensor): Tensor of current FPN feature map
Return:
Anchor points locations of current FPN feature map
"""
shape_fm = paddle.shape(feature)
shape_fm.stop_gradient = True
h, w = shape_fm[2], shape_fm[3]
......@@ -247,8 +260,7 @@ class FCOSHead(nn.Layer):
if not is_training:
locations_list = []
for fpn_stride, feature in zip(self.fpn_stride, fpn_feats):
location = self._compute_locatioins_by_level(fpn_stride,
feature)
location = self._compute_locations_by_level(fpn_stride, feature)
locations_list.append(location)
return locations_list, cls_logits_list, bboxes_reg_list, centerness_list
......
......@@ -24,7 +24,15 @@ import numpy as np
@register
class HMHead(nn.Layer):
"""
Args:
ch_in (int): The channel number of input Tensor.
ch_out (int): The channel number of output Tensor.
num_classes (int): Number of classes.
conv_num (int): The convolution number of hm_feat.
Return:
Heatmap head output
"""
__shared__ = ['num_classes']
def __init__(self, ch_in, ch_out=128, num_classes=80, conv_num=2):
......@@ -65,6 +73,15 @@ class HMHead(nn.Layer):
@register
class WHHead(nn.Layer):
"""
Args:
ch_in (int): The channel number of input Tensor.
ch_out (int): The channel number of output Tensor.
conv_num (int): The convolution number of wh_feat.
Return:
Width & Height head output
"""
def __init__(self, ch_in, ch_out=64, conv_num=2):
super(WHHead, self).__init__()
head_conv = nn.Sequential()
......@@ -104,17 +121,22 @@ class TTFHead(nn.Layer):
"""
TTFHead
Args:
in_channels(int): the channel number of input to TTFHead.
num_classes(int): the number of classes, 80 by default.
hm_head_planes(int): the channel number in wh head, 128 by default.
wh_head_planes(int): the channel number in wh head, 64 by default.
hm_head_conv_num(int): the number of convolution in wh head, 2 by default.
wh_head_conv_num(int): the number of convolution in wh head, 2 by default.
hm_loss(object): Instance of 'CTFocalLoss'.
wh_loss(object): Instance of 'GIoULoss'.
wh_offset_base(flaot): the base offset of width and height, 16. by default.
down_ratio(int): the actual down_ratio is calculated by base_down_ratio(default 16)
and the number of upsample layers.
in_channels (int): the channel number of input to TTFHead.
num_classes (int): the number of classes, 80 by default.
hm_head_planes (int): the channel number in heatmap head,
128 by default.
wh_head_planes (int): the channel number in width & height head,
64 by default.
hm_head_conv_num (int): the number of convolution in heatmap head,
2 by default.
wh_head_conv_num (int): the number of convolution in width & height
head, 2 by default.
hm_loss (object): Instance of 'CTFocalLoss'.
wh_loss (object): Instance of 'GIoULoss'.
wh_offset_base (float): the base offset of width and height,
16.0 by default.
down_ratio (int): the actual down_ratio is calculated by base_down_ratio
(default 16) and the number of upsample layers.
"""
__shared__ = ['num_classes', 'down_ratio']
......@@ -154,6 +176,9 @@ class TTFHead(nn.Layer):
return hm, wh
def filter_box_by_weight(self, pred, target, weight):
"""
Filter out boxes where ttf_reg_weight is 0, only keep positive samples.
"""
index = paddle.nonzero(weight > 0)
index.stop_gradient = True
weight = paddle.gather_nd(weight, index)
......
......@@ -616,20 +616,20 @@ class AnchorGrid(object):
@register
@serializable
class FCOSBox(object):
__shared__ = ['num_classes', 'batch_size']
__shared__ = ['num_classes']
def __init__(self, num_classes=80, batch_size=1):
def __init__(self, num_classes=80):
super(FCOSBox, self).__init__()
self.num_classes = num_classes
self.batch_size = batch_size
def _merge_hw(self, inputs, ch_type="channel_first"):
"""
Merge h and w of the feature map into one dimension.
Args:
inputs (Variables): Feature map whose H and W will be merged into one dimension
ch_type (str): channel_first / channel_last
inputs (Tensor): Tensor of the input feature map
ch_type (str): "channel_first" or "channel_last" style
Return:
new_shape (Variables): The new shape after h and w merged into one dimension
new_shape (Tensor): The new shape after h and w merged
"""
shape_ = paddle.shape(inputs)
bs, ch, hi, wi = shape_[0], shape_[1], shape_[2], shape_[3]
......@@ -647,16 +647,18 @@ class FCOSBox(object):
def _postprocessing_by_level(self, locations, box_cls, box_reg, box_ctn,
scale_factor):
"""
Postprocess each layer of the output with corresponding locations.
Args:
locations (Variables): anchor points for current layer, [H*W, 2]
box_cls (Variables): categories prediction, [N, C, H, W], C is the number of classes
box_reg (Variables): bounding box prediction, [N, 4, H, W]
box_ctn (Variables): centerness prediction, [N, 1, H, W]
scale_factor (Variables): [h_scale, w_scale] for input images
locations (Tensor): anchor points for current layer, [H*W, 2]
box_cls (Tensor): categories prediction, [N, C, H, W],
C is the number of classes
box_reg (Tensor): bounding box prediction, [N, 4, H, W]
box_ctn (Tensor): centerness prediction, [N, 1, H, W]
scale_factor (Tensor): [h_scale, w_scale] for input images
Return:
box_cls_ch_last (Variables): score for each category, in [N, C, M]
box_cls_ch_last (Tensor): score for each category, in [N, C, M]
C is the number of classes and M is the number of anchor points
box_reg_decoding (Variables): decoded bounding box, in [N, M, 4]
box_reg_decoding (Tensor): decoded bounding box, in [N, M, 4]
last dimension is [x1, y1, x2, y2]
"""
act_shape_cls = self._merge_hw(box_cls)
......@@ -712,12 +714,18 @@ class TTFBox(object):
self.down_ratio = down_ratio
def _simple_nms(self, heat, kernel=3):
"""
Use maxpool to filter the max score, get local peaks.
"""
pad = (kernel - 1) // 2
hmax = F.max_pool2d(heat, kernel, stride=1, padding=pad)
keep = paddle.cast(hmax == heat, 'float32')
return heat * keep
def _topk(self, scores):
"""
Select top k scores and decode to get xy coordinates.
"""
k = self.max_per_img
shape_fm = paddle.shape(scores)
shape_fm.stop_gradient = True
......
......@@ -27,10 +27,10 @@ __all__ = ['CTFocalLoss']
@serializable
class CTFocalLoss(object):
"""
CTFocalLoss
CTFocalLoss: CornerNet & CenterNet Focal Loss
Args:
loss_weight (float): loss weight
gamma (float): gamma parameter for Focal Loss
loss_weight (float): loss weight
gamma (float): gamma parameter for Focal Loss
"""
def __init__(self, loss_weight=1., gamma=2.0):
......@@ -41,8 +41,8 @@ class CTFocalLoss(object):
"""
Calculate the loss
Args:
pred(Tensor): heatmap prediction
target(Tensor): target for positive samples
pred (Tensor): heatmap prediction
target (Tensor): target for positive samples
Return:
ct_focal_loss (Tensor): Focal Loss used in CornerNet & CenterNet.
Note that the values in target are in [0, 1] since gaussian is
......
......@@ -20,8 +20,8 @@ import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register
from ppdet.modeling import ops
INF = 1e8
__all__ = ['FCOSLoss']
......@@ -29,34 +29,21 @@ def flatten_tensor(inputs, channel_first=False):
"""
Flatten a Tensor
Args:
inputs (Tensor): 4-D Tensor with shape [N, C, H, W] or [N, H, W, C]
channel_first(bool): if true the dimension order of
Tensor is [N, C, H, W], otherwise is [N, H, W, C]
inputs (Tensor): 4-D Tensor with shape [N, C, H, W] or [N, H, W, C]
channel_first (bool): If true the dimension order of Tensor is
[N, C, H, W], otherwise is [N, H, W, C]
Return:
input_channel_last (Tensor): The flattened Tensor in channel_last style
output_channel_last (Tensor): The flattened Tensor in channel_last style
"""
if channel_first:
input_channel_last = paddle.transpose(inputs, perm=[0, 2, 3, 1])
else:
input_channel_last = inputs
output_channel_last = paddle.flatten(
input_channel_last, start_axis=0, stop_axis=2) # [N*H*W, C]
input_channel_last, start_axis=0, stop_axis=2)
return output_channel_last
def sigmoid_cross_entropy_with_logits_loss(inputs,
label,
ignore_index=-100,
normalize=False):
output = F.binary_cross_entropy_with_logits(inputs, label, reduction='none')
mask_tensor = paddle.cast(label != ignore_index, 'float32')
output = paddle.multiply(output, mask_tensor)
if normalize:
sum_valid_mask = paddle.sum(mask_tensor)
output = output / sum_valid_mask
return output
@register
class FCOSLoss(nn.Layer):
"""
......@@ -64,8 +51,8 @@ class FCOSLoss(nn.Layer):
Args:
loss_alpha (float): alpha in focal loss
loss_gamma (float): gamma in focal loss
iou_loss_type(str): location loss type, IoU/GIoU/LINEAR_IoU
reg_weights(float): weight for location loss
iou_loss_type (str): location loss type, IoU/GIoU/LINEAR_IoU
reg_weights (float): weight for location loss
"""
def __init__(self,
......@@ -83,10 +70,10 @@ class FCOSLoss(nn.Layer):
"""
Calculate the loss for location prediction
Args:
pred (Tensor): bounding boxes prediction
targets (Tensor): targets for positive samples
pred (Tensor): bounding boxes prediction
targets (Tensor): targets for positive samples
positive_mask (Tensor): mask of positive samples
weights (Tensor): weights for each positive samples
weights (Tensor): weights for each positive samples
Return:
loss (Tensor): location loss
"""
......@@ -226,8 +213,8 @@ class FCOSLoss(nn.Layer):
# 3. centerness: sigmoid_cross_entropy_with_logits_loss
centerness_flatten = paddle.squeeze(centerness_flatten, axis=-1)
ctn_loss = sigmoid_cross_entropy_with_logits_loss(centerness_flatten,
tag_center_flatten)
ctn_loss = ops.sigmoid_cross_entropy_with_logits(centerness_flatten,
tag_center_flatten)
ctn_loss = ctn_loss * mask_positive_float / num_positive_fp32
loss_all = {
......
......@@ -17,7 +17,6 @@ import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.nn.initializer import Constant, Uniform, Normal
from paddle.nn import Conv2D, ReLU, Sequential
from paddle import ParamAttr
from ppdet.core.workspace import register, serializable
from paddle.regularizer import L2Decay
......@@ -28,8 +27,6 @@ from ..shape_spec import ShapeSpec
__all__ = ['TTFFPN']
__all__ = ['TTFFPN']
class Upsample(nn.Layer):
def __init__(self, ch_in, ch_out, name=None):
......@@ -63,7 +60,7 @@ class Upsample(nn.Layer):
class ShortCut(nn.Layer):
def __init__(self, layer_num, ch_out, name=None):
super(ShortCut, self).__init__()
shortcut_conv = Sequential()
shortcut_conv = nn.Sequential()
ch_in = ch_out * 2
for i in range(layer_num):
fan_out = 3 * 3 * ch_out
......@@ -72,7 +69,7 @@ class ShortCut(nn.Layer):
shortcut_name = name + '.conv.{}'.format(i)
shortcut_conv.add_sublayer(
shortcut_name,
Conv2D(
nn.Conv2D(
in_channels=in_channels,
out_channels=ch_out,
kernel_size=3,
......@@ -81,7 +78,7 @@ class ShortCut(nn.Layer):
bias_attr=ParamAttr(
learning_rate=2., regularizer=L2Decay(0.))))
if i < layer_num - 1:
shortcut_conv.add_sublayer(shortcut_name + '.act', ReLU())
shortcut_conv.add_sublayer(shortcut_name + '.act', nn.ReLU())
self.shortcut = self.add_sublayer('short', shortcut_conv)
def forward(self, feat):
......
......@@ -1558,7 +1558,6 @@ def sigmoid_cross_entropy_with_logits(input,
output = F.binary_cross_entropy_with_logits(input, label, reduction='none')
mask_tensor = paddle.cast(label != ignore_index, 'float32')
output = paddle.multiply(output, mask_tensor)
output = paddle.reshape(output, shape=[output.shape[0], -1])
if normalize:
sum_valid_mask = paddle.sum(mask_tensor)
output = output / sum_valid_mask
......
......@@ -181,6 +181,9 @@ class FCOSPostProcess(object):
self.nms = nms
def __call__(self, fcos_head_outs, scale_factor):
"""
Decode the bbox and do NMS in FCOS.
"""
locations, cls_logits, bboxes_reg, centerness = fcos_head_outs
bboxes, score = self.decode(locations, cls_logits, bboxes_reg,
centerness, scale_factor)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册