未验证 提交 f87637de 编写于 作者: F Feng Ni 提交者: GitHub

[Dygraph] from config for TTFNet (#2132)

* fit ttfnet
上级 d48b3ff7
...@@ -13,7 +13,7 @@ TTFNet是一种用于实时目标检测且对训练时间友好的网络,对Ce ...@@ -13,7 +13,7 @@ TTFNet是一种用于实时目标检测且对训练时间友好的网络,对Ce
| 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP | 下载 | 配置文件 | | 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP | 下载 | 配置文件 |
| :-------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----------------------------------------------------: | :-----: | | :-------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----------------------------------------------------: | :-----: |
| DarkNet53 | TTFNet | 12 | 1x | ---- | 33.6 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ttfnet_darknet53_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ttfnet/ttfnet_darknet53_1x_coco.yml) | | DarkNet53 | TTFNet | 12 | 1x | ---- | 33.5 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ttfnet_darknet53_1x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ttfnet/ttfnet_darknet53_1x_coco.yml) |
## Citations ## Citations
``` ```
......
...@@ -11,26 +11,14 @@ TTFNet: ...@@ -11,26 +11,14 @@ TTFNet:
DarkNet: DarkNet:
depth: 53 depth: 53
freeze_at: 0 freeze_at: 0
return_idx: [0, 1, 2, 3, 4] return_idx: [1, 2, 3, 4]
norm_type: bn norm_type: bn
norm_decay: 0.0004 norm_decay: 0.0004
TTFFPN: # use default config
planes: [256, 128, 64] # TTFFPN:
shortcut_num: [1, 2, 3]
ch_in: [1024, 256, 128]
TTFHead: TTFHead:
hm_head:
name: HMHead
ch_in: 64
ch_out: 128
conv_num: 2
wh_head:
name: WHHead
ch_in: 64
ch_out: 64
conv_num: 2
hm_loss: hm_loss:
name: CTFocalLoss name: CTFocalLoss
loss_weight: 1. loss_weight: 1.
...@@ -39,7 +27,6 @@ TTFHead: ...@@ -39,7 +27,6 @@ TTFHead:
loss_weight: 5. loss_weight: 5.
reduction: sum reduction: sum
BBoxPostProcess: BBoxPostProcess:
decode: decode:
name: TTFBox name: TTFBox
......
...@@ -17,7 +17,7 @@ from __future__ import division ...@@ -17,7 +17,7 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import paddle import paddle
from ppdet.core.workspace import register from ppdet.core.workspace import register, create
from .meta_arch import BaseArch from .meta_arch import BaseArch
__all__ = ['TTFNet'] __all__ = ['TTFNet']
...@@ -36,12 +36,7 @@ class TTFNet(BaseArch): ...@@ -36,12 +36,7 @@ class TTFNet(BaseArch):
""" """
__category__ = 'architecture' __category__ = 'architecture'
__inject__ = [ __inject__ = ['post_process']
'backbone',
'neck',
'ttf_head',
'post_process',
]
def __init__(self, def __init__(self,
backbone='DarkNet', backbone='DarkNet',
...@@ -54,32 +49,55 @@ class TTFNet(BaseArch): ...@@ -54,32 +49,55 @@ class TTFNet(BaseArch):
self.ttf_head = ttf_head self.ttf_head = ttf_head
self.post_process = post_process self.post_process = post_process
def model_arch(self, ): @classmethod
# Backbone def from_config(cls, cfg, *args, **kwargs):
backbone = create(cfg['backbone'])
kwargs = {'input_shape': backbone.out_shape}
neck = create(cfg['neck'], **kwargs)
kwargs = {'input_shape': neck.out_shape}
ttf_head = create(cfg['ttf_head'], **kwargs)
return {
'backbone': backbone,
'neck': neck,
"ttf_head": ttf_head,
}
def _forward(self):
body_feats = self.backbone(self.inputs) body_feats = self.backbone(self.inputs)
# neck
body_feats = self.neck(body_feats) body_feats = self.neck(body_feats)
# TTF Head hm, wh = self.ttf_head(body_feats)
self.hm, self.wh = self.ttf_head(body_feats) if self.training:
return hm, wh
else:
bbox, bbox_num = self.post_process(hm, wh, self.inputs['im_shape'],
self.inputs['scale_factor'])
return bbox, bbox_num
def get_loss(self, ): def get_loss(self, ):
loss = {} loss = {}
heatmap = self.inputs['ttf_heatmap'] heatmap = self.inputs['ttf_heatmap']
box_target = self.inputs['ttf_box_target'] box_target = self.inputs['ttf_box_target']
reg_weight = self.inputs['ttf_reg_weight'] reg_weight = self.inputs['ttf_reg_weight']
head_loss = self.ttf_head.get_loss(self.hm, self.wh, heatmap, hm, wh = self._forward()
box_target, reg_weight) head_loss = self.ttf_head.get_loss(hm, wh, heatmap, box_target,
reg_weight)
loss.update(head_loss) loss.update(head_loss)
total_loss = paddle.add_n(list(loss.values())) total_loss = paddle.add_n(list(loss.values()))
loss.update({'loss': total_loss}) loss.update({'loss': total_loss})
return loss return loss
def get_pred(self): def get_pred(self):
bbox, bbox_num = self.post_process(self.hm, self.wh, bbox_pred, bbox_num = self._forward()
self.inputs['im_shape'], label = bbox_pred[:, 0]
self.inputs['scale_factor']) score = bbox_pred[:, 1]
outs = { bbox = bbox_pred[:, 2:]
output = {
"bbox": bbox, "bbox": bbox,
'score': score,
'label': label,
"bbox_num": bbox_num, "bbox_num": bbox_num,
} }
return outs return output
...@@ -104,34 +104,50 @@ class TTFHead(nn.Layer): ...@@ -104,34 +104,50 @@ class TTFHead(nn.Layer):
""" """
TTFHead TTFHead
Args: Args:
hm_head(object): Instance of 'HMHead', heatmap branch. in_channels(int): the channel number of input to TTFHead.
wh_head(object): Instance of 'WHHead', wh branch. num_classes(int): the number of classes, 80 by default.
hm_head_planes(int): the channel number in hm head, 128 by default.
wh_head_planes(int): the channel number in wh head, 64 by default.
hm_head_conv_num(int): the number of convolution in hm head, 2 by default.
wh_head_conv_num(int): the number of convolution in wh head, 2 by default.
hm_loss(object): Instance of 'CTFocalLoss'. hm_loss(object): Instance of 'CTFocalLoss'.
wh_loss(object): Instance of 'GIoULoss'. wh_loss(object): Instance of 'GIoULoss'.
wh_offset_base(float): the base offset of width and height, 16. by default. wh_offset_base(float): the base offset of width and height, 16. by default.
down_ratio(int): the actual down_ratio is calculated by base_down_ratio(default 16) a down_ratio(int): the actual down_ratio is calculated by base_down_ratio(default 16)
nd the number of upsample layers. and the number of upsample layers.
""" """
__shared__ = ['down_ratio'] __shared__ = ['num_classes', 'down_ratio']
__inject__ = ['hm_head', 'wh_head', 'hm_loss', 'wh_loss'] __inject__ = ['hm_loss', 'wh_loss']
def __init__(self, def __init__(self,
hm_head='HMHead', in_channels,
wh_head='WHHead', num_classes=80,
hm_head_planes=128,
wh_head_planes=64,
hm_head_conv_num=2,
wh_head_conv_num=2,
hm_loss='CTFocalLoss', hm_loss='CTFocalLoss',
wh_loss='GIoULoss', wh_loss='GIoULoss',
wh_offset_base=16., wh_offset_base=16.,
down_ratio=4): down_ratio=4):
super(TTFHead, self).__init__() super(TTFHead, self).__init__()
self.hm_head = hm_head self.in_channels = in_channels
self.wh_head = wh_head self.hm_head = HMHead(in_channels, hm_head_planes, num_classes,
hm_head_conv_num)
self.wh_head = WHHead(in_channels, wh_head_planes, wh_head_conv_num)
self.hm_loss = hm_loss self.hm_loss = hm_loss
self.wh_loss = wh_loss self.wh_loss = wh_loss
self.wh_offset_base = wh_offset_base self.wh_offset_base = wh_offset_base
self.down_ratio = down_ratio self.down_ratio = down_ratio
@classmethod
def from_config(cls, cfg, input_shape):
if isinstance(input_shape, (list, tuple)):
input_shape = input_shape[0]
return {'in_channels': input_shape.channels, }
def forward(self, feats): def forward(self, feats):
hm = self.hm_head(feats) hm = self.hm_head(feats)
wh = self.wh_head(feats) * self.wh_offset_base wh = self.wh_head(feats) * self.wh_offset_base
......
...@@ -24,6 +24,9 @@ from paddle.regularizer import L2Decay ...@@ -24,6 +24,9 @@ from paddle.regularizer import L2Decay
from ppdet.modeling.layers import DeformableConvV2 from ppdet.modeling.layers import DeformableConvV2
import math import math
from ppdet.modeling.ops import batch_norm from ppdet.modeling.ops import batch_norm
from ..shape_spec import ShapeSpec
__all__ = ['TTFFPN']
__all__ = ['TTFFPN'] __all__ = ['TTFFPN']
...@@ -89,22 +92,33 @@ class ShortCut(nn.Layer): ...@@ -89,22 +92,33 @@ class ShortCut(nn.Layer):
@register @register
@serializable @serializable
class TTFFPN(nn.Layer): class TTFFPN(nn.Layer):
"""
Args:
in_channels (list): number of input feature channels from backbone.
[128,256,512,1024] by default, means the channels of DarkNet53
backbone return_idx [1,2,3,4].
shortcut_num (list): the number of convolution layers in each shortcut.
[3,2,1] by default, means DarkNet53 backbone return_idx_1 has 3 convs
in its shortcut, return_idx_2 has 2 convs and return_idx_3 has 1 conv.
"""
def __init__(self, def __init__(self,
planes=[256, 128, 64], in_channels=[128, 256, 512, 1024],
shortcut_num=[1, 2, 3], shortcut_num=[3, 2, 1]):
ch_in=[1024, 256, 128]):
super(TTFFPN, self).__init__() super(TTFFPN, self).__init__()
self.planes = planes self.planes = [c // 2 for c in in_channels[:-1]][::-1]
self.shortcut_num = shortcut_num self.shortcut_num = shortcut_num[::-1]
self.shortcut_len = len(shortcut_num) self.shortcut_len = len(shortcut_num)
self.ch_in = ch_in self.ch_in = in_channels[::-1]
self.upsample_list = [] self.upsample_list = []
self.shortcut_list = [] self.shortcut_list = []
for i, out_c in enumerate(self.planes): for i, out_c in enumerate(self.planes):
in_c = self.ch_in[i] if i == 0 else self.ch_in[i] // 2
upsample = self.add_sublayer( upsample = self.add_sublayer(
'upsample.' + str(i), 'upsample.' + str(i),
Upsample( Upsample(
self.ch_in[i], out_c, name='upsample.' + str(i))) in_c, out_c, name='upsample.' + str(i)))
self.upsample_list.append(upsample) self.upsample_list.append(upsample)
if i < self.shortcut_len: if i < self.shortcut_len:
shortcut = self.add_sublayer( shortcut = self.add_sublayer(
...@@ -121,3 +135,11 @@ class TTFFPN(nn.Layer): ...@@ -121,3 +135,11 @@ class TTFFPN(nn.Layer):
shortcut = self.shortcut_list[i](inputs[-i - 2]) shortcut = self.shortcut_list[i](inputs[-i - 2])
feat = feat + shortcut feat = feat + shortcut
return feat return feat
@classmethod
def from_config(cls, cfg, input_shape):
return {'in_channels': [i.channels for i in input_shape], }
@property
def out_shape(self):
return [ShapeSpec(channels=self.planes[-1], )]
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册