未验证 提交 df151054 编写于 作者: F FDInSky 提交者: GitHub

test=dygraph add cascade rcnn/mask (#1013)

add cascade rcnn and modify make net method
add new cfg method 
add auto debug tool
上级 90e8a508
from . import ops from . import ops
from . import anchor from . import bbox
from . import mask
from . import backbone from . import backbone
from . import head from . import head
from . import architecture from . import architecture
from .ops import * from .ops import *
from .anchor import * from .bbox import *
from .mask import *
from .backbone import * from .backbone import *
from .head import * from .head import *
from .architecture import * from .architecture import *
...@@ -9,8 +9,10 @@ from . import meta_arch ...@@ -9,8 +9,10 @@ from . import meta_arch
from . import faster_rcnn from . import faster_rcnn
from . import mask_rcnn from . import mask_rcnn
from . import yolo from . import yolo
from . import cascade_rcnn
from .meta_arch import * from .meta_arch import *
from .faster_rcnn import * from .faster_rcnn import *
from .mask_rcnn import * from .mask_rcnn import *
from .yolo import * from .yolo import *
from .cascade_rcnn import *
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddle import fluid
from ppdet.core.workspace import register
from .meta_arch import BaseArch
__all__ = ['CascadeRCNN']
@register
class CascadeRCNN(BaseArch):
    """Cascade R-CNN style architecture with a mask branch.

    The sub-modules listed in ``__inject__`` are supplied by the workspace
    and stored as attributes. All intermediate results are exchanged through
    ``self.gbd``, which this class only reads and updates; it is expected to
    be created by the base class before ``model_arch`` runs.

    Args:
        anchor: callable invoked with ``self.gbd`` after the RPN head.
        proposal: callable invoked once per stage; also provides
            ``refine_bbox`` and ``post_process``.
        mask: callable invoked after the bbox stages; also provides
            ``post_process``.
        backbone: callable invoked first with ``self.gbd``.
        rpn_head: callable providing ``loss``.
        bbox_head: callable invoked once per stage; provides ``loss``.
        mask_head: callable providing ``loss``.
        num_stages: number of proposal / bbox-head iterations (default 3).
        *args, **kwargs: forwarded unchanged to ``BaseArch.__init__``.
    """

    __category__ = 'architecture'
    __shared__ = ['num_stages']
    __inject__ = [
        'anchor',
        'proposal',
        'mask',
        'backbone',
        'rpn_head',
        'bbox_head',
        'mask_head',
    ]

    def __init__(self,
                 anchor,
                 proposal,
                 mask,
                 backbone,
                 rpn_head,
                 bbox_head,
                 mask_head,
                 num_stages=3,
                 *args,
                 **kwargs):
        super(CascadeRCNN, self).__init__(*args, **kwargs)
        self.anchor = anchor
        self.proposal = proposal
        self.mask = mask
        self.backbone = backbone
        self.rpn_head = rpn_head
        self.bbox_head = bbox_head
        self.mask_head = mask_head
        self.num_stages = num_stages

    def model_arch(self):
        """Run every sub-module in order, storing each output in ``self.gbd``."""
        # Backbone
        bb_out = self.backbone(self.gbd)
        self.gbd.update(bb_out)

        # RPN
        rpn_head_out = self.rpn_head(self.gbd)
        self.gbd.update(rpn_head_out)

        # Anchor
        anchor_out = self.anchor(self.gbd)
        self.gbd.update(anchor_out)

        # Cascade stages: results of stage i are stored under the keys
        # 'proposal_<i>' and 'bbox_head_<i>'.
        self.gbd['stage'] = 0
        for i in range(self.num_stages):
            self.gbd.update_v('stage', i)

            # Proposal BBox
            proposal_out = self.proposal(self.gbd)
            self.gbd.update({"proposal_" + str(i): proposal_out})

            # BBox Head
            bbox_head_out = self.bbox_head(self.gbd)
            self.gbd.update({'bbox_head_' + str(i): bbox_head_out})

            # Merge the refined boxes into this stage's proposal entry.
            refine_bbox_out = self.proposal.refine_bbox(self.gbd)
            self.gbd['proposal_' + str(i)].update(refine_bbox_out)

        # NOTE(review): the original indentation was lost; post-processing is
        # placed after the loop on the assumption that it needs the outputs
        # of all stages -- confirm against the upstream file.
        if self.gbd['mode'] == 'infer':
            bbox_out = self.proposal.post_process(self.gbd)
            self.gbd.update(bbox_out)

        # Mask
        mask_out = self.mask(self.gbd)
        self.gbd.update(mask_out)

        # Mask Head
        mask_head_out = self.mask_head(self.gbd)
        self.gbd.update(mask_head_out)

        if self.gbd['mode'] == 'infer':
            mask_out = self.mask.post_process(self.gbd)
            self.gbd.update(mask_out)

    def loss(self):
        """Collect the RPN, per-stage bbox and mask losses.

        Returns:
            dict: the individual losses under ``loss_rpn_cls``,
            ``loss_rpn_reg``, ``loss_bbox_cls_<i>``, ``loss_bbox_reg_<i>``
            and ``mask_loss``, plus their sum under ``loss``.
        """
        outs = {}
        losses = []

        rpn_cls_loss, rpn_reg_loss = self.rpn_head.loss(self.gbd)
        outs['loss_rpn_cls'] = rpn_cls_loss
        outs['loss_rpn_reg'] = rpn_reg_loss
        losses.extend([rpn_cls_loss, rpn_reg_loss])

        bbox_cls_loss_list = []
        bbox_reg_loss_list = []
        for i in range(self.num_stages):
            # Select the stage before asking the bbox head for its loss.
            self.gbd.update_v('stage', i)
            bbox_cls_loss, bbox_reg_loss = self.bbox_head.loss(self.gbd)
            bbox_cls_loss_list.append(bbox_cls_loss)
            bbox_reg_loss_list.append(bbox_reg_loss)
            outs['loss_bbox_cls_' + str(i)] = bbox_cls_loss
            outs['loss_bbox_reg_' + str(i)] = bbox_reg_loss
        # Extend once, after the loop, so each stage loss is summed only once.
        losses.extend(bbox_cls_loss_list)
        losses.extend(bbox_reg_loss_list)

        mask_loss = self.mask_head.loss(self.gbd)
        outs['mask_loss'] = mask_loss
        losses.append(mask_loss)

        loss = fluid.layers.sum(losses)
        outs['loss'] = loss
        return outs

    def infer(self):
        """Return the inference results stored in ``self.gbd`` as numpy values.

        Returns:
            dict: ``bbox``, ``bbox_nums``, ``mask``, ``im_id`` and
            ``im_shape``, each obtained by calling ``.numpy()`` on the
            corresponding ``self.gbd`` entry.
        """
        outs = {
            'bbox': self.gbd['predicted_bbox'].numpy(),
            'bbox_nums': self.gbd['predicted_bbox_nums'].numpy(),
            'mask': self.gbd['predicted_mask'].numpy(),
            'im_id': self.gbd['im_id'].numpy(),
            'im_shape': self.gbd['im_shape'].numpy()
        }
        # The original returned the undefined name `inputs` (NameError).
        return outs
...@@ -3,7 +3,6 @@ from __future__ import division ...@@ -3,7 +3,6 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
from paddle import fluid from paddle import fluid
from ppdet.core.workspace import register from ppdet.core.workspace import register
from .meta_arch import BaseArch from .meta_arch import BaseArch
...@@ -21,27 +20,16 @@ class FasterRCNN(BaseArch): ...@@ -21,27 +20,16 @@ class FasterRCNN(BaseArch):
'bbox_head', 'bbox_head',
] ]
def __init__(self, def __init__(self, anchor, proposal, backbone, rpn_head, bbox_head, *args,
anchor, **kwargs):
proposal, super(FasterRCNN, self).__init__(*args, **kwargs)
backbone,
rpn_head,
bbox_head,
rpn_only=False,
mode='train'):
super(FasterRCNN, self).__init__()
self.anchor = anchor self.anchor = anchor
self.proposal = proposal self.proposal = proposal
self.backbone = backbone self.backbone = backbone
self.rpn_head = rpn_head self.rpn_head = rpn_head
self.bbox_head = bbox_head self.bbox_head = bbox_head
self.rpn_only = rpn_only
self.mode = mode
def forward(self, inputs, inputs_keys):
self.gbd = self.build_inputs(inputs, inputs_keys)
self.gbd['mode'] = self.mode
def model_arch(self, ):
# Backbone # Backbone
bb_out = self.backbone(self.gbd) bb_out = self.backbone(self.gbd)
self.gbd.update(bb_out) self.gbd.update(bb_out)
...@@ -55,29 +43,21 @@ class FasterRCNN(BaseArch): ...@@ -55,29 +43,21 @@ class FasterRCNN(BaseArch):
self.gbd.update(anchor_out) self.gbd.update(anchor_out)
# Proposal BBox # Proposal BBox
self.gbd['stage'] = 0
proposal_out = self.proposal(self.gbd) proposal_out = self.proposal(self.gbd)
self.gbd.update(proposal_out) self.gbd.update({'proposal_0': proposal_out})
# BBox Head # BBox Head
bbox_head_out = self.bbox_head(self.gbd) bboxhead_out = self.bbox_head(self.gbd)
self.gbd.update(bbox_head_out) self.gbd.update({'bbox_head_0': bboxhead_out})
if self.gbd['mode'] == 'infer': if self.gbd['mode'] == 'infer':
bbox_out = self.proposal.post_process(self.gbd) bbox_out = self.proposal.post_process(self.gbd)
self.gbd.update(bbox_out) self.gbd.update(bbox_out)
# result def loss(self, ):
if self.gbd['mode'] == 'train': rpn_cls_loss, rpn_reg_loss = self.rpn_head.loss(self.gbd)
return self.loss(self.gbd) bbox_cls_loss, bbox_reg_loss = self.bbox_head.loss(self.gbd)
elif self.gbd['mode'] == 'infer':
return self.infer(self.gbd)
else:
raise "Now, only support train or infer mode!"
def loss(self, inputs):
losses = []
rpn_cls_loss, rpn_reg_loss = self.rpn_head.loss(inputs)
bbox_cls_loss, bbox_reg_loss = self.bbox_head.loss(inputs)
losses = [rpn_cls_loss, rpn_reg_loss, bbox_cls_loss, bbox_reg_loss] losses = [rpn_cls_loss, rpn_reg_loss, bbox_cls_loss, bbox_reg_loss]
loss = fluid.layers.sum(losses) loss = fluid.layers.sum(losses)
out = { out = {
...@@ -89,11 +69,11 @@ class FasterRCNN(BaseArch): ...@@ -89,11 +69,11 @@ class FasterRCNN(BaseArch):
} }
return out return out
def infer(self, inputs): def infer(self, ):
outs = { outs = {
"bbox": inputs['predicted_bbox'].numpy(), "bbox": self.gbd['predicted_bbox'].numpy(),
"bbox_nums": inputs['predicted_bbox_nums'].numpy(), "bbox_nums": self.gbd['predicted_bbox_nums'].numpy(),
'im_id': inputs['im_id'].numpy(), 'im_id': self.gbd['im_id'].numpy(),
'im_shape': inputs['im_shape'].numpy() 'im_shape': self.gbd['im_shape'].numpy()
} }
return outs return outs
...@@ -3,11 +3,10 @@ from __future__ import division ...@@ -3,11 +3,10 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
from paddle import fluid from paddle import fluid
from ppdet.core.workspace import register from ppdet.core.workspace import register
from ppdet.utils.data_structure import BufferDict from ppdet.utils.data_structure import BufferDict
from .meta_arch import BaseArch from .meta_arch import BaseArch
__all__ = ['MaskRCNN'] __all__ = ['MaskRCNN']
...@@ -24,18 +23,9 @@ class MaskRCNN(BaseArch): ...@@ -24,18 +23,9 @@ class MaskRCNN(BaseArch):
'mask_head', 'mask_head',
] ]
def __init__(self, def __init__(self, anchor, proposal, mask, backbone, rpn_head, bbox_head,
anchor, mask_head, *args, **kwargs):
proposal, super(MaskRCNN, self).__init__(*args, **kwargs)
mask,
backbone,
rpn_head,
bbox_head,
mask_head,
rpn_only=False,
mode='train'):
super(MaskRCNN, self).__init__()
self.anchor = anchor self.anchor = anchor
self.proposal = proposal self.proposal = proposal
self.mask = mask self.mask = mask
...@@ -43,12 +33,8 @@ class MaskRCNN(BaseArch): ...@@ -43,12 +33,8 @@ class MaskRCNN(BaseArch):
self.rpn_head = rpn_head self.rpn_head = rpn_head
self.bbox_head = bbox_head self.bbox_head = bbox_head
self.mask_head = mask_head self.mask_head = mask_head
self.mode = mode
def forward(self, inputs, inputs_keys):
self.gbd = self.build_inputs(inputs, inputs_keys)
self.gbd['mode'] = mode
def model_arch(self, ):
# Backbone # Backbone
bb_out = self.backbone(self.gbd) bb_out = self.backbone(self.gbd)
self.gbd.update(bb_out) self.gbd.update(bb_out)
...@@ -62,12 +48,13 @@ class MaskRCNN(BaseArch): ...@@ -62,12 +48,13 @@ class MaskRCNN(BaseArch):
self.gbd.update(anchor_out) self.gbd.update(anchor_out)
# Proposal BBox # Proposal BBox
self.gbd['stage'] = 0
proposal_out = self.proposal(self.gbd) proposal_out = self.proposal(self.gbd)
self.gbd.update(proposal_out) self.gbd.update({'proposal_0': proposal_out})
# BBox Head # BBox Head
bbox_head_out = self.bbox_head(self.gbd) bboxhead_out = self.bbox_head(self.gbd)
self.gbd.update(bbox_head_out) self.gbd.update({'bbox_head_0': bboxhead_out})
if self.gbd['mode'] == 'infer': if self.gbd['mode'] == 'infer':
bbox_out = self.proposal.post_process(self.gbd) bbox_out = self.proposal.post_process(self.gbd)
...@@ -85,19 +72,11 @@ class MaskRCNN(BaseArch): ...@@ -85,19 +72,11 @@ class MaskRCNN(BaseArch):
mask_out = self.mask.post_process(self.gbd) mask_out = self.mask.post_process(self.gbd)
self.gbd.update(mask_out) self.gbd.update(mask_out)
# result def loss(self, ):
if self.gbd['mode'] == 'train':
return self.loss(self.gbd)
elif self.gbd['mode'] == 'infer':
self.infer(self.gbd)
else:
raise "Now, only support train or infer mode!"
def loss(self, inputs):
losses = [] losses = []
rpn_cls_loss, rpn_reg_loss = self.rpn_head.loss(inputs) rpn_cls_loss, rpn_reg_loss = self.rpn_head.loss(self.gbd)
bbox_cls_loss, bbox_reg_loss = self.bbox_head.loss(inputs) bbox_cls_loss, bbox_reg_loss = self.bbox_head.loss(self.gbd)
mask_loss = self.mask_head.loss(inputs) mask_loss = self.mask_head.loss(self.gbd)
losses = [ losses = [
rpn_cls_loss, rpn_reg_loss, bbox_cls_loss, bbox_reg_loss, mask_loss rpn_cls_loss, rpn_reg_loss, bbox_cls_loss, bbox_reg_loss, mask_loss
] ]
...@@ -112,12 +91,12 @@ class MaskRCNN(BaseArch): ...@@ -112,12 +91,12 @@ class MaskRCNN(BaseArch):
} }
return out return out
def infer(self, inputs): def infer(self, ):
outs = { outs = {
'bbox': inputs['predicted_bbox'].numpy(), 'bbox': self.gbd['predicted_bbox'].numpy(),
'bbox_nums': inputs['predicted_bbox_nums'].numpy(), 'bbox_nums': self.gbd['predicted_bbox_nums'].numpy(),
'mask': inputs['predicted_mask'].numpy(), 'mask': self.gbd['predicted_mask'].numpy(),
'im_id': inputs['im_id'].numpy(), 'im_id': self.gbd['im_id'].numpy(),
'im_shape': inputs['im_shape'].numpy() 'im_shape': self.gbd['im_shape'].numpy()
} }
return inputs return inputs
...@@ -3,11 +3,8 @@ from __future__ import division ...@@ -3,11 +3,8 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import numpy as np import numpy as np
from paddle import fluid
from paddle.fluid.dygraph import Layer from paddle.fluid.dygraph import Layer
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from ppdet.core.workspace import register from ppdet.core.workspace import register
from ppdet.utils.data_structure import BufferDict from ppdet.utils.data_structure import BufferDict
...@@ -16,22 +13,43 @@ __all__ = ['BaseArch'] ...@@ -16,22 +13,43 @@ __all__ = ['BaseArch']
@register @register
class BaseArch(Layer): class BaseArch(Layer):
def __init__(self, mode='train', *args, **kwargs): def __init__(self, *args, **kwargs):
super(BaseArch, self).__init__() super(BaseArch, self).__init__()
self.mode = mode self.args = args
self.kwargs = kwargs
def forward(self, inputs, inputs_keys, mode='train'): def forward(self, inputs, inputs_keys):
raise NotImplementedError("Should implement forward method!") self.gbd = BufferDict()
self.gbd.update(self.kwargs)
assert self.gbd[
'mode'] is not None, "Please specify mode train or infer in config file!"
if self.kwargs['open_debug'] is None:
self.gbd['open_debug'] = False
def loss(self, inputs): self.build_inputs(inputs, inputs_keys)
raise NotImplementedError("Should implement loss method!")
def infer(self, inputs): self.model_arch()
raise NotImplementedError("Should implement infer method!")
self.gbd.debug()
if self.gbd['mode'] == 'train':
out = self.loss()
elif self.gbd['mode'] == 'infer':
out = self.infer()
else:
raise "Now, only support train or infer mode!"
return out
def build_inputs(self, inputs, inputs_keys): def build_inputs(self, inputs, inputs_keys):
gbd = BufferDict()
for i, k in enumerate(inputs_keys): for i, k in enumerate(inputs_keys):
v = to_variable(np.array([x[i] for x in inputs])) v = to_variable(np.array([x[i] for x in inputs]))
gbd.set(k, v) self.gbd.set(k, v)
return gbd
def model_arch(self, ):
raise NotImplementedError("Should implement model_arch method!")
def loss(self, ):
raise NotImplementedError("Should implement loss method!")
def infer(self, ):
raise NotImplementedError("Should implement infer method!")
...@@ -2,8 +2,6 @@ from __future__ import absolute_import ...@@ -2,8 +2,6 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from paddle import fluid
from ppdet.core.workspace import register from ppdet.core.workspace import register
from .meta_arch import BaseArch from .meta_arch import BaseArch
...@@ -19,17 +17,13 @@ class YOLOv3(BaseArch): ...@@ -19,17 +17,13 @@ class YOLOv3(BaseArch):
'yolo_head', 'yolo_head',
] ]
def __init__(self, anchor, backbone, yolo_head, mode='train'): def __init__(self, anchor, backbone, yolo_head, *args, **kwargs):
super(YOLOv3, self).__init__() super(YOLOv3, self).__init__(*args, **kwargs)
self.anchor = anchor self.anchor = anchor
self.backbone = backbone self.backbone = backbone
self.yolo_head = yolo_head self.yolo_head = yolo_head
self.mode = mode
def forward(self, inputs, inputs_keys):
self.gbd = self.build_inputs(inputs, inputs_keys)
self.gbd['mode'] = self.mode
def model_arch(self, ):
# Backbone # Backbone
bb_out = self.backbone(self.gbd) bb_out = self.backbone(self.gbd)
self.gbd.update(bb_out) self.gbd.update(bb_out)
...@@ -46,23 +40,14 @@ class YOLOv3(BaseArch): ...@@ -46,23 +40,14 @@ class YOLOv3(BaseArch):
bbox_out = self.anchor.post_process(self.gbd) bbox_out = self.anchor.post_process(self.gbd)
self.gbd.update(bbox_out) self.gbd.update(bbox_out)
# result def loss(self, ):
if self.gbd['mode'] == 'train': yolo_loss = self.yolo_head.loss(self.gbd)
return self.loss(self.gbd) out = {'loss': yolo_loss}
elif self.gbd['mode'] == 'infer':
return self.infer(self.gbd)
else:
raise "Now, only support train or infer mode!"
def loss(self, inputs):
yolo_loss = self.yolo_head.loss(inputs)
out = {'loss': yolo_loss, }
return out return out
def infer(self, inputs): def infer(self, ):
outs = { outs = {
"bbox": inputs['predicted_bbox'].numpy(), "bbox": self.gbd['predicted_bbox'].numpy(),
"bbox_nums": inputs['predicted_bbox_nums'] "bbox_nums": self.gbd['predicted_bbox_nums']
} }
print(outs['bbox_nums'])
return outs return outs
...@@ -4,7 +4,6 @@ from paddle.fluid.dygraph import Layer ...@@ -4,7 +4,6 @@ from paddle.fluid.dygraph import Layer
from paddle.fluid.dygraph import Conv2D, Pool2D, BatchNorm from paddle.fluid.dygraph import Conv2D, Pool2D, BatchNorm
from paddle.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Constant from paddle.fluid.initializer import Constant
from ppdet.core.workspace import register, serializable from ppdet.core.workspace import register, serializable
...@@ -17,10 +16,10 @@ class ConvBNLayer(Layer): ...@@ -17,10 +16,10 @@ class ConvBNLayer(Layer):
stride, stride,
padding, padding,
act='relu', act='relu',
learning_rate=1.0): lr=1.0):
super(ConvBNLayer, self).__init__() super(ConvBNLayer, self).__init__()
self._conv = Conv2D( self.conv = Conv2D(
num_channels=ch_in, num_channels=ch_in,
num_filters=ch_out, num_filters=ch_out,
filter_size=filter_size, filter_size=filter_size,
...@@ -29,26 +28,23 @@ class ConvBNLayer(Layer): ...@@ -29,26 +28,23 @@ class ConvBNLayer(Layer):
groups=1, groups=1,
act=act, act=act,
param_attr=ParamAttr( param_attr=ParamAttr(
name=name_scope + "_weights", learning_rate=learning_rate), name=name_scope + "_weights", learning_rate=lr),
bias_attr=ParamAttr(name=name_scope + "_bias")) bias_attr=ParamAttr(name=name_scope + "_bias"))
if name_scope == "conv1": if name_scope == "conv1":
bn_name = "bn_" + name_scope bn_name = "bn_" + name_scope
else: else:
bn_name = "bn" + name_scope[3:] bn_name = "bn" + name_scope[3:]
self.bn = BatchNorm(
self._bn = BatchNorm(
num_channels=ch_out, num_channels=ch_out,
act=act, act=act,
param_attr=ParamAttr(name=bn_name + '_scale'), param_attr=ParamAttr(name=bn_name + '_scale'),
bias_attr=ParamAttr(bn_name + '_offset'), bias_attr=ParamAttr(name=bn_name + '_offset'),
moving_mean_name=bn_name + '_mean', moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance', moving_variance_name=bn_name + '_variance')
is_test=True)
def forward(self, inputs): def forward(self, inputs):
x = self._conv(inputs) out = self.conv(inputs)
out = self._bn(x) out = self.bn(out)
return out return out
...@@ -60,11 +56,11 @@ class ConvAffineLayer(Layer): ...@@ -60,11 +56,11 @@ class ConvAffineLayer(Layer):
filter_size, filter_size,
stride, stride,
padding, padding,
learning_rate=1.0, lr=1.0,
act='relu'): act='relu'):
super(ConvAffineLayer, self).__init__() super(ConvAffineLayer, self).__init__()
self._conv = Conv2D( self.conv = Conv2D(
num_channels=ch_in, num_channels=ch_in,
num_filters=ch_out, num_filters=ch_out,
filter_size=filter_size, filter_size=filter_size,
...@@ -72,36 +68,34 @@ class ConvAffineLayer(Layer): ...@@ -72,36 +68,34 @@ class ConvAffineLayer(Layer):
padding=padding, padding=padding,
act=None, act=None,
param_attr=ParamAttr( param_attr=ParamAttr(
name=name_scope + "_weights", learning_rate=learning_rate), name=name_scope + "_weights", learning_rate=lr),
bias_attr=False) bias_attr=False)
if name_scope == "conv1": if name_scope == "conv1":
bn_name = "bn_" + name_scope bn_name = "bn_" + name_scope
else: else:
bn_name = "bn" + name_scope[3:] bn_name = "bn" + name_scope[3:]
self.name_scope = name_scope self.scale = fluid.layers.create_parameter(
self.scale = fluid.Layer.create_parameter(
shape=[ch_out], shape=[ch_out],
dtype='float32', dtype='float32',
attr=ParamAttr( attr=ParamAttr(
name=bn_name + '_scale', learning_rate=0.), name=bn_name + '_scale', learning_rate=0.),
default_initializer=Constant(1.)) default_initializer=Constant(1.))
self.bias = fluid.layers.create_parameter(
self.offset = fluid.layers.create_parameter(
shape=[ch_out], shape=[ch_out],
dtype='float32', dtype='float32',
attr=ParamAttr( attr=ParamAttr(
bn_name + '_offset', learning_rate=0.), name=bn_name + '_offset', learning_rate=0.),
default_initializer=Constant(0.)) default_initializer=Constant(0.))
self.act = act self.act = act
def forward(self, inputs): def forward(self, inputs):
conv = self._conv(inputs) out = self.conv(inputs)
out = fluid.layers.affine_channel( out = fluid.layers.affine_channel(
x=conv, scale=self.scale, bias=self.bias) out, scale=self.scale, bias=self.offset)
if self.act == 'relu': if self.act == 'relu':
out = fluid.layers.relu(x=out) out = fluid.layers.relu(out)
return out return out
...@@ -112,12 +106,21 @@ class BottleNeck(Layer): ...@@ -112,12 +106,21 @@ class BottleNeck(Layer):
ch_out, ch_out,
stride, stride,
shortcut=True, shortcut=True,
learning_rate=1.0): lr=1.0,
norm_type='bn'):
super(BottleNeck, self).__init__() super(BottleNeck, self).__init__()
self.name_scope = name_scope
if norm_type == 'bn':
atom_block = ConvBNLayer
elif norm_type == 'affine':
atom_block = ConvAffineLayer
else:
atom_block = None
assert atom_block != None, 'NormType only support BatchNorm and Affine!'
self.shortcut = shortcut self.shortcut = shortcut
if not shortcut: if not shortcut:
self.short = ConvBNLayer( self.branch1 = atom_block(
name_scope + "_branch1", name_scope + "_branch1",
ch_in=ch_in, ch_in=ch_in,
ch_out=ch_out * 4, ch_out=ch_out * 4,
...@@ -125,52 +128,48 @@ class BottleNeck(Layer): ...@@ -125,52 +128,48 @@ class BottleNeck(Layer):
stride=stride, stride=stride,
padding=0, padding=0,
act=None, act=None,
learning_rate=learning_rate) lr=lr)
self.conv1 = ConvBNLayer( self.branch2a = atom_block(
name_scope + "_branch2a", name_scope + "_branch2a",
ch_in=ch_in, ch_in=ch_in,
ch_out=ch_out, ch_out=ch_out,
filter_size=1, filter_size=1,
stride=stride, stride=stride,
padding=0, padding=0,
learning_rate=learning_rate, ) lr=lr)
self.conv2 = ConvBNLayer( self.branch2b = atom_block(
name_scope + "_branch2b", name_scope + "_branch2b",
ch_in=ch_out, ch_in=ch_out,
ch_out=ch_out, ch_out=ch_out,
filter_size=3, filter_size=3,
stride=1, stride=1,
padding=1, padding=1,
learning_rate=learning_rate) lr=lr)
self.conv3 = ConvBNLayer( self.branch2c = atom_block(
name_scope + "_branch2c", name_scope + "_branch2c",
ch_in=ch_out, ch_in=ch_out,
ch_out=ch_out * 4, ch_out=ch_out * 4,
filter_size=1, filter_size=1,
stride=1, stride=1,
padding=0, padding=0,
learning_rate=learning_rate, lr=lr,
act=None) act=None)
self.name_scope = name_scope
def forward(self, inputs): def forward(self, inputs):
if self.shortcut: if self.shortcut:
short = inputs short = inputs
else: else:
short = self.short(inputs) short = self.branch1(inputs)
conv1 = self.conv1(inputs) out = self.branch2a(inputs)
conv2 = self.conv2(conv1) out = self.branch2b(out)
conv3 = self.conv3(conv2) out = self.branch2c(out)
out = fluid.layers.elementwise_add( out = fluid.layers.elementwise_add(
x=short, x=short, y=out, act='relu', name=self.name_scope + ".add.output.5")
y=conv3,
act='relu',
name=self.name_scope + ".add.output.5")
return out return out
...@@ -182,7 +181,8 @@ class Blocks(Layer): ...@@ -182,7 +181,8 @@ class Blocks(Layer):
ch_out, ch_out,
count, count,
stride, stride,
learning_rate=1.0): lr=1.0,
norm_type='bn'):
super(Blocks, self).__init__() super(Blocks, self).__init__()
self.blocks = [] self.blocks = []
...@@ -204,7 +204,8 @@ class Blocks(Layer): ...@@ -204,7 +204,8 @@ class Blocks(Layer):
ch_out=ch_out, ch_out=ch_out,
stride=self.stride, stride=self.stride,
shortcut=self.shortcut, shortcut=self.shortcut,
learning_rate=learning_rate)) lr=lr,
norm_type=norm_type))
self.blocks.append(block) self.blocks.append(block)
shortcut = True shortcut = True
...@@ -215,62 +216,75 @@ class Blocks(Layer): ...@@ -215,62 +216,75 @@ class Blocks(Layer):
return res_out return res_out
ResNet_cfg = {'50': [3, 4, 6, 3], '101': [3, 4, 23, 3], '152': [3, 8, 36, 3]}
@register @register
@serializable @serializable
class ResNet(Layer): class ResNet(Layer):
def __init__( def __init__(self, depth=50, norm_type='bn', freeze_at='res2'):
self,
norm_type='bn',
depth=50,
feature_maps=4,
freeze_at=2, ):
super(ResNet, self).__init__() super(ResNet, self).__init__()
self.depth = depth
self.norm_type = norm_type
self.freeze_at = freeze_at
block_nums = ResNet_cfg[str(self.depth)]
if self.norm_type == 'bn':
atom_block = ConvBNLayer
elif self.norm_type == 'affine':
atom_block = ConvAffineLayer
else:
atom_block = None
assert atom_block != None, 'NormType only support BatchNorm and Affine!'
if depth == 50: self.conv1 = atom_block(
blocks = [3, 4, 6, 3] 'conv1', ch_in=3, ch_out=64, filter_size=7, stride=2, padding=3)
elif depth == 101:
blocks = [3, 4, 23, 3]
elif depth == 152:
blocks = [3, 8, 36, 3]
self.conv = ConvBNLayer( self.pool = Pool2D(
"conv1",
ch_in=3,
ch_out=64,
filter_size=7,
stride=2,
padding=3,
learning_rate=0.)
self.pool2d_max = Pool2D(
pool_type='max', pool_size=3, pool_stride=2, pool_padding=1) pool_type='max', pool_size=3, pool_stride=2, pool_padding=1)
self.stage2 = Blocks( self.stage2 = Blocks(
"res2", "res2",
ch_in=64, ch_in=64,
ch_out=64, ch_out=64,
count=blocks[0], count=block_nums[0],
stride=1, stride=1,
learning_rate=0.) norm_type=norm_type)
self.stage3 = Blocks( self.stage3 = Blocks(
"res3", ch_in=256, ch_out=128, count=blocks[1], stride=2) "res3",
ch_in=256,
ch_out=128,
count=block_nums[1],
stride=2,
norm_type=norm_type)
self.stage4 = Blocks( self.stage4 = Blocks(
"res4", ch_in=512, ch_out=256, count=blocks[2], stride=2) "res4",
ch_in=512,
ch_out=256,
count=block_nums[2],
stride=2,
norm_type=norm_type)
def forward(self, inputs): def forward(self, inputs):
x = inputs['image'] x = inputs['image']
conv1 = self.conv(x) conv1 = self.conv1(x)
poo1 = self.pool2d_max(conv1)
pool1 = self.pool(conv1)
res2 = self.stage2(poo1) res2 = self.stage2(pool1)
res2.stop_gradient = True
res3 = self.stage3(res2) res3 = self.stage3(res2)
res4 = self.stage4(res3) res4 = self.stage4(res3)
outs = {'res2': res2, 'res3': res3, 'res4': res4} outs = {
'res2': res2,
'res3': res3,
'res4': res4,
'res_norm_type': self.norm_type
}
outs[self.freeze_at].stop_gradient = True
return outs return outs
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
from ppdet.core.workspace import register from ppdet.core.workspace import register
from ppdet.modeling.ops import (
AnchorGeneratorYOLO, AnchorTargetGeneratorYOLO, AnchorGeneratorRPN,
AnchorTargetGeneratorRPN, ProposalGenerator, ProposalTargetGenerator,
MaskTargetGenerator, DecodeClipNms, YOLOBox, MultiClassNMS)
# TODO: modify here into ppdet.modeling.ops like DecodeClipNms
from ppdet.py_op.post_process import mask_post_process
@register @register
class BBoxPostProcess(object): class BBoxPostProcess(object):
__shared__ = ['num_classes', 'num_stages']
__inject__ = ['decode_clip_nms']
def __init__(self, def __init__(self,
decode_clip_nms,
num_classes=81,
num_stages=1,
decode=None, decode=None,
clip=None, clip=None,
nms=None, nms=None):
decode_clip_nms=DecodeClipNms().__dict__):
super(BBoxPostProcess, self).__init__() super(BBoxPostProcess, self).__init__()
self.num_classes = num_classes
self.num_stages = num_stages
self.decode = decode self.decode = decode
self.clip = clip self.clip = clip
self.nms = nms self.nms = nms
self.decode_clip_nms = decode_clip_nms self.decode_clip_nms = decode_clip_nms
if isinstance(decode_clip_nms, dict):
self.decode_clip_nms = DecodeClipNms(**decode_clip_nms)
def __call__(self, inputs): def __call__(self, inputs):
# TODO: split into 3 steps # TODO: split into 3 steps
...@@ -31,8 +29,25 @@ class BBoxPostProcess(object): ...@@ -31,8 +29,25 @@ class BBoxPostProcess(object):
# decode # decode
# clip # clip
# nms # nms
outs = self.decode_clip_nms(inputs['rpn_rois'], inputs['bbox_prob'], if self.num_stages > 0:
inputs['bbox_delta'], inputs['im_info']) bbox_prob_list = []
for i in range(self.num_stages):
bbox_prob_list.append(inputs['bbox_head_' + str(i)][
'bbox_prob'])
bbox_prob = fluid.layers.sum(bbox_prob_list) / float(
len(bbox_prob_list))
bbox_delta = inputs['bbox_head_' + str(i)]['bbox_delta']
if inputs['bbox_head_0']['cls_agnostic_bbox_reg'] == 2:
bbox_delta = fluid.layers.slice(
bbox_delta, axes=1, starts=[1], ends=[2])
bbox_delta = fluid.layers.expand(bbox_delta,
[1, self.num_classes, 1])
else:
bbox_prob = inputs['bbox_prob']
bbox_delta = inputs['bbox_delta']
outs = self.decode_clip_nms(inputs['rpn_rois'], bbox_prob, bbox_delta,
inputs['im_info'])
outs = {"predicted_bbox_nums": outs[0], "predicted_bbox": outs[1]} outs = {"predicted_bbox_nums": outs[0], "predicted_bbox": outs[1]}
return outs return outs
...@@ -40,22 +55,15 @@ class BBoxPostProcess(object): ...@@ -40,22 +55,15 @@ class BBoxPostProcess(object):
@register @register
class BBoxPostProcessYOLO(object): class BBoxPostProcessYOLO(object):
__shared__ = ['num_classes'] __shared__ = ['num_classes']
__inject__ = ['yolo_box', 'nms']
def __init__(self, def __init__(self, yolo_box, nms, num_classes=80, decode=None, clip=None):
num_classes=80,
decode=None,
clip=None,
yolo_box=YOLOBox().__dict__,
nms=MultiClassNMS().__dict__):
super(BBoxPostProcessYOLO, self).__init__() super(BBoxPostProcessYOLO, self).__init__()
self.yolo_box = yolo_box
self.nms = nms
self.num_classes = num_classes self.num_classes = num_classes
self.decode = decode self.decode = decode
self.clip = clip self.clip = clip
self.nms = nms
if isinstance(yolo_box, dict):
self.yolo_box = YOLOBox(**yolo_box)
if isinstance(nms, dict):
self.nms = MultiClassNMS(**nms)
def __call__(self, inputs): def __call__(self, inputs):
# TODO: split yolo_box into 2 steps # TODO: split yolo_box into 2 steps
...@@ -80,40 +88,14 @@ class BBoxPostProcessYOLO(object): ...@@ -80,40 +88,14 @@ class BBoxPostProcessYOLO(object):
return outs return outs
@register
class MaskPostProcess(object):
__shared__ = ['num_classes']
def __init__(self, num_classes=81):
super(MaskPostProcess, self).__init__()
self.num_classes = num_classes
def __call__(self, inputs):
# TODO: modify related ops for deploying
outs = mask_post_process(inputs['predicted_bbox_nums'].numpy(),
inputs['predicted_bbox'].numpy(),
inputs['mask_logits'].numpy(),
inputs['im_info'].numpy())
outs = {'predicted_mask': outs}
return outs
@register @register
class AnchorRPN(object): class AnchorRPN(object):
__inject__ = ['anchor_generator', 'anchor_target_generator'] __inject__ = ['anchor_generator', 'anchor_target_generator']
def __init__(self, def __init__(self, anchor_generator, anchor_target_generator):
anchor_type='rpn',
anchor_generator=AnchorGeneratorRPN().__dict__,
anchor_target_generator=AnchorTargetGeneratorRPN().__dict__):
super(AnchorRPN, self).__init__() super(AnchorRPN, self).__init__()
self.anchor_generator = anchor_generator self.anchor_generator = anchor_generator
self.anchor_target_generator = anchor_target_generator self.anchor_target_generator = anchor_target_generator
if isinstance(anchor_generator, dict):
self.anchor_generator = AnchorGeneratorRPN(**anchor_generator)
if isinstance(anchor_target_generator, dict):
self.anchor_target_generator = AnchorTargetGeneratorRPN(
**anchor_target_generator)
def __call__(self, inputs): def __call__(self, inputs):
outs = self.generate_anchors(inputs) outs = self.generate_anchors(inputs)
...@@ -122,7 +104,7 @@ class AnchorRPN(object): ...@@ -122,7 +104,7 @@ class AnchorRPN(object):
def generate_anchors(self, inputs): def generate_anchors(self, inputs):
# TODO: update here to use int to specify featmap size # TODO: update here to use int to specify featmap size
outs = self.anchor_generator(inputs['rpn_feat']) outs = self.anchor_generator(inputs['rpn_feat'])
outs = {'anchor': outs[0], 'var': outs[1], 'anchor_module': self} outs = {'anchor': outs[0], 'anchor_var': outs[1], 'anchor_module': self}
return outs return outs
def generate_anchors_target(self, inputs): def generate_anchors_target(self, inputs):
...@@ -143,7 +125,8 @@ class AnchorRPN(object): ...@@ -143,7 +125,8 @@ class AnchorRPN(object):
anchor_box=anchor, anchor_box=anchor,
gt_boxes=inputs['gt_bbox'], gt_boxes=inputs['gt_bbox'],
is_crowd=inputs['is_crowd'], is_crowd=inputs['is_crowd'],
im_info=inputs['im_info']) im_info=inputs['im_info'],
open_debug=inputs['open_debug'])
outs = { outs = {
'rpn_score_pred': score_pred, 'rpn_score_pred': score_pred,
'rpn_score_target': score_tgt, 'rpn_score_target': score_tgt,
...@@ -160,22 +143,12 @@ class AnchorYOLO(object): ...@@ -160,22 +143,12 @@ class AnchorYOLO(object):
'anchor_generator', 'anchor_target_generator', 'anchor_post_process' 'anchor_generator', 'anchor_target_generator', 'anchor_post_process'
] ]
def __init__(self, anchor_generator, anchor_target_generator,
             anchor_post_process):
    """Keep references to the three components named in ``__inject__``.

    The arguments are stored as given; no defaults are built and no
    dict-to-object conversion happens here.
    """
    super(AnchorYOLO, self).__init__()
    self.anchor_post_process = anchor_post_process
    self.anchor_target_generator = anchor_target_generator
    self.anchor_generator = anchor_generator
def __call__(self, inputs): def __call__(self, inputs):
outs = self.generate_anchors(inputs) outs = self.generate_anchors(inputs)
...@@ -200,30 +173,20 @@ class Proposal(object): ...@@ -200,30 +173,20 @@ class Proposal(object):
'proposal_generator', 'proposal_target_generator', 'bbox_post_process' 'proposal_generator', 'proposal_target_generator', 'bbox_post_process'
] ]
def __init__(self, proposal_generator, proposal_target_generator,
             bbox_post_process):
    """Keep references to the three components named in ``__inject__``.

    The arguments are stored as given; no defaults are built and no
    dict-to-object conversion happens here.
    """
    super(Proposal, self).__init__()
    self.bbox_post_process = bbox_post_process
    self.proposal_target_generator = proposal_target_generator
    self.proposal_generator = proposal_generator
def __call__(self, inputs):
    """Generate proposals for stage 0 and, when training, their targets.

    When ``inputs['stage'] == 0`` the result of ``generate_proposal`` is
    merged into ``inputs`` in place. In ``'train'`` mode the returned dict
    carries the entries produced by ``generate_proposal_target``; in any
    other mode an empty dict is returned.
    """
    if inputs['stage'] == 0:
        inputs.update(self.generate_proposal(inputs))

    targets = {}
    if inputs['mode'] != 'train':
        return targets
    targets.update(self.generate_proposal_target(inputs))
    return targets
...@@ -234,7 +197,7 @@ class Proposal(object): ...@@ -234,7 +197,7 @@ class Proposal(object):
scores=rpn_rois_prob, scores=rpn_rois_prob,
bbox_deltas=inputs['rpn_rois_delta'], bbox_deltas=inputs['rpn_rois_delta'],
anchors=inputs['anchor'], anchors=inputs['anchor'],
variances=inputs['var'], variances=inputs['anchor_var'],
im_info=inputs['im_info'], im_info=inputs['im_info'],
mode=inputs['mode']) mode=inputs['mode'])
outs = { outs = {
...@@ -244,15 +207,24 @@ class Proposal(object): ...@@ -244,15 +207,24 @@ class Proposal(object):
} }
return outs return outs
def generate_proposal_target(self, inputs, stage=0): def generate_proposal_target(self, inputs):
if inputs['stage'] == 0:
rois = inputs['rpn_rois']
rois_num = inputs['rpn_rois_nums']
elif inputs['stage'] > 0:
last_proposal_out = inputs['proposal_' + str(inputs['stage'] - 1)]
rois = last_proposal_out['refined_bbox']
rois_num = last_proposal_out['rois_nums']
outs = self.proposal_target_generator( outs = self.proposal_target_generator(
rpn_rois=inputs['rpn_rois'], rpn_rois=rois,
rpn_rois_nums=inputs['rpn_rois_nums'], rpn_rois_nums=rois_num,
gt_classes=inputs['gt_class'], gt_classes=inputs['gt_class'],
is_crowd=inputs['is_crowd'], is_crowd=inputs['is_crowd'],
gt_boxes=inputs['gt_bbox'], gt_boxes=inputs['gt_bbox'],
im_info=inputs['im_info'], im_info=inputs['im_info'],
stage=stage) stage=inputs['stage'],
open_debug=inputs['open_debug'])
outs = { outs = {
'rois': outs[0], 'rois': outs[0],
'labels_int32': outs[1], 'labels_int32': outs[1],
...@@ -263,49 +235,31 @@ class Proposal(object): ...@@ -263,49 +235,31 @@ class Proposal(object):
} }
return outs return outs
def refine_bbox(self, inputs):
    """Decode the current stage's box deltas against its RoIs.

    RoIs come from ``inputs['proposal_<stage>']['rois']`` in ``'train'``
    mode and from ``inputs['rpn_rois']`` otherwise. The deltas are taken
    from ``inputs['bbox_head_<stage>']['bbox_delta']``, reshaped to
    ``(-1, cls_agnostic_bbox_reg, 4)``, and only index 1 along axis 1 is
    kept before decoding with ``box_coder``.

    Returns:
        dict with a single key ``'refined_bbox'`` (reshaped to ``[-1, 4]``).
    """
    stage_tag = str(inputs['stage'])
    if inputs['mode'] == 'train':
        rois = inputs['proposal_' + stage_tag]['rois']
    else:
        rois = inputs['rpn_rois']
    head_out = inputs['bbox_head_' + stage_tag]

    # The group count is always read from the stage-0 head output.
    num_groups = inputs['bbox_head_0']['cls_agnostic_bbox_reg']
    deltas = fluid.layers.reshape(head_out['bbox_delta'], (-1, num_groups, 4))
    deltas = fluid.layers.slice(deltas, axes=[1], starts=[1], ends=[2])

    # Per-stage regression weights serve as the prior-box variance.
    stage_weights = self.proposal_target_generator.bbox_reg_weights[
        inputs['stage']]
    decoded = fluid.layers.box_coder(
        prior_box=rois,
        prior_box_var=stage_weights,
        target_box=deltas,
        code_type='decode_center_size',
        box_normalized=False,
        axis=1)
    decoded = fluid.layers.reshape(decoded, shape=[-1, 4])
    return {'refined_bbox': decoded}
def post_process(self, inputs):
    """Hand ``inputs`` to ``self.bbox_post_process`` and return its result."""
    return self.bbox_post_process(inputs)
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.dygraph import Layer from paddle.fluid.dygraph import Layer
from paddle.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Normal, MSRA from paddle.fluid.initializer import Normal, MSRA
from paddle.fluid.regularizer import L2Decay from paddle.fluid.regularizer import L2Decay
from paddle.fluid.dygraph.nn import Conv2D, Pool2D from paddle.fluid.dygraph.nn import Conv2D, Pool2D
from ppdet.core.workspace import register from ppdet.core.workspace import register
# TODO: del import and use inject
from ..backbone.resnet import Blocks from ..backbone.resnet import Blocks
from ..ops import RoIExtractor
@register @register
class BBoxFeat(Layer): class BBoxFeat(Layer):
__inject__ = ['roi_extractor'] __inject__ = ['roi_extractor']
__shared__ = ['num_stages']
def __init__(self, roi_extractor, feat_in=1024, feat_out=512, num_stages=1):
    """Build one res5 block per stage plus a shared global average pool.

    Args:
        roi_extractor: injected RoI feature extractor, stored as given.
        feat_in: ``ch_in`` passed to every ``Blocks`` instance.
        feat_out: ``ch_out`` passed to every ``Blocks`` instance.
        num_stages: number of res5 blocks to create. Stage 0 is named
            ``"res5"``; stage ``i > 0`` is named ``"res5_<i>"``.
    """
    super(BBoxFeat, self).__init__()
    self.roi_extractor = roi_extractor
    self.num_stages = num_stages
    # ``forward`` indexes this list by stage. A plain Python list is not
    # tracked by ``Layer``, so each block is also registered through
    # ``add_sublayer``; otherwise its parameters would not be reported by
    # ``parameters()`` / ``state_dict()``.
    self.res5s = []
    for i in range(self.num_stages):
        postfix = '' if i == 0 else '_' + str(i)
        # TODO: set norm type
        res5 = self.add_sublayer(
            "res5" + postfix,
            Blocks(
                "res5" + postfix,
                ch_in=feat_in,
                ch_out=feat_out,
                count=3,
                stride=2))
        self.res5s.append(res5)
    self.res5_pool = fluid.dygraph.Pool2D(
        pool_type='avg', global_pooling=True)
def forward(self, inputs): def forward(self, inputs):
if inputs['mode'] == 'train': if inputs['mode'] == 'train':
rois = inputs['rois'] in_rois = inputs['proposal_' + str(inputs['stage'])]
rois_num = inputs['rois_nums'] rois = in_rois['rois']
rois_num = in_rois['rois_nums']
elif inputs['mode'] == 'infer': elif inputs['mode'] == 'infer':
rois = inputs['rpn_rois'] rois = inputs['rpn_rois']
rois_num = inputs['rpn_rois_nums'] rois_num = inputs['rpn_rois_nums']
...@@ -44,14 +49,14 @@ class BBoxFeat(Layer): ...@@ -44,14 +49,14 @@ class BBoxFeat(Layer):
rois_feat = self.roi_extractor(inputs['res4'], rois, rois_num) rois_feat = self.roi_extractor(inputs['res4'], rois, rois_num)
# TODO: add others # TODO: add others
y_res5 = self.res5(rois_feat) y_res5 = self.res5s[inputs['stage']](rois_feat)
y = self.res5_pool(y_res5) y = self.res5_pool(y_res5)
y = fluid.layers.squeeze(y, axes=[2, 3]) y = fluid.layers.squeeze(y, axes=[2, 3])
outs = { outs = {
'rois_feat': rois_feat, 'rois_feat': rois_feat,
'res5': y_res5, 'res5': y_res5,
"bbox_feat": y, "bbox_feat": y,
'shared_res5_block': self.res5, 'shared_res5_block': self.res5s[inputs['stage']],
'shared_roi_extractor': self.roi_extractor 'shared_roi_extractor': self.roi_extractor
} }
return outs return outs
...@@ -59,79 +64,92 @@ class BBoxFeat(Layer): ...@@ -59,79 +64,92 @@ class BBoxFeat(Layer):
@register
class BBoxHead(Layer):
    """Box classification / regression head with one layer pair per stage.

    For every stage a ``Linear`` scorer (``num_classes`` outputs) and a
    ``Linear`` regressor (``4 * cls_agnostic_bbox_reg`` outputs) are built.
    ``forward`` and ``loss`` select the pair with ``inputs['stage']``.
    """
    __inject__ = ['bbox_feat']
    __shared__ = ['num_classes', 'num_stages']

    def __init__(self,
                 bbox_feat,
                 feat_in=2048,
                 num_classes=81,
                 cls_agnostic_bbox_reg=81,
                 num_stages=1):
        """Create the per-stage score and delta layers.

        Args:
            bbox_feat: injected feature layer called at the start of
                ``forward``.
            feat_in: input width of both ``Linear`` layers.
            num_classes: output width of the score layer.
            cls_agnostic_bbox_reg: the delta layer outputs 4 times this.
            num_stages: number of (score, delta) layer pairs to create.
        """
        super(BBoxHead, self).__init__()
        self.bbox_feat = bbox_feat
        self.num_classes = num_classes
        self.cls_agnostic_bbox_reg = cls_agnostic_bbox_reg
        self.num_stages = num_stages

        # The lists are indexed by stage in ``forward``. ``Layer`` does not
        # track layers held in a plain list, so each one is also registered
        # via ``add_sublayer`` to make its parameters visible to
        # ``parameters()`` / ``state_dict()``.
        self.bbox_scores = []
        self.bbox_deltas = []
        for i in range(self.num_stages):
            # Stage 0 keeps the un-suffixed parameter names.
            postfix = '' if i == 0 else '_' + str(i)
            bbox_score = self.add_sublayer(
                'bbox_score' + postfix,
                fluid.dygraph.Linear(
                    input_dim=feat_in,
                    output_dim=1 * self.num_classes,
                    act=None,
                    param_attr=ParamAttr(
                        name='cls_score_w' + postfix,
                        initializer=Normal(
                            loc=0.0, scale=0.001)),
                    bias_attr=ParamAttr(
                        name='cls_score_b' + postfix,
                        learning_rate=2.,
                        regularizer=L2Decay(0.))))

            bbox_delta = self.add_sublayer(
                'bbox_delta' + postfix,
                fluid.dygraph.Linear(
                    input_dim=feat_in,
                    output_dim=4 * self.cls_agnostic_bbox_reg,
                    act=None,
                    param_attr=ParamAttr(
                        name='bbox_pred_w' + postfix,
                        initializer=Normal(
                            loc=0.0, scale=0.01)),
                    bias_attr=ParamAttr(
                        name='bbox_pred_b' + postfix,
                        learning_rate=2.,
                        regularizer=L2Decay(0.))))
            self.bbox_scores.append(bbox_score)
            self.bbox_deltas.append(bbox_delta)

    def forward(self, inputs):
        """Compute box scores and deltas for ``inputs['stage']``.

        Returns the dict produced by ``self.bbox_feat(inputs)`` extended
        with ``'bbox_score'`` and ``'bbox_delta'``; at stage 0 also with
        ``'cls_agnostic_bbox_reg'``; in ``'infer'`` mode also with
        ``'bbox_prob'`` (softmax of the scores).
        """
        outs = self.bbox_feat(inputs)
        x = outs['bbox_feat']
        bs = self.bbox_scores[inputs['stage']](x)
        bd = self.bbox_deltas[inputs['stage']](x)
        outs.update({'bbox_score': bs, 'bbox_delta': bd})
        if inputs['stage'] == 0:
            outs.update({"cls_agnostic_bbox_reg": self.cls_agnostic_bbox_reg})
        if inputs['mode'] == 'infer':
            bbox_prob = fluid.layers.softmax(bs, use_cudnn=False)
            outs['bbox_prob'] = bbox_prob
        return outs

    def loss(self, inputs):
        """Return ``(loss_bbox_cls, loss_bbox_reg)`` for the current stage.

        Head outputs are read from ``inputs['bbox_head_<stage>']`` and the
        targets from ``inputs['proposal_<stage>']``.
        """
        bbox_out = inputs['bbox_head_' + str(inputs['stage'])]
        bbox_target = inputs['proposal_' + str(inputs['stage'])]

        # bbox cls
        labels_int64 = fluid.layers.cast(
            x=bbox_target['labels_int32'], dtype='int64')
        labels_int64.stop_gradient = True
        bbox_score = fluid.layers.reshape(bbox_out['bbox_score'],
                                          (-1, self.num_classes))
        loss_bbox_cls = fluid.layers.softmax_with_cross_entropy(
            logits=bbox_score, label=labels_int64)
        loss_bbox_cls = fluid.layers.reduce_mean(
            loss_bbox_cls, name='loss_bbox_cls_' + str(inputs['stage']))

        # bbox reg
        loss_bbox_reg = fluid.layers.smooth_l1(
            x=bbox_out['bbox_delta'],
            y=bbox_target['bbox_targets'],
            inside_weight=bbox_target['bbox_inside_weights'],
            outside_weight=bbox_target['bbox_outside_weights'],
            sigma=1.0)
        loss_bbox_reg = fluid.layers.reduce_mean(
            loss_bbox_reg, name='loss_bbox_loc_' + str(inputs['stage']))
        return loss_bbox_cls, loss_bbox_reg
...@@ -6,55 +6,54 @@ from paddle.fluid.initializer import Normal, MSRA ...@@ -6,55 +6,54 @@ from paddle.fluid.initializer import Normal, MSRA
from paddle.fluid.regularizer import L2Decay from paddle.fluid.regularizer import L2Decay
from paddle.fluid.dygraph.nn import Conv2D, Pool2D from paddle.fluid.dygraph.nn import Conv2D, Pool2D
from ppdet.core.workspace import register from ppdet.core.workspace import register
from ..ops import RoIExtractor # TODO: del it and use inject
from ..backbone.resnet import Blocks from ..backbone.resnet import Blocks
@register @register
class MaskFeat(Layer): class MaskFeat(Layer):
def __init__(self, feat_in=2048, feat_out=256, mask_stages=1):
    """Create the transposed-conv upsampling layer used by ``forward``.

    Args:
        feat_in: ``num_channels`` of the transposed convolution.
        feat_out: ``num_filters`` of the transposed convolution.
        mask_stages: number of loop iterations; iteration ``i > 0`` uses
            parameter names suffixed with ``_<i>``.
    """
    super(MaskFeat, self).__init__()
    self.feat_in = feat_in
    self.feat_out = feat_out
    self.mask_stages = mask_stages

    # NOTE(review): every iteration rebinds the same ``self.upsample``
    # attribute, so with mask_stages > 1 only the last layer stays
    # reachable -- confirm whether per-stage layers were intended.
    for stage_idx in range(self.mask_stages):
        postfix = '' if stage_idx == 0 else '_' + str(stage_idx)
        weight_attr = ParamAttr(
            name='conv5_mask_w' + postfix,
            initializer=MSRA(uniform=False))
        bias_attr = ParamAttr(
            name='conv5_mask_b' + postfix,
            learning_rate=2.,
            regularizer=L2Decay(0.))
        self.upsample = fluid.dygraph.Conv2DTranspose(
            num_channels=self.feat_in,
            num_filters=self.feat_out,
            filter_size=2,
            stride=2,
            act='relu',
            param_attr=weight_attr,
            bias_attr=bias_attr)
def forward(self, inputs): def forward(self, inputs):
bbox_head_out = inputs['bbox_head_' + str(inputs['stage'])]
if inputs['mode'] == 'train': if inputs['mode'] == 'train':
x = inputs['res5'] x = bbox_head_out['res5']
rois_feat = fluid.layers.gather(x, inputs['rois_has_mask_int32']) rois_feat = fluid.layers.gather(x, inputs['rois_has_mask_int32'])
elif inputs['mode'] == 'infer': elif inputs['mode'] == 'infer':
rois = inputs['predicted_bbox'][:, 2:] * inputs['im_info'][:, 2] rois = inputs['predicted_bbox'][:, 2:] * inputs['im_info'][:, 2]
rois_num = inputs['predicted_bbox_nums'] rois_num = inputs['predicted_bbox_nums']
# TODO: optim here # TODO: optim here
if callable(inputs['shared_roi_extractor']): shared_roi_ext = bbox_head_out['shared_roi_extractor']
rois_feat = inputs['shared_roi_extractor'](inputs['res4'], rois, if callable(shared_roi_ext):
rois_num) rois_feat = shared_roi_ext(inputs['res4'], rois, rois_num)
if callable(inputs['shared_res5_block']):
rois_feat = inputs['shared_res5_block'](rois_feat) shared_res5 = bbox_head_out['shared_res5_block']
if callable(shared_res5):
rois_feat = shared_res5(rois_feat)
# upsample # upsample
y = self.upsample(rois_feat) y = self.upsample(rois_feat)
outs = {'mask_feat': y} outs = {'mask_feat': y}
...@@ -67,33 +66,34 @@ class MaskHead(Layer): ...@@ -67,33 +66,34 @@ class MaskHead(Layer):
__inject__ = ['mask_feat'] __inject__ = ['mask_feat']
def __init__(self,
             mask_feat,
             num_classes=81,
             feat_in=256,
             resolution=14,
             mask_stages=1):
    """Create the 1x1 convolution that produces the mask logits.

    Args:
        mask_feat: injected mask feature layer, stored as given.
        num_classes: ``num_filters`` of the logits convolution.
        feat_in: ``num_channels`` of the logits convolution.
        resolution: stored on the instance; ``loss`` uses it to size the
            reshape of the logits.
        mask_stages: number of loop iterations; iteration ``i > 0`` uses
            parameter names suffixed with ``_<i>``.
    """
    super(MaskHead, self).__init__()
    self.mask_feat = mask_feat
    self.feat_in = feat_in
    self.resolution = resolution
    self.num_classes = num_classes
    self.mask_stages = mask_stages

    # NOTE(review): every iteration rebinds the same
    # ``self.mask_fcn_logits`` attribute, so with mask_stages > 1 only the
    # last layer stays reachable -- confirm whether per-stage layers were
    # intended.
    for stage_idx in range(self.mask_stages):
        postfix = '' if stage_idx == 0 else '_' + str(stage_idx)
        weight_attr = ParamAttr(
            name='mask_fcn_logits_w' + postfix,
            initializer=MSRA(uniform=False))
        bias_attr = ParamAttr(
            name='mask_fcn_logits_b' + postfix,
            learning_rate=2.,
            regularizer=L2Decay(0.0))
        self.mask_fcn_logits = fluid.dygraph.Conv2D(
            num_channels=self.feat_in,
            num_filters=self.num_classes,
            filter_size=1,
            param_attr=weight_attr,
            bias_attr=bias_attr)
def forward(self, inputs): def forward(self, inputs):
# feat # feat
...@@ -115,7 +115,6 @@ class MaskHead(Layer): ...@@ -115,7 +115,6 @@ class MaskHead(Layer):
return outs return outs
def loss(self, inputs): def loss(self, inputs):
# input needs (model_out, target)
reshape_dim = self.num_classes * self.resolution * self.resolution reshape_dim = self.num_classes * self.resolution * self.resolution
mask_logits = fluid.layers.reshape(inputs['mask_logits'], mask_logits = fluid.layers.reshape(inputs['mask_logits'],
(-1, reshape_dim)) (-1, reshape_dim))
......
...@@ -4,7 +4,6 @@ from paddle.fluid.param_attr import ParamAttr ...@@ -4,7 +4,6 @@ from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Normal from paddle.fluid.initializer import Normal
from paddle.fluid.regularizer import L2Decay from paddle.fluid.regularizer import L2Decay
from paddle.fluid.dygraph.nn import Conv2D from paddle.fluid.dygraph.nn import Conv2D
from ppdet.core.workspace import register from ppdet.core.workspace import register
...@@ -20,10 +19,10 @@ class RPNFeat(Layer): ...@@ -20,10 +19,10 @@ class RPNFeat(Layer):
padding=1, padding=1,
act='relu', act='relu',
param_attr=ParamAttr( param_attr=ParamAttr(
"conv_rpn_w", initializer=Normal( name="conv_rpn_w", initializer=Normal(
loc=0., scale=0.01)), loc=0., scale=0.01)),
bias_attr=ParamAttr( bias_attr=ParamAttr(
"conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.))) name="conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.)))
def forward(self, inputs): def forward(self, inputs):
x = inputs.get('res4') x = inputs.get('res4')
...@@ -36,12 +35,10 @@ class RPNFeat(Layer): ...@@ -36,12 +35,10 @@ class RPNFeat(Layer):
class RPNHead(Layer): class RPNHead(Layer):
__inject__ = ['rpn_feat'] __inject__ = ['rpn_feat']
def __init__(self, anchor_per_position=15, rpn_feat=RPNFeat().__dict__): def __init__(self, rpn_feat, anchor_per_position=15):
super(RPNHead, self).__init__() super(RPNHead, self).__init__()
self.anchor_per_position = anchor_per_position
self.rpn_feat = rpn_feat self.rpn_feat = rpn_feat
if isinstance(rpn_feat, dict): self.anchor_per_position = anchor_per_position
self.rpn_feat = RPNFeat(**rpn_feat)
# rpn roi classification scores # rpn roi classification scores
self.rpn_rois_score = Conv2D( self.rpn_rois_score = Conv2D(
......
...@@ -132,19 +132,11 @@ class YOLOv3Head(Layer): ...@@ -132,19 +132,11 @@ class YOLOv3Head(Layer):
__shared__ = ['num_classes'] __shared__ = ['num_classes']
__inject__ = ['yolo_feat'] __inject__ = ['yolo_feat']
def __init__( def __init__(self, yolo_feat, num_classes=80, anchor_per_position=3):
self,
num_classes=80,
anchor_per_position=3,
mode='train',
yolo_feat=YOLOFeat().__dict__, ):
super(YOLOv3Head, self).__init__() super(YOLOv3Head, self).__init__()
self.num_classes = num_classes self.num_classes = num_classes
self.anchor_per_position = anchor_per_position self.anchor_per_position = anchor_per_position
self.mode = mode
self.yolo_feat = yolo_feat self.yolo_feat = yolo_feat
if isinstance(yolo_feat, dict):
self.yolo_feat = YOLOFeat(**yolo_feat)
self.yolo_outs = [] self.yolo_outs = []
for i in range(3): for i in range(3):
......
import numpy as np
import paddle.fluid as fluid
from ppdet.core.workspace import register
# TODO: regitster mask_post_process op
from ppdet.py_op.post_process import mask_post_process
@register
class MaskPostProcess(object):
    """Callable wrapper around the ``mask_post_process`` python op."""
    __shared__ = ['num_classes']

    def __init__(self, num_classes=81):
        super(MaskPostProcess, self).__init__()
        self.num_classes = num_classes

    def __call__(self, inputs):
        """Convert four entries of ``inputs`` to numpy and post-process them.

        Returns:
            dict with the op's result under ``'predicted_mask'``.
        """
        # TODO: modify related ops for deploying
        # Key order is the positional order expected by mask_post_process.
        needed = ('predicted_bbox_nums', 'predicted_bbox', 'mask_logits',
                  'im_info')
        arrays = [inputs[key].numpy() for key in needed]
        return {'predicted_mask': mask_post_process(*arrays)}
@register
class Mask(object):
    """Groups the mask target generator and the mask post-processor."""
    __inject__ = ['mask_target_generator', 'mask_post_process']

    def __init__(self, mask_target_generator, mask_post_process):
        super(Mask, self).__init__()
        self.mask_post_process = mask_post_process
        self.mask_target_generator = mask_target_generator

    def __call__(self, inputs):
        """Return mask targets in ``'train'`` mode, otherwise an empty dict."""
        if inputs['mode'] == 'train':
            return self.generate_mask_target(inputs)
        return {}

    def generate_mask_target(self, inputs):
        """Build mask targets from the current stage's proposal output.

        RoIs, their counts and labels are read from
        ``inputs['proposal_<stage>']``; ground-truth fields come from
        ``inputs`` directly.

        Returns:
            dict with ``'mask_rois'``, ``'rois_has_mask_int32'`` and
            ``'mask_int32'`` (the generator's outputs 0, 1 and 2).
        """
        stage_proposals = inputs['proposal_' + str(inputs['stage'])]
        targets = self.mask_target_generator(
            im_info=inputs['im_info'],
            gt_classes=inputs['gt_class'],
            is_crowd=inputs['is_crowd'],
            gt_segms=inputs['gt_mask'],
            rois=stage_proposals['rois'],
            rois_nums=stage_proposals['rois_nums'],
            labels_int32=stage_proposals['labels_int32'])
        return {
            'mask_rois': targets[0],
            'rois_has_mask_int32': targets[1],
            'mask_int32': targets[2],
        }

    def post_process(self, inputs):
        """Hand ``inputs`` to ``self.mask_post_process`` and return its result."""
        return self.mask_post_process(inputs)
...@@ -49,13 +49,20 @@ class AnchorTargetGeneratorRPN(object): ...@@ -49,13 +49,20 @@ class AnchorTargetGeneratorRPN(object):
self.negative_overlap = negative_overlap self.negative_overlap = negative_overlap
self.use_random = use_random self.use_random = use_random
def __call__(self, cls_logits, bbox_pred, anchor_box, gt_boxes, is_crowd, def __call__(self,
im_info): cls_logits,
bbox_pred,
anchor_box,
gt_boxes,
is_crowd,
im_info,
open_debug=False):
anchor_box = anchor_box.numpy() anchor_box = anchor_box.numpy()
gt_boxes = gt_boxes.numpy() gt_boxes = gt_boxes.numpy()
is_crowd = is_crowd.numpy() is_crowd = is_crowd.numpy()
im_info = im_info.numpy() im_info = im_info.numpy()
if open_debug:
self.use_random = False
loc_indexes, score_indexes, tgt_labels, tgt_bboxes, bbox_inside_weights = generate_rpn_anchor_target( loc_indexes, score_indexes, tgt_labels, tgt_bboxes, bbox_inside_weights = generate_rpn_anchor_target(
anchor_box, gt_boxes, is_crowd, im_info, self.straddle_thresh, anchor_box, gt_boxes, is_crowd, im_info, self.straddle_thresh,
self.batch_size_per_im, self.positive_overlap, self.batch_size_per_im, self.positive_overlap,
...@@ -191,7 +198,7 @@ class ProposalTargetGenerator(object): ...@@ -191,7 +198,7 @@ class ProposalTargetGenerator(object):
bg_thresh_lo=[0., ], bg_thresh_lo=[0., ],
bbox_reg_weights=[[0.1, 0.1, 0.2, 0.2]], bbox_reg_weights=[[0.1, 0.1, 0.2, 0.2]],
num_classes=81, num_classes=81,
shuffle_before_sample=True, use_random=True,
is_cls_agnostic=False, is_cls_agnostic=False,
is_cascade_rcnn=False): is_cascade_rcnn=False):
super(ProposalTargetGenerator, self).__init__() super(ProposalTargetGenerator, self).__init__()
...@@ -202,7 +209,7 @@ class ProposalTargetGenerator(object): ...@@ -202,7 +209,7 @@ class ProposalTargetGenerator(object):
self.bg_thresh_lo = bg_thresh_lo self.bg_thresh_lo = bg_thresh_lo
self.bbox_reg_weights = bbox_reg_weights self.bbox_reg_weights = bbox_reg_weights
self.num_classes = num_classes self.num_classes = num_classes
self.use_random = shuffle_before_sample self.use_random = use_random
self.is_cls_agnostic = is_cls_agnostic, self.is_cls_agnostic = is_cls_agnostic,
self.is_cascade_rcnn = is_cascade_rcnn self.is_cascade_rcnn = is_cascade_rcnn
...@@ -213,13 +220,17 @@ class ProposalTargetGenerator(object): ...@@ -213,13 +220,17 @@ class ProposalTargetGenerator(object):
is_crowd, is_crowd,
gt_boxes, gt_boxes,
im_info, im_info,
stage=0): stage=0,
open_debug=False):
rpn_rois = rpn_rois.numpy() rpn_rois = rpn_rois.numpy()
rpn_rois_nums = rpn_rois_nums.numpy() rpn_rois_nums = rpn_rois_nums.numpy()
gt_classes = gt_classes.numpy() gt_classes = gt_classes.numpy()
gt_boxes = gt_boxes.numpy() gt_boxes = gt_boxes.numpy()
is_crowd = is_crowd.numpy() is_crowd = is_crowd.numpy()
im_info = im_info.numpy() im_info = im_info.numpy()
if open_debug:
self.use_random = False
outs = generate_proposal_target( outs = generate_proposal_target(
rpn_rois, rpn_rois_nums, gt_classes, is_crowd, gt_boxes, im_info, rpn_rois, rpn_rois_nums, gt_classes, is_crowd, gt_boxes, im_info,
self.batch_size_per_im, self.fg_fraction, self.fg_thresh[stage], self.batch_size_per_im, self.fg_fraction, self.fg_thresh[stage],
...@@ -265,10 +276,10 @@ class MaskTargetGenerator(object): ...@@ -265,10 +276,10 @@ class MaskTargetGenerator(object):
@register @register
class RoIExtractor(object): class RoIExtractor(object):
def __init__(self, def __init__(self,
resolution=7, resolution=14,
spatial_scale=1. / 16, spatial_scale=1. / 16,
sampling_ratio=0, sampling_ratio=0,
extractor_type='RoIPool'): extractor_type='RoIAlign'):
super(RoIExtractor, self).__init__() super(RoIExtractor, self).__init__()
if isinstance(resolution, Integral): if isinstance(resolution, Integral):
resolution = [resolution, resolution] resolution = [resolution, resolution]
......
...@@ -21,25 +21,44 @@ class BufferDict(dict): ...@@ -21,25 +21,44 @@ class BufferDict(dict):
for k, v in dict(*args, **kwargs).items(): for k, v in dict(*args, **kwargs).items():
self[k] = v self[k] = v
def update_v(self, key, value):
    """Overwrite the value of an existing key; never create a new one.

    The write goes through the parent class's ``__setitem__``.

    Raises:
        Exception: if ``key`` is not already present.
    """
    if key not in self.keys():
        raise Exception("The %s is not in global inputs dict" % key)
    super(BufferDict, self).__setitem__(key, value)
def get(self, key):
    """Return the value stored under ``key`` via ``__getitem__`` (no default)."""
    value = self.__getitem__(key)
    return value
def set(self, key, value):
    """Store ``value`` under ``key`` and return whatever ``__setitem__`` returns."""
    result = self.__setitem__(key, value)
    return result
def debug(self, dshape=True, dvalue=True, dtype=False):
    """Print a summary of selected entries when ``self['open_debug']`` is set.

    The entries to report come from ``self['debug_names']``; when that is
    ``None`` every key of the dict is reported. Each name is either a plain
    key, or a dict mapping a key to a list of sub-keys to look up inside
    ``self[key]``.

    Args:
        dshape, dvalue, dtype: forwarded to ``get_debug_info`` to choose
            which facts are collected for each entry.
    """
    if not self['open_debug']:
        return

    names = self['debug_names']
    if names is None:
        names = self.keys()

    infos = {}
    for name in names:
        if isinstance(name, dict):
            for outer_key, sub_keys in name.items():
                # Fresh dict per outer key so entries of different keys
                # are not merged into one shared object.
                nested = {}
                if isinstance(sub_keys, list):
                    for sub_key in sub_keys:
                        nested[sub_key] = self.get_debug_info(
                            self[outer_key][sub_key],
                            dshape=dshape,
                            dvalue=dvalue,
                            dtype=dtype)
                infos[outer_key] = nested
        else:
            infos[name] = self.get_debug_info(
                self[name], dshape=dshape, dvalue=dvalue, dtype=dtype)
    print(infos)
def get_debug_info(self, v, dshape=True, dvalue=True, dtype=False):
    """Collect a short summary list for ``v``.

    Depending on the flags, the list contains in this order: ``v.shape``
    (if ``v`` has one), the mean absolute value of ``v.numpy()`` (if ``v``
    has a ``numpy`` method), and ``type(v)``.
    """
    summary = []
    wants_shape = dshape == True and hasattr(v, 'shape')
    if wants_shape:
        summary.append(v.shape)
    wants_value = dvalue == True and hasattr(v, 'numpy')
    if wants_value:
        magnitude = np.abs(v.numpy())
        summary.append(np.mean(magnitude))
    if dtype == True:
        summary.append(type(v))
    return summary
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册