diff --git a/ppdet/modeling/__init__.py b/ppdet/modeling/__init__.py index e17aa99c217717dac05f9c33566edb9e483e26f4..36b9e0d867e2aec2ee0319522ea6785ad32c6cc5 100644 --- a/ppdet/modeling/__init__.py +++ b/ppdet/modeling/__init__.py @@ -1,11 +1,13 @@ from . import ops -from . import anchor +from . import bbox +from . import mask from . import backbone from . import head from . import architecture from .ops import * -from .anchor import * +from .bbox import * +from .mask import * from .backbone import * from .head import * from .architecture import * diff --git a/ppdet/modeling/architecture/__init__.py b/ppdet/modeling/architecture/__init__.py index fd60774597f473c2e29dbf525142e41916431c84..e83f20b745301e4ea1d959b492a08ab63d9dcf86 100644 --- a/ppdet/modeling/architecture/__init__.py +++ b/ppdet/modeling/architecture/__init__.py @@ -9,8 +9,10 @@ from . import meta_arch from . import faster_rcnn from . import mask_rcnn from . import yolo +from . import cascade_rcnn from .meta_arch import * from .faster_rcnn import * from .mask_rcnn import * from .yolo import * +from .cascade_rcnn import * diff --git a/ppdet/modeling/architecture/cascade_rcnn.py b/ppdet/modeling/architecture/cascade_rcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..bdcb6cb51230a606a4b267ff5a61a8272d1ca881 --- /dev/null +++ b/ppdet/modeling/architecture/cascade_rcnn.py @@ -0,0 +1,127 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from paddle import fluid +from ppdet.core.workspace import register +from .meta_arch import BaseArch + +__all__ = ['CascadeRCNN'] + + +@register +class CascadeRCNN(BaseArch): + __category__ = 'architecture' + __shared__ = ['num_stages'] + __inject__ = [ + 'anchor', + 'proposal', + 'mask', + 'backbone', + 'rpn_head', + 'bbox_head', + 'mask_head', + ] + + def __init__(self, + anchor, + proposal, + mask, + backbone, + rpn_head, + bbox_head, + mask_head, + num_stages=3, + *args, + **kwargs): + super(CascadeRCNN, self).__init__(*args, **kwargs) + self.anchor = anchor + self.proposal = proposal + self.mask = mask + self.backbone = backbone + self.rpn_head = rpn_head + self.bbox_head = bbox_head + self.mask_head = mask_head + self.num_stages = num_stages + + def model_arch(self, ): + # Backbone + bb_out = self.backbone(self.gbd) + self.gbd.update(bb_out) + + # RPN + rpn_head_out = self.rpn_head(self.gbd) + self.gbd.update(rpn_head_out) + + # Anchor + anchor_out = self.anchor(self.gbd) + self.gbd.update(anchor_out) + + self.gbd['stage'] = 0 + for i in range(self.num_stages): + self.gbd.update_v('stage', i) + # Proposal BBox + proposal_out = self.proposal(self.gbd) + self.gbd.update({"proposal_" + str(i): proposal_out}) + + # BBox Head + bbox_head_out = self.bbox_head(self.gbd) + self.gbd.update({'bbox_head_' + str(i): bbox_head_out}) + + refine_bbox_out = self.proposal.refine_bbox(self.gbd) + self.gbd['proposal_' + str(i)].update(refine_bbox_out) + + if self.gbd['mode'] == 'infer': + bbox_out = self.proposal.post_process(self.gbd) + self.gbd.update(bbox_out) + + # Mask + mask_out = self.mask(self.gbd) + self.gbd.update(mask_out) + + # Mask Head + mask_head_out = self.mask_head(self.gbd) + self.gbd.update(mask_head_out) + + if self.gbd['mode'] == 'infer': + mask_out = self.mask.post_process(self.gbd) + self.gbd.update(mask_out) + + def loss(self, ): + outs = {} + losses = [] + + rpn_cls_loss, rpn_reg_loss = self.rpn_head.loss(self.gbd) + outs['loss_rpn_cls'] = rpn_cls_loss + outs['loss_rpn_reg'] = rpn_reg_loss + losses.extend([rpn_cls_loss, rpn_reg_loss]) + + bbox_cls_loss_list = [] + bbox_reg_loss_list = [] + for i in range(self.num_stages): + self.gbd.update_v('stage', i) + bbox_cls_loss, bbox_reg_loss = self.bbox_head.loss(self.gbd) + bbox_cls_loss_list.append(bbox_cls_loss) + bbox_reg_loss_list.append(bbox_reg_loss) + outs['loss_bbox_cls_' + str(i)] = bbox_cls_loss + outs['loss_bbox_reg_' + str(i)] = bbox_reg_loss + losses.extend(bbox_cls_loss_list) + losses.extend(bbox_reg_loss_list) + + mask_loss = self.mask_head.loss(self.gbd) + outs['mask_loss'] = mask_loss + losses.append(mask_loss) + + loss = fluid.layers.sum(losses) + outs['loss'] = loss + return outs + + def infer(self, ): + outs = { + 'bbox': self.gbd['predicted_bbox'].numpy(), + 'bbox_nums': self.gbd['predicted_bbox_nums'].numpy(), + 'mask': self.gbd['predicted_mask'].numpy(), + 'im_id': self.gbd['im_id'].numpy(), + 'im_shape': self.gbd['im_shape'].numpy() + } + return inputs diff --git a/ppdet/modeling/architecture/faster_rcnn.py b/ppdet/modeling/architecture/faster_rcnn.py index bb600a4d132e471df33a691c5aede408979f134b..33c6945f1420ef0cde55c9a0740191516e836328 100644 --- a/ppdet/modeling/architecture/faster_rcnn.py +++ b/ppdet/modeling/architecture/faster_rcnn.py @@ -3,7 +3,6 @@ from __future__ import division from __future__ import print_function from paddle import fluid - from ppdet.core.workspace import register from .meta_arch import BaseArch @@ -21,27 +20,16 @@ class FasterRCNN(BaseArch): 'bbox_head', ] - def __init__(self, - anchor, - proposal, - backbone, - rpn_head, - bbox_head, - rpn_only=False, - mode='train'): - super(FasterRCNN, self).__init__() + def __init__(self, anchor, proposal, backbone, rpn_head, bbox_head, *args, + **kwargs): + super(FasterRCNN, self).__init__(*args, **kwargs) self.anchor = anchor self.proposal = proposal self.backbone = backbone self.rpn_head = rpn_head self.bbox_head = bbox_head - self.rpn_only = rpn_only - self.mode = mode - - def forward(self, inputs, inputs_keys): - self.gbd = self.build_inputs(inputs, inputs_keys) - self.gbd['mode'] = self.mode + def model_arch(self, ): # Backbone bb_out = self.backbone(self.gbd) self.gbd.update(bb_out) @@ -55,29 +43,21 @@ class FasterRCNN(BaseArch): self.gbd.update(anchor_out) # Proposal BBox + self.gbd['stage'] = 0 proposal_out = self.proposal(self.gbd) - self.gbd.update(proposal_out) + self.gbd.update({'proposal_0': proposal_out}) # BBox Head - bbox_head_out = self.bbox_head(self.gbd) - self.gbd.update(bbox_head_out) + bboxhead_out = self.bbox_head(self.gbd) + self.gbd.update({'bbox_head_0': bboxhead_out}) if self.gbd['mode'] == 'infer': bbox_out = self.proposal.post_process(self.gbd) self.gbd.update(bbox_out) - # result - if self.gbd['mode'] == 'train': - return self.loss(self.gbd) - elif self.gbd['mode'] == 'infer': - return self.infer(self.gbd) - else: - raise "Now, only support train or infer mode!" - - def loss(self, inputs): - losses = [] - rpn_cls_loss, rpn_reg_loss = self.rpn_head.loss(inputs) - bbox_cls_loss, bbox_reg_loss = self.bbox_head.loss(inputs) + def loss(self, ): + rpn_cls_loss, rpn_reg_loss = self.rpn_head.loss(self.gbd) + bbox_cls_loss, bbox_reg_loss = self.bbox_head.loss(self.gbd) losses = [rpn_cls_loss, rpn_reg_loss, bbox_cls_loss, bbox_reg_loss] loss = fluid.layers.sum(losses) out = { @@ -89,11 +69,11 @@ class FasterRCNN(BaseArch): } return out - def infer(self, inputs): + def infer(self, ): outs = { - "bbox": inputs['predicted_bbox'].numpy(), - "bbox_nums": inputs['predicted_bbox_nums'].numpy(), - 'im_id': inputs['im_id'].numpy(), - 'im_shape': inputs['im_shape'].numpy() + "bbox": self.gbd['predicted_bbox'].numpy(), + "bbox_nums": self.gbd['predicted_bbox_nums'].numpy(), + 'im_id': self.gbd['im_id'].numpy(), + 'im_shape': self.gbd['im_shape'].numpy() } return outs diff --git a/ppdet/modeling/architecture/mask_rcnn.py b/ppdet/modeling/architecture/mask_rcnn.py index b42466a3f39933d1a60b3fd6f972c5837d289991..1b462c2e40e4bf3cbbb0f5f88750696365c251e5 100644 --- a/ppdet/modeling/architecture/mask_rcnn.py +++ b/ppdet/modeling/architecture/mask_rcnn.py @@ -3,11 +3,10 @@ from __future__ import division from __future__ import print_function from paddle import fluid - from ppdet.core.workspace import register from ppdet.utils.data_structure import BufferDict - from .meta_arch import BaseArch + __all__ = ['MaskRCNN'] @@ -24,18 +23,9 @@ class MaskRCNN(BaseArch): 'mask_head', ] - def __init__(self, - anchor, - proposal, - mask, - backbone, - rpn_head, - bbox_head, - mask_head, - rpn_only=False, - mode='train'): - super(MaskRCNN, self).__init__() - + def __init__(self, anchor, proposal, mask, backbone, rpn_head, bbox_head, + mask_head, *args, **kwargs): + super(MaskRCNN, self).__init__(*args, **kwargs) self.anchor = anchor self.proposal = proposal self.mask = mask @@ -43,12 +33,8 @@ class MaskRCNN(BaseArch): self.rpn_head = rpn_head self.bbox_head = bbox_head self.mask_head = mask_head - self.mode = mode - - def forward(self, inputs, inputs_keys): - self.gbd = self.build_inputs(inputs, inputs_keys) - self.gbd['mode'] = mode + def model_arch(self, ): # Backbone bb_out = self.backbone(self.gbd) self.gbd.update(bb_out) @@ -62,12 +48,13 @@ class MaskRCNN(BaseArch): self.gbd.update(anchor_out) # Proposal BBox + self.gbd['stage'] = 0 proposal_out = self.proposal(self.gbd) - self.gbd.update(proposal_out) + self.gbd.update({'proposal_0': proposal_out}) # BBox Head - bbox_head_out = self.bbox_head(self.gbd) - self.gbd.update(bbox_head_out) + bboxhead_out = self.bbox_head(self.gbd) + self.gbd.update({'bbox_head_0': bboxhead_out}) if self.gbd['mode'] == 'infer': bbox_out = self.proposal.post_process(self.gbd) @@ -85,19 +72,11 @@ class MaskRCNN(BaseArch): mask_out = self.mask.post_process(self.gbd) self.gbd.update(mask_out) - # result - if self.gbd['mode'] == 'train': - return self.loss(self.gbd) - elif self.gbd['mode'] == 'infer': - self.infer(self.gbd) - else: - raise "Now, only support train or infer mode!" - - def loss(self, inputs): + def loss(self, ): losses = [] - rpn_cls_loss, rpn_reg_loss = self.rpn_head.loss(inputs) - bbox_cls_loss, bbox_reg_loss = self.bbox_head.loss(inputs) - mask_loss = self.mask_head.loss(inputs) + rpn_cls_loss, rpn_reg_loss = self.rpn_head.loss(self.gbd) + bbox_cls_loss, bbox_reg_loss = self.bbox_head.loss(self.gbd) + mask_loss = self.mask_head.loss(self.gbd) losses = [ rpn_cls_loss, rpn_reg_loss, bbox_cls_loss, bbox_reg_loss, mask_loss ] @@ -112,12 +91,12 @@ class MaskRCNN(BaseArch): } return out - def infer(self, inputs): + def infer(self, ): outs = { - 'bbox': inputs['predicted_bbox'].numpy(), - 'bbox_nums': inputs['predicted_bbox_nums'].numpy(), - 'mask': inputs['predicted_mask'].numpy(), - 'im_id': inputs['im_id'].numpy(), - 'im_shape': inputs['im_shape'].numpy() + 'bbox': self.gbd['predicted_bbox'].numpy(), + 'bbox_nums': self.gbd['predicted_bbox_nums'].numpy(), + 'mask': self.gbd['predicted_mask'].numpy(), + 'im_id': self.gbd['im_id'].numpy(), + 'im_shape': self.gbd['im_shape'].numpy() } return inputs diff --git a/ppdet/modeling/architecture/meta_arch.py b/ppdet/modeling/architecture/meta_arch.py index c3bec696351ed07b0a5850966e628714d26c689f..1b0dcaa037ece9ca93d82e42450a80bce50ed68e 100644 --- a/ppdet/modeling/architecture/meta_arch.py +++ b/ppdet/modeling/architecture/meta_arch.py @@ -3,11 +3,8 @@ from __future__ import division from __future__ import print_function import numpy as np - -from paddle import fluid from paddle.fluid.dygraph import Layer from paddle.fluid.dygraph.base import to_variable - from ppdet.core.workspace import register from ppdet.utils.data_structure import BufferDict @@ -16,22 +13,43 @@ __all__ = ['BaseArch'] @register class BaseArch(Layer): - def __init__(self, mode='train', *args, **kwargs): + def __init__(self, *args, **kwargs): super(BaseArch, self).__init__() - self.mode = mode + self.args = args + self.kwargs = kwargs - def forward(self, inputs, inputs_keys, mode='train'): - raise NotImplementedError("Should implement forward method!") + def forward(self, inputs, inputs_keys): + self.gbd = BufferDict() + self.gbd.update(self.kwargs) + assert self.gbd[ + 'mode'] is not None, "Please specify mode train or infer in config file!" + if self.kwargs['open_debug'] is None: + self.gbd['open_debug'] = False - def loss(self, inputs): - raise NotImplementedError("Should implement loss method!") + self.build_inputs(inputs, inputs_keys) - def infer(self, inputs): - raise NotImplementedError("Should implement infer method!") + self.model_arch() + + self.gbd.debug() + + if self.gbd['mode'] == 'train': + out = self.loss() + elif self.gbd['mode'] == 'infer': + out = self.infer() + else: + raise "Now, only support train or infer mode!" + return out def build_inputs(self, inputs, inputs_keys): - gbd = BufferDict() for i, k in enumerate(inputs_keys): v = to_variable(np.array([x[i] for x in inputs])) - gbd.set(k, v) - return gbd + self.gbd.set(k, v) + + def model_arch(self, ): + raise NotImplementedError("Should implement model_arch method!") + + def loss(self, ): + raise NotImplementedError("Should implement loss method!") + + def infer(self, ): + raise NotImplementedError("Should implement infer method!") diff --git a/ppdet/modeling/architecture/yolo.py b/ppdet/modeling/architecture/yolo.py index e095061901a56dfa65dc7006e52362f2a8b37d73..8235604a15a7af7315d8be015ccde134cb9e9ce8 100644 --- a/ppdet/modeling/architecture/yolo.py +++ b/ppdet/modeling/architecture/yolo.py @@ -2,8 +2,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from paddle import fluid - from ppdet.core.workspace import register from .meta_arch import BaseArch @@ -19,17 +17,13 @@ class YOLOv3(BaseArch): 'yolo_head', ] - def __init__(self, anchor, backbone, yolo_head, mode='train'): - super(YOLOv3, self).__init__() + def __init__(self, anchor, backbone, yolo_head, *args, **kwargs): + super(YOLOv3, self).__init__(*args, **kwargs) self.anchor = anchor self.backbone = backbone self.yolo_head = yolo_head - self.mode = mode - - def forward(self, inputs, inputs_keys): - self.gbd = self.build_inputs(inputs, inputs_keys) - self.gbd['mode'] = self.mode + def model_arch(self, ): # Backbone bb_out = self.backbone(self.gbd) self.gbd.update(bb_out) @@ -46,23 +40,14 @@ class YOLOv3(BaseArch): bbox_out = self.anchor.post_process(self.gbd) self.gbd.update(bbox_out) - # result - if self.gbd['mode'] == 'train': - return self.loss(self.gbd) - elif self.gbd['mode'] == 'infer': - return self.infer(self.gbd) - else: - raise "Now, only support train or infer mode!" - - def loss(self, inputs): - yolo_loss = self.yolo_head.loss(inputs) - out = {'loss': yolo_loss, } + def loss(self, ): + yolo_loss = self.yolo_head.loss(self.gbd) + out = {'loss': yolo_loss} return out - def infer(self, inputs): + def infer(self, ): outs = { - "bbox": inputs['predicted_bbox'].numpy(), - "bbox_nums": inputs['predicted_bbox_nums'] + "bbox": self.gbd['predicted_bbox'].numpy(), + "bbox_nums": self.gbd['predicted_bbox_nums'] } - print(outs['bbox_nums']) return outs diff --git a/ppdet/modeling/backbone/resnet.py b/ppdet/modeling/backbone/resnet.py index d8369ffcf3b84355def16fb183699b97ed01b262..87417e84906190d4d7614462060cfd75eaa68755 100755 --- a/ppdet/modeling/backbone/resnet.py +++ b/ppdet/modeling/backbone/resnet.py @@ -4,7 +4,6 @@ from paddle.fluid.dygraph import Layer from paddle.fluid.dygraph import Conv2D, Pool2D, BatchNorm from paddle.fluid.param_attr import ParamAttr from paddle.fluid.initializer import Constant - from ppdet.core.workspace import register, serializable @@ -17,10 +16,10 @@ class ConvBNLayer(Layer): stride, padding, act='relu', - learning_rate=1.0): + lr=1.0): super(ConvBNLayer, self).__init__() - self._conv = Conv2D( + self.conv = Conv2D( num_channels=ch_in, num_filters=ch_out, filter_size=filter_size, @@ -29,26 +28,23 @@ class ConvBNLayer(Layer): groups=1, act=act, param_attr=ParamAttr( - name=name_scope + "_weights", learning_rate=learning_rate), + name=name_scope + "_weights", learning_rate=lr), bias_attr=ParamAttr(name=name_scope + "_bias")) - if name_scope == "conv1": bn_name = "bn_" + name_scope else: bn_name = "bn" + name_scope[3:] - - self._bn = BatchNorm( + self.bn = BatchNorm( num_channels=ch_out, act=act, param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), + bias_attr=ParamAttr(name=bn_name + '_offset'), moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance', - is_test=True) + moving_variance_name=bn_name + '_variance') def forward(self, inputs): - x = self._conv(inputs) - out = self._bn(x) + out = self.conv(inputs) + out = self.bn(out) return out @@ -60,11 +56,11 @@ class ConvAffineLayer(Layer): filter_size, stride, padding, - learning_rate=1.0, + lr=1.0, act='relu'): super(ConvAffineLayer, self).__init__() - self._conv = Conv2D( + self.conv = Conv2D( num_channels=ch_in, num_filters=ch_out, filter_size=filter_size, @@ -72,36 +68,34 @@ class ConvAffineLayer(Layer): padding=padding, act=None, param_attr=ParamAttr( - name=name_scope + "_weights", learning_rate=learning_rate), + name=name_scope + "_weights", learning_rate=lr), bias_attr=False) - if name_scope == "conv1": bn_name = "bn_" + name_scope else: bn_name = "bn" + name_scope[3:] - self.name_scope = name_scope - - self.scale = fluid.Layer.create_parameter( + self.scale = fluid.layers.create_parameter( shape=[ch_out], dtype='float32', attr=ParamAttr( name=bn_name + '_scale', learning_rate=0.), default_initializer=Constant(1.)) - self.bias = fluid.layers.create_parameter( + + self.offset = fluid.layers.create_parameter( shape=[ch_out], dtype='float32', attr=ParamAttr( - bn_name + '_offset', learning_rate=0.), + name=bn_name + '_offset', learning_rate=0.), default_initializer=Constant(0.)) self.act = act def forward(self, inputs): - conv = self._conv(inputs) + out = self.conv(inputs) out = fluid.layers.affine_channel( - x=conv, scale=self.scale, bias=self.bias) + out, scale=self.scale, bias=self.offset) if self.act == 'relu': - out = fluid.layers.relu(x=out) + out = fluid.layers.relu(out) return out @@ -112,12 +106,21 @@ class BottleNeck(Layer): ch_out, stride, shortcut=True, - learning_rate=1.0): + lr=1.0, + norm_type='bn'): super(BottleNeck, self).__init__() + self.name_scope = name_scope + if norm_type == 'bn': + atom_block = ConvBNLayer + elif norm_type == 'affine': + atom_block = ConvAffineLayer + else: + atom_block = None + assert atom_block != None, 'NormType only support BatchNorm and Affine!' self.shortcut = shortcut if not shortcut: - self.short = ConvBNLayer( + self.branch1 = atom_block( name_scope + "_branch1", ch_in=ch_in, ch_out=ch_out * 4, @@ -125,52 +128,48 @@ class BottleNeck(Layer): stride=stride, padding=0, act=None, - learning_rate=learning_rate) + lr=lr) - self.conv1 = ConvBNLayer( + self.branch2a = atom_block( name_scope + "_branch2a", ch_in=ch_in, ch_out=ch_out, filter_size=1, stride=stride, padding=0, - learning_rate=learning_rate, ) + lr=lr) - self.conv2 = ConvBNLayer( + self.branch2b = atom_block( name_scope + "_branch2b", ch_in=ch_out, ch_out=ch_out, filter_size=3, stride=1, padding=1, - learning_rate=learning_rate) + lr=lr) - self.conv3 = ConvBNLayer( + self.branch2c = atom_block( name_scope + "_branch2c", ch_in=ch_out, ch_out=ch_out * 4, filter_size=1, stride=1, padding=0, - learning_rate=learning_rate, + lr=lr, act=None) - self.name_scope = name_scope def forward(self, inputs): if self.shortcut: short = inputs else: - short = self.short(inputs) + short = self.branch1(inputs) - conv1 = self.conv1(inputs) - conv2 = self.conv2(conv1) - conv3 = self.conv3(conv2) + out = self.branch2a(inputs) + out = self.branch2b(out) + out = self.branch2c(out) out = fluid.layers.elementwise_add( - x=short, - y=conv3, - act='relu', - name=self.name_scope + ".add.output.5") + x=short, y=out, act='relu', name=self.name_scope + ".add.output.5") return out @@ -182,7 +181,8 @@ class Blocks(Layer): ch_out, count, stride, - learning_rate=1.0): + lr=1.0, + norm_type='bn'): super(Blocks, self).__init__() self.blocks = [] @@ -204,7 +204,8 @@ class Blocks(Layer): ch_out=ch_out, stride=self.stride, shortcut=self.shortcut, - learning_rate=learning_rate)) + lr=lr, + norm_type=norm_type)) self.blocks.append(block) shortcut = True @@ -215,62 +216,75 @@ class Blocks(Layer): return res_out +ResNet_cfg = {'50': [3, 4, 6, 3], '101': [3, 4, 23, 3], '152': [3, 8, 36, 3]} + + @register @serializable class ResNet(Layer): - def __init__( - self, - norm_type='bn', - depth=50, - feature_maps=4, - freeze_at=2, ): + def __init__(self, depth=50, norm_type='bn', freeze_at='res2'): super(ResNet, self).__init__() + self.depth = depth + self.norm_type = norm_type + self.freeze_at = freeze_at + + block_nums = ResNet_cfg[str(self.depth)] + if self.norm_type == 'bn': + atom_block = ConvBNLayer + elif self.norm_type == 'affine': + atom_block = ConvAffineLayer + else: + atom_block = None + assert atom_block != None, 'NormType only support BatchNorm and Affine!' - if depth == 50: - blocks = [3, 4, 6, 3] - elif depth == 101: - blocks = [3, 4, 23, 3] - elif depth == 152: - blocks = [3, 8, 36, 3] + self.conv1 = atom_block( + 'conv1', ch_in=3, ch_out=64, filter_size=7, stride=2, padding=3) - self.conv = ConvBNLayer( - "conv1", - ch_in=3, - ch_out=64, - filter_size=7, - stride=2, - padding=3, - learning_rate=0.) - - self.pool2d_max = Pool2D( + self.pool = Pool2D( pool_type='max', pool_size=3, pool_stride=2, pool_padding=1) self.stage2 = Blocks( "res2", ch_in=64, ch_out=64, - count=blocks[0], + count=block_nums[0], stride=1, - learning_rate=0.) + norm_type=norm_type) self.stage3 = Blocks( - "res3", ch_in=256, ch_out=128, count=blocks[1], stride=2) + "res3", + ch_in=256, + ch_out=128, + count=block_nums[1], + stride=2, + norm_type=norm_type) self.stage4 = Blocks( - "res4", ch_in=512, ch_out=256, count=blocks[2], stride=2) + "res4", + ch_in=512, + ch_out=256, + count=block_nums[2], + stride=2, + norm_type=norm_type) def forward(self, inputs): x = inputs['image'] - conv1 = self.conv(x) - poo1 = self.pool2d_max(conv1) + conv1 = self.conv1(x) + + pool1 = self.pool(conv1) - res2 = self.stage2(poo1) - res2.stop_gradient = True + res2 = self.stage2(pool1) res3 = self.stage3(res2) res4 = self.stage4(res3) - outs = {'res2': res2, 'res3': res3, 'res4': res4} + outs = { + 'res2': res2, + 'res3': res3, + 'res4': res4, + 'res_norm_type': self.norm_type + } + outs[self.freeze_at].stop_gradient = True return outs diff --git a/ppdet/modeling/anchor.py b/ppdet/modeling/bbox.py similarity index 54% rename from ppdet/modeling/anchor.py rename to ppdet/modeling/bbox.py index bd3b0c0a37b31514e6d40f7335b04d7cb1e68b29..b91f6adec72089779bde98c93ad78b1d3702508a 100644 --- a/ppdet/modeling/anchor.py +++ b/ppdet/modeling/bbox.py @@ -1,29 +1,27 @@ import numpy as np import paddle.fluid as fluid - from ppdet.core.workspace import register -from ppdet.modeling.ops import ( - AnchorGeneratorYOLO, AnchorTargetGeneratorYOLO, AnchorGeneratorRPN, - AnchorTargetGeneratorRPN, ProposalGenerator, ProposalTargetGenerator, - MaskTargetGenerator, DecodeClipNms, YOLOBox, MultiClassNMS) -# TODO: modify here into ppdet.modeling.ops like DecodeClipNms -from ppdet.py_op.post_process import mask_post_process @register class BBoxPostProcess(object): + __shared__ = ['num_classes', 'num_stages'] + __inject__ = ['decode_clip_nms'] + def __init__(self, + decode_clip_nms, + num_classes=81, + num_stages=1, decode=None, clip=None, - nms=None, - decode_clip_nms=DecodeClipNms().__dict__): + nms=None): super(BBoxPostProcess, self).__init__() + self.num_classes = num_classes + self.num_stages = num_stages self.decode = decode self.clip = clip self.nms = nms self.decode_clip_nms = decode_clip_nms - if isinstance(decode_clip_nms, dict): - self.decode_clip_nms = DecodeClipNms(**decode_clip_nms) def __call__(self, inputs): # TODO: split into 3 steps @@ -31,8 +29,25 @@ class BBoxPostProcess(object): # decode # clip # nms - outs = self.decode_clip_nms(inputs['rpn_rois'], inputs['bbox_prob'], - inputs['bbox_delta'], inputs['im_info']) + if self.num_stages > 0: + bbox_prob_list = [] + for i in range(self.num_stages): + bbox_prob_list.append(inputs['bbox_head_' + str(i)][ + 'bbox_prob']) + bbox_prob = fluid.layers.sum(bbox_prob_list) / float( + len(bbox_prob_list)) + bbox_delta = inputs['bbox_head_' + str(i)]['bbox_delta'] + if inputs['bbox_head_0']['cls_agnostic_bbox_reg'] == 2: + bbox_delta = fluid.layers.slice( + bbox_delta, axes=1, starts=[1], ends=[2]) + bbox_delta = fluid.layers.expand(bbox_delta, + [1, self.num_classes, 1]) + else: + bbox_prob = inputs['bbox_prob'] + bbox_delta = inputs['bbox_delta'] + + outs = self.decode_clip_nms(inputs['rpn_rois'], bbox_prob, bbox_delta, + inputs['im_info']) outs = {"predicted_bbox_nums": outs[0], "predicted_bbox": outs[1]} return outs @@ -40,22 +55,15 @@ class BBoxPostProcess(object): @register class BBoxPostProcessYOLO(object): __shared__ = ['num_classes'] + __inject__ = ['yolo_box', 'nms'] - def __init__(self, - num_classes=80, - decode=None, - clip=None, - yolo_box=YOLOBox().__dict__, - nms=MultiClassNMS().__dict__): + def __init__(self, yolo_box, nms, num_classes=80, decode=None, clip=None): super(BBoxPostProcessYOLO, self).__init__() + self.yolo_box = yolo_box + self.nms = nms self.num_classes = num_classes self.decode = decode self.clip = clip - self.nms = nms - if isinstance(yolo_box, dict): - self.yolo_box = YOLOBox(**yolo_box) - if isinstance(nms, dict): - self.nms = MultiClassNMS(**nms) def __call__(self, inputs): # TODO: split yolo_box into 2 steps @@ -80,40 +88,14 @@ class BBoxPostProcessYOLO(object): return outs -@register -class MaskPostProcess(object): - __shared__ = ['num_classes'] - - def __init__(self, num_classes=81): - super(MaskPostProcess, self).__init__() - self.num_classes = num_classes - - def __call__(self, inputs): - # TODO: modify related ops for deploying - outs = mask_post_process(inputs['predicted_bbox_nums'].numpy(), - inputs['predicted_bbox'].numpy(), - inputs['mask_logits'].numpy(), - inputs['im_info'].numpy()) - outs = {'predicted_mask': outs} - return outs - - @register class AnchorRPN(object): __inject__ = ['anchor_generator', 'anchor_target_generator'] - def __init__(self, - anchor_type='rpn', - anchor_generator=AnchorGeneratorRPN().__dict__, - anchor_target_generator=AnchorTargetGeneratorRPN().__dict__): + def __init__(self, anchor_generator, anchor_target_generator): super(AnchorRPN, self).__init__() self.anchor_generator = anchor_generator self.anchor_target_generator = anchor_target_generator - if isinstance(anchor_generator, dict): - self.anchor_generator = AnchorGeneratorRPN(**anchor_generator) - if isinstance(anchor_target_generator, dict): - self.anchor_target_generator = AnchorTargetGeneratorRPN( - **anchor_target_generator) def __call__(self, inputs): outs = self.generate_anchors(inputs) @@ -122,7 +104,7 @@ class AnchorRPN(object): def generate_anchors(self, inputs): # TODO: update here to use int to specify featmap size outs = self.anchor_generator(inputs['rpn_feat']) - outs = {'anchor': outs[0], 'var': outs[1], 'anchor_module': self} + outs = {'anchor': outs[0], 'anchor_var': outs[1], 'anchor_module': self} return outs def generate_anchors_target(self, inputs): @@ -143,7 +125,8 @@ class AnchorRPN(object): anchor_box=anchor, gt_boxes=inputs['gt_bbox'], is_crowd=inputs['is_crowd'], - im_info=inputs['im_info']) + im_info=inputs['im_info'], + open_debug=inputs['open_debug']) outs = { 'rpn_score_pred': score_pred, 'rpn_score_target': score_tgt, @@ -160,22 +143,12 @@ class AnchorYOLO(object): 'anchor_generator', 'anchor_target_generator', 'anchor_post_process' ] - def __init__(self, - anchor_generator=AnchorGeneratorYOLO().__dict__, - anchor_target_generator=AnchorTargetGeneratorYOLO().__dict__, - anchor_post_process=BBoxPostProcessYOLO().__dict__): + def __init__(self, anchor_generator, anchor_target_generator, + anchor_post_process): super(AnchorYOLO, self).__init__() self.anchor_generator = anchor_generator self.anchor_target_generator = anchor_target_generator self.anchor_post_process = anchor_post_process - if isinstance(anchor_generator, dict): - self.anchor_generator = AnchorGeneratorYOLO(**anchor_generator) - if isinstance(anchor_target_generator, dict): - self.anchor_target_generator = AnchorTargetGeneratorYOLO( - **anchor_target_generator) - if isinstance(anchor_post_process, dict): - self.anchor_post_process = BBoxPostProcessYOLO( - **anchor_post_process) def __call__(self, inputs): outs = self.generate_anchors(inputs) @@ -200,30 +173,20 @@ class Proposal(object): 'proposal_generator', 'proposal_target_generator', 'bbox_post_process' ] - def __init__( - self, - proposal_generator=ProposalGenerator().__dict__, - proposal_target_generator=ProposalTargetGenerator().__dict__, - bbox_post_process=BBoxPostProcess().__dict__, ): + def __init__(self, proposal_generator, proposal_target_generator, + bbox_post_process): super(Proposal, self).__init__() self.proposal_generator = proposal_generator self.proposal_target_generator = proposal_target_generator self.bbox_post_process = bbox_post_process - if isinstance(proposal_generator, dict): - self.proposal_generator = ProposalGenerator(**proposal_generator) - if isinstance(proposal_target_generator, dict): - self.proposal_target_generator = ProposalTargetGenerator( - **proposal_target_generator) - if isinstance(bbox_post_process, dict): - self.bbox_post_process = BBoxPostProcess(**bbox_post_process) - - def __call__(self, inputs, stage=0): + + def __call__(self, inputs): outs = {} - if stage == 0: + if inputs['stage'] == 0: proposal_out = self.generate_proposal(inputs) inputs.update(proposal_out) if inputs['mode'] == 'train': - proposal_target_out = self.generate_proposal_target(inputs, stage) + proposal_target_out = self.generate_proposal_target(inputs) outs.update(proposal_target_out) return outs @@ -234,7 +197,7 @@ class Proposal(object): scores=rpn_rois_prob, bbox_deltas=inputs['rpn_rois_delta'], anchors=inputs['anchor'], - variances=inputs['var'], + variances=inputs['anchor_var'], im_info=inputs['im_info'], mode=inputs['mode']) outs = { @@ -244,15 +207,24 @@ class Proposal(object): } return outs - def generate_proposal_target(self, inputs, stage=0): + def generate_proposal_target(self, inputs): + if inputs['stage'] == 0: + rois = inputs['rpn_rois'] + rois_num = inputs['rpn_rois_nums'] + elif inputs['stage'] > 0: + last_proposal_out = inputs['proposal_' + str(inputs['stage'] - 1)] + rois = last_proposal_out['refined_bbox'] + rois_num = last_proposal_out['rois_nums'] + outs = self.proposal_target_generator( - rpn_rois=inputs['rpn_rois'], - rpn_rois_nums=inputs['rpn_rois_nums'], + rpn_rois=rois, + rpn_rois_nums=rois_num, gt_classes=inputs['gt_class'], is_crowd=inputs['is_crowd'], gt_boxes=inputs['gt_bbox'], im_info=inputs['im_info'], - stage=stage) + stage=inputs['stage'], + open_debug=inputs['open_debug']) outs = { 'rois': outs[0], 'labels_int32': outs[1], @@ -263,49 +235,31 @@ class Proposal(object): } return outs - def post_process(self, inputs): - outs = self.bbox_post_process(inputs) - return outs - - -@register -class Mask(object): - __inject__ = ['mask_target_generator', 'mask_post_process'] - - def __init__(self, - mask_target_generator=MaskTargetGenerator().__dict__, - mask_post_process=MaskPostProcess().__dict__): - super(Mask, self).__init__() - self.mask_target_generator = mask_target_generator - self.mask_post_process = mask_post_process - if isinstance(mask_target_generator, dict): - self.mask_target_generator = MaskTargetGenerator( - **mask_target_generator) - if isinstance(mask_post_process, dict): - self.mask_post_process = MaskPostProcess(**mask_post_process) - - def __call__(self, inputs): - outs = {} + def refine_bbox(self, inputs): if inputs['mode'] == 'train': - outs = self.generate_mask_target(inputs) - return outs - - def generate_mask_target(self, inputs): - outs = self.mask_target_generator( - im_info=inputs['im_info'], - gt_classes=inputs['gt_class'], - is_crowd=inputs['is_crowd'], - gt_segms=inputs['gt_mask'], - rois=inputs['rois'], - rois_nums=inputs['rois_nums'], - labels_int32=inputs['labels_int32'], ) - outs = { - 'mask_rois': outs[0], - 'rois_has_mask_int32': outs[1], - 'mask_int32': outs[2] - } + rois = inputs['proposal_' + str(inputs['stage'])]['rois'] + else: + rois = inputs['rpn_rois'] + bbox_head_out = inputs['bbox_head_' + str(inputs['stage'])] + + bbox_delta_r = fluid.layers.reshape( + bbox_head_out['bbox_delta'], + (-1, inputs['bbox_head_0']['cls_agnostic_bbox_reg'], 4)) + bbox_delta_s = fluid.layers.slice( + bbox_delta_r, axes=[1], starts=[1], ends=[2]) + + refined_bbox = fluid.layers.box_coder( + prior_box=rois, + prior_box_var=self.proposal_target_generator.bbox_reg_weights[ + inputs['stage']], + target_box=bbox_delta_s, + code_type='decode_center_size', + box_normalized=False, + axis=1) + refined_bbox = fluid.layers.reshape(refined_bbox, shape=[-1, 4]) + outs = {'refined_bbox': refined_bbox} return outs def post_process(self, inputs): - outs = self.mask_post_process(inputs) + outs = self.bbox_post_process(inputs) return outs diff --git a/ppdet/modeling/head/bbox_head.py b/ppdet/modeling/head/bbox_head.py index ecd72d1b80863951e8cd10fb3905554532e30ccf..d1fe38937378d6dd645e165358516bd8ae2af0a9 100644 --- a/ppdet/modeling/head/bbox_head.py +++ b/ppdet/modeling/head/bbox_head.py @@ -1,41 +1,46 @@ import paddle.fluid as fluid from paddle.fluid.dygraph import Layer - from paddle.fluid.param_attr import ParamAttr from paddle.fluid.initializer import Normal, MSRA from paddle.fluid.regularizer import L2Decay from paddle.fluid.dygraph.nn import Conv2D, Pool2D from ppdet.core.workspace import register +# TODO: del import and use inject from ..backbone.resnet import Blocks -from ..ops import RoIExtractor @register class BBoxFeat(Layer): __inject__ = ['roi_extractor'] + __shared__ = ['num_stages'] - def __init__(self, - feat_in=1024, - feat_out=512, - roi_extractor=RoIExtractor().__dict__, - stage=0): + def __init__(self, roi_extractor, feat_in=1024, feat_out=512, num_stages=1): super(BBoxFeat, self).__init__() self.roi_extractor = roi_extractor - if isinstance(roi_extractor, dict): - self.roi_extractor = RoIExtractor(**roi_extractor) - if stage == 0: - postfix = '' - else: - postfix = '_' + str(stage) - self.res5 = Blocks( - "res5", ch_in=feat_in, ch_out=feat_out, count=3, stride=2) + self.num_stages = num_stages + self.res5s = [] + for i in range(self.num_stages): + if i == 0: + postfix = '' + else: + postfix = '_' + str(i) + # TODO: set norm type + res5 = Blocks( + "res5" + postfix, + ch_in=feat_in, + ch_out=feat_out, + count=3, + stride=2) + self.res5s.append(res5) self.res5_pool = fluid.dygraph.Pool2D( pool_type='avg', global_pooling=True) def forward(self, inputs): + if inputs['mode'] == 'train': - rois = inputs['rois'] - rois_num = inputs['rois_nums'] + in_rois = inputs['proposal_' + str(inputs['stage'])] + rois = in_rois['rois'] + rois_num = in_rois['rois_nums'] elif inputs['mode'] == 'infer': rois = inputs['rpn_rois'] rois_num = inputs['rpn_rois_nums'] @@ -44,14 +49,14 @@ class BBoxFeat(Layer): rois_feat = self.roi_extractor(inputs['res4'], rois, rois_num) # TODO: add others - y_res5 = self.res5(rois_feat) + y_res5 = self.res5s[inputs['stage']](rois_feat) y = self.res5_pool(y_res5) y = fluid.layers.squeeze(y, axes=[2, 3]) outs = { 'rois_feat': rois_feat, 'res5': y_res5, "bbox_feat": y, - 'shared_res5_block': self.res5, + 'shared_res5_block': self.res5s[inputs['stage']], 'shared_roi_extractor': self.roi_extractor } return outs @@ -59,79 +64,92 @@ class BBoxFeat(Layer): @register class BBoxHead(Layer): - __shared__ = ['num_classes'] __inject__ = ['bbox_feat'] + __shared__ = ['num_classes', 'num_stages'] def __init__(self, - in_feat=2048, + bbox_feat, + feat_in=2048, num_classes=81, - bbox_feat=BBoxFeat().__dict__, - stage=0): + cls_agnostic_bbox_reg=81, + num_stages=1): super(BBoxHead, self).__init__() - self.num_classes = num_classes self.bbox_feat = bbox_feat - if isinstance(bbox_feat, dict): - self.bbox_feat = BBoxFeat(**bbox_feat) - if stage == 0: - postfix = '' - else: - postfix = '_' + str(stage) - self.bbox_score = fluid.dygraph.Linear( - input_dim=in_feat, - output_dim=1 * self.num_classes, - act=None, - param_attr=ParamAttr( - name='cls_score_w' + postfix, - initializer=Normal( - loc=0.0, scale=0.001)), - bias_attr=ParamAttr( - name='cls_score_b' + postfix, - learning_rate=2., - regularizer=L2Decay(0.))) - - self.bbox_delta = fluid.dygraph.Linear( - input_dim=in_feat, - output_dim=4 * self.num_classes, - act=None, - param_attr=ParamAttr( - name='bbox_pred_w' + postfix, - initializer=Normal( - loc=0.0, scale=0.01)), - bias_attr=ParamAttr( - name='bbox_pred_b' + postfix, - learning_rate=2., - regularizer=L2Decay(0.))) + self.num_classes = num_classes + self.cls_agnostic_bbox_reg = cls_agnostic_bbox_reg + self.num_stages = num_stages + + self.bbox_scores = [] + self.bbox_deltas = [] + for i in range(self.num_stages): + if i == 0: + postfix = '' + else: + postfix = '_' + str(i) + bbox_score = fluid.dygraph.Linear( + input_dim=feat_in, + output_dim=1 * self.num_classes, + act=None, + param_attr=ParamAttr( + name='cls_score_w' + postfix, + initializer=Normal( + loc=0.0, scale=0.001)), + bias_attr=ParamAttr( + name='cls_score_b' + postfix, + learning_rate=2., + regularizer=L2Decay(0.))) + + bbox_delta = fluid.dygraph.Linear( + input_dim=feat_in, + output_dim=4 * self.cls_agnostic_bbox_reg, + act=None, + param_attr=ParamAttr( + name='bbox_pred_w' + postfix, + initializer=Normal( + loc=0.0, scale=0.01)), + bias_attr=ParamAttr( + name='bbox_pred_b' + postfix, + learning_rate=2., + regularizer=L2Decay(0.))) + self.bbox_scores.append(bbox_score) + self.bbox_deltas.append(bbox_delta) def forward(self, inputs): outs = self.bbox_feat(inputs) x = outs['bbox_feat'] - bs = self.bbox_score(x) - bd = self.bbox_delta(x) + bs = self.bbox_scores[inputs['stage']](x) + bd = self.bbox_deltas[inputs['stage']](x) outs.update({'bbox_score': bs, 'bbox_delta': bd}) + if inputs['stage'] == 0: + outs.update({"cls_agnostic_bbox_reg": self.cls_agnostic_bbox_reg}) if inputs['mode'] == 'infer': bbox_prob = fluid.layers.softmax(bs, use_cudnn=False) outs['bbox_prob'] = bbox_prob return outs def loss(self, inputs): + bbox_out = inputs['bbox_head_' + str(inputs['stage'])] + bbox_target = inputs['proposal_' + str(inputs['stage'])] + # bbox cls labels_int64 = fluid.layers.cast( - x=inputs['labels_int32'], dtype='int64') + x=bbox_target['labels_int32'], dtype='int64') labels_int64.stop_gradient = True - bbox_score = fluid.layers.reshape(inputs['bbox_score'], + bbox_score = fluid.layers.reshape(bbox_out['bbox_score'], (-1, self.num_classes)) loss_bbox_cls = fluid.layers.softmax_with_cross_entropy( logits=bbox_score, label=labels_int64) loss_bbox_cls = fluid.layers.reduce_mean( - loss_bbox_cls, name='loss_bbox_cls') + loss_bbox_cls, name='loss_bbox_cls_' + str(inputs['stage'])) + # bbox reg loss_bbox_reg = fluid.layers.smooth_l1( - x=inputs['bbox_delta'], - y=inputs['bbox_targets'], - inside_weight=inputs['bbox_inside_weights'], - outside_weight=inputs['bbox_outside_weights'], + x=bbox_out['bbox_delta'], + y=bbox_target['bbox_targets'], + inside_weight=bbox_target['bbox_inside_weights'], + outside_weight=bbox_target['bbox_outside_weights'], sigma=1.0) loss_bbox_reg = fluid.layers.reduce_mean( - loss_bbox_reg, name='loss_bbox_loc') + loss_bbox_reg, name='loss_bbox_loc_' + str(inputs['stage'])) return loss_bbox_cls, loss_bbox_reg diff --git a/ppdet/modeling/head/mask_head.py b/ppdet/modeling/head/mask_head.py index 1e904c0b019b1d991facb16f0828939b544b4db2..c65020f05b22898729dc0985857e46b09acb7715 100644 --- a/ppdet/modeling/head/mask_head.py +++ b/ppdet/modeling/head/mask_head.py @@ -6,55 +6,54 @@ from paddle.fluid.initializer import Normal, MSRA from paddle.fluid.regularizer import L2Decay from paddle.fluid.dygraph.nn import Conv2D, Pool2D from ppdet.core.workspace import register -from ..ops import RoIExtractor +# TODO: del it and use inject from ..backbone.resnet import Blocks @register class MaskFeat(Layer): - __inject__ = ['mask_roi_extractor'] - - def __init__(self, - feat_in=2048, - feat_out=256, - mask_roi_extractor=RoIExtractor().__dict__, - stage=0): + def __init__(self, feat_in=2048, feat_out=256, mask_stages=1): super(MaskFeat, self).__init__() self.feat_in = feat_in self.feat_out = feat_out - self.mask_roi_extractor = mask_roi_extractor - if isinstance(mask_roi_extractor, dict): - self.mask_roi_extractor = RoIExtractor(**mask_roi_extractor) - if stage == 0: - postfix = '' - else: - postfix = '_' + str(stage) - self.upsample = fluid.dygraph.Conv2DTranspose( - num_channels=self.feat_in, - num_filters=self.feat_out, - filter_size=2, - stride=2, - act='relu', - param_attr=ParamAttr( - name='conv5_mask_w' + postfix, initializer=MSRA(uniform=False)), - bias_attr=ParamAttr( - name='conv5_mask_b' + postfix, - learning_rate=2., - regularizer=L2Decay(0.))) + self.mask_stages = mask_stages + + for i in range(self.mask_stages): + if i == 0: + postfix = '' + else: + postfix = '_' + str(i) + self.upsample = fluid.dygraph.Conv2DTranspose( + num_channels=self.feat_in, + num_filters=self.feat_out, + filter_size=2, + stride=2, + act='relu', + param_attr=ParamAttr( + name='conv5_mask_w' + postfix, + initializer=MSRA(uniform=False)), + bias_attr=ParamAttr( + name='conv5_mask_b' + postfix, + learning_rate=2., + regularizer=L2Decay(0.))) def forward(self, inputs): + bbox_head_out = inputs['bbox_head_' + str(inputs['stage'])] if inputs['mode'] == 'train': - x = inputs['res5'] + x = bbox_head_out['res5'] rois_feat = fluid.layers.gather(x, inputs['rois_has_mask_int32']) elif inputs['mode'] == 'infer': rois = inputs['predicted_bbox'][:, 2:] * inputs['im_info'][:, 2] rois_num = inputs['predicted_bbox_nums'] # TODO: optim here - if callable(inputs['shared_roi_extractor']): - rois_feat = inputs['shared_roi_extractor'](inputs['res4'], rois, - rois_num) - if callable(inputs['shared_res5_block']): - rois_feat = inputs['shared_res5_block'](rois_feat) + shared_roi_ext = bbox_head_out['shared_roi_extractor'] + if callable(shared_roi_ext): + rois_feat = shared_roi_ext(inputs['res4'], rois, rois_num) + + shared_res5 = bbox_head_out['shared_res5_block'] + if callable(shared_res5): + rois_feat = shared_res5(rois_feat) + # upsample y = self.upsample(rois_feat) outs = {'mask_feat': y} @@ -67,33 +66,34 @@ class MaskHead(Layer): __inject__ = ['mask_feat'] def __init__(self, + mask_feat, + num_classes=81, feat_in=256, resolution=14, - num_classes=81, - mask_feat=MaskFeat().__dict__, - stage=0): + mask_stages=1): super(MaskHead, self).__init__() + self.mask_feat = mask_feat self.feat_in = feat_in self.resolution = resolution self.num_classes = num_classes - self.mask_feat = mask_feat - if isinstance(mask_feat, dict): - self.mask_feat = MaskFeat(**mask_feat) - if stage == 0: - postfix = '' - else: - postfix = '_' + str(stage) - self.mask_fcn_logits = fluid.dygraph.Conv2D( - num_channels=self.feat_in, - num_filters=self.num_classes, - filter_size=1, - param_attr=ParamAttr( - name='mask_fcn_logits_w' + postfix, - initializer=MSRA(uniform=False)), - bias_attr=ParamAttr( - name='mask_fcn_logits_b' + postfix, - learning_rate=2., - regularizer=L2Decay(0.0))) + self.mask_stages = mask_stages + + for i in range(self.mask_stages): + if i == 0: + postfix = '' + else: + postfix = '_' + str(i) + self.mask_fcn_logits = fluid.dygraph.Conv2D( + num_channels=self.feat_in, + num_filters=self.num_classes, + filter_size=1, + param_attr=ParamAttr( + name='mask_fcn_logits_w' + postfix, + initializer=MSRA(uniform=False)), + bias_attr=ParamAttr( + name='mask_fcn_logits_b' + postfix, + learning_rate=2., + regularizer=L2Decay(0.0))) def forward(self, inputs): # feat @@ -115,7 +115,6 @@ class MaskHead(Layer): return outs def loss(self, inputs): - # input needs (model_out, target) reshape_dim = self.num_classes * self.resolution * self.resolution mask_logits = fluid.layers.reshape(inputs['mask_logits'], (-1, reshape_dim)) diff --git a/ppdet/modeling/head/rpn_head.py b/ppdet/modeling/head/rpn_head.py index 86308b46c171fb26273112c65f54d121e719687e..219c08747d8d797fb4b0e00fe5a2e49ce5d9f093 100644 --- a/ppdet/modeling/head/rpn_head.py +++ b/ppdet/modeling/head/rpn_head.py @@ -4,7 +4,6 @@ from paddle.fluid.param_attr import ParamAttr from paddle.fluid.initializer import Normal from paddle.fluid.regularizer import L2Decay from paddle.fluid.dygraph.nn import Conv2D - from ppdet.core.workspace import register @@ -20,10 +19,10 @@ class RPNFeat(Layer): padding=1, act='relu', param_attr=ParamAttr( - "conv_rpn_w", initializer=Normal( + name="conv_rpn_w", initializer=Normal( loc=0., scale=0.01)), bias_attr=ParamAttr( - "conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.))) + name="conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.))) def forward(self, inputs): x = inputs.get('res4') @@ -36,12 +35,10 @@ class RPNFeat(Layer): class RPNHead(Layer): __inject__ = ['rpn_feat'] - def __init__(self, anchor_per_position=15, rpn_feat=RPNFeat().__dict__): + def __init__(self, rpn_feat, anchor_per_position=15): super(RPNHead, self).__init__() - self.anchor_per_position = anchor_per_position self.rpn_feat = rpn_feat - if isinstance(rpn_feat, dict): - self.rpn_feat = RPNFeat(**rpn_feat) + self.anchor_per_position = anchor_per_position # rpn roi classification scores self.rpn_rois_score = Conv2D( diff --git a/ppdet/modeling/head/yolo_head.py b/ppdet/modeling/head/yolo_head.py index d7bb50a96251fb075993c214d929d6d34018de66..56b1bafab6011326ab3601a31fd54f7bc5fdfcd6 100644 --- a/ppdet/modeling/head/yolo_head.py +++ b/ppdet/modeling/head/yolo_head.py @@ -132,19 +132,11 @@ class YOLOv3Head(Layer): __shared__ = ['num_classes'] __inject__ = ['yolo_feat'] - def __init__( - self, - num_classes=80, - anchor_per_position=3, - mode='train', - yolo_feat=YOLOFeat().__dict__, ): + def __init__(self, yolo_feat, num_classes=80, anchor_per_position=3): super(YOLOv3Head, self).__init__() self.num_classes = num_classes self.anchor_per_position = anchor_per_position - self.mode = mode self.yolo_feat = yolo_feat - if isinstance(yolo_feat, dict): - self.yolo_feat = YOLOFeat(**yolo_feat) self.yolo_outs = [] for i in range(3): diff --git a/ppdet/modeling/mask.py b/ppdet/modeling/mask.py new file mode 100644 index 0000000000000000000000000000000000000000..b7122c5b597b5d2f862047f83d32060ac87d851c --- /dev/null +++ b/ppdet/modeling/mask.py @@ -0,0 +1,61 @@ +import numpy as np +import paddle.fluid as fluid +from ppdet.core.workspace import register + +# TODO: regitster mask_post_process op +from ppdet.py_op.post_process import mask_post_process + + +@register +class MaskPostProcess(object): + __shared__ = ['num_classes'] + + def __init__(self, num_classes=81): + super(MaskPostProcess, self).__init__() + self.num_classes = num_classes + + def __call__(self, inputs): + # TODO: modify related ops for deploying + outs = mask_post_process(inputs['predicted_bbox_nums'].numpy(), + inputs['predicted_bbox'].numpy(), + inputs['mask_logits'].numpy(), + inputs['im_info'].numpy()) + outs = {'predicted_mask': outs} + return outs + + +@register +class Mask(object): + __inject__ = ['mask_target_generator', 'mask_post_process'] + + def __init__(self, mask_target_generator, mask_post_process): + super(Mask, self).__init__() + self.mask_target_generator = mask_target_generator + self.mask_post_process = mask_post_process + + def __call__(self, inputs): + outs = {} + if inputs['mode'] == 'train': + outs = self.generate_mask_target(inputs) + return outs + + def generate_mask_target(self, inputs): + proposal_out = inputs['proposal_' + str(inputs['stage'])] + outs = self.mask_target_generator( + im_info=inputs['im_info'], + gt_classes=inputs['gt_class'], + is_crowd=inputs['is_crowd'], + gt_segms=inputs['gt_mask'], + rois=proposal_out['rois'], + rois_nums=proposal_out['rois_nums'], + labels_int32=proposal_out['labels_int32']) + outs = { + 'mask_rois': outs[0], + 'rois_has_mask_int32': outs[1], + 'mask_int32': outs[2] + } + return outs + + def post_process(self, inputs): + outs = self.mask_post_process(inputs) + return outs diff --git a/ppdet/modeling/ops.py b/ppdet/modeling/ops.py index 524b6763a2b004c413a5c30d133e8e8bc42c1134..d4c7663c7cfeea50dc0960d81f46bc34f67761a1 100644 --- a/ppdet/modeling/ops.py +++ b/ppdet/modeling/ops.py @@ -49,13 +49,20 @@ class AnchorTargetGeneratorRPN(object): self.negative_overlap = negative_overlap self.use_random = use_random - def __call__(self, cls_logits, bbox_pred, anchor_box, gt_boxes, is_crowd, - im_info): + def __call__(self, + cls_logits, + bbox_pred, + anchor_box, + gt_boxes, + is_crowd, + im_info, + open_debug=False): anchor_box = anchor_box.numpy() gt_boxes = gt_boxes.numpy() is_crowd = is_crowd.numpy() im_info = im_info.numpy() - + if open_debug: + self.use_random = False loc_indexes, score_indexes, tgt_labels, tgt_bboxes, bbox_inside_weights = generate_rpn_anchor_target( anchor_box, gt_boxes, is_crowd, im_info, self.straddle_thresh, self.batch_size_per_im, self.positive_overlap, @@ -191,7 +198,7 @@ class ProposalTargetGenerator(object): bg_thresh_lo=[0., ], bbox_reg_weights=[[0.1, 0.1, 0.2, 0.2]], num_classes=81, - shuffle_before_sample=True, + use_random=True, is_cls_agnostic=False, is_cascade_rcnn=False): super(ProposalTargetGenerator, self).__init__() @@ -202,7 +209,7 @@ class ProposalTargetGenerator(object): self.bg_thresh_lo = bg_thresh_lo self.bbox_reg_weights = bbox_reg_weights self.num_classes = num_classes - self.use_random = shuffle_before_sample + self.use_random = use_random self.is_cls_agnostic = is_cls_agnostic, self.is_cascade_rcnn = is_cascade_rcnn @@ -213,13 +220,17 @@ class ProposalTargetGenerator(object): is_crowd, gt_boxes, im_info, - stage=0): + stage=0, + open_debug=False): rpn_rois = rpn_rois.numpy() rpn_rois_nums = rpn_rois_nums.numpy() gt_classes = gt_classes.numpy() gt_boxes = gt_boxes.numpy() is_crowd = is_crowd.numpy() im_info = im_info.numpy() + if open_debug: + self.use_random = False + outs = generate_proposal_target( rpn_rois, rpn_rois_nums, gt_classes, is_crowd, gt_boxes, im_info, self.batch_size_per_im, self.fg_fraction, self.fg_thresh[stage], @@ -265,10 +276,10 @@ class MaskTargetGenerator(object): @register class RoIExtractor(object): def __init__(self, - resolution=7, + resolution=14, spatial_scale=1. / 16, sampling_ratio=0, - extractor_type='RoIPool'): + extractor_type='RoIAlign'): super(RoIExtractor, self).__init__() if isinstance(resolution, Integral): resolution = [resolution, resolution] diff --git a/ppdet/utils/data_structure.py b/ppdet/utils/data_structure.py index b55504172de8f420e7a6a469c1ee1406b7512d5c..05d845c64a2978231d1b07e5fcbbbe85f7ded567 100644 --- a/ppdet/utils/data_structure.py +++ b/ppdet/utils/data_structure.py @@ -21,25 +21,44 @@ class BufferDict(dict): for k, v in dict(*args, **kwargs).items(): self[k] = v + def update_v(self, key, value): + if key in self.keys(): + super(BufferDict, self).__setitem__(key, value) + else: + raise Exception("The %s is not in global inputs dict" % key) + def get(self, key): return self.__getitem__(key) def set(self, key, value): - self.__setitem__(key, value) + return self.__setitem__(key, value) - def debug(self, dshape=True, dtype=False, dvalue=False, name='all'): - if name == 'all': - ditems = self.items() - else: - ditems = self.get(name) - - for k, v in ditems: - info = [k] - if dshape == True and hasattr(v, 'shape'): - info.append(v.shape) - if dtype == True: - info.append(type(v)) - if dvalue == True and hasattr(v, 'numpy'): - info.append(np.mean(np.abs(v.numpy()))) - - print(info) + def debug(self, dshape=True, dvalue=True, dtype=False): + if self['open_debug']: + if self['debug_names'] is None: + ditems = self.keys() + else: + ditems = self['debug_names'] + + infos = {} + for k in ditems: + if type(k) is dict: + i_d = {} + for i, j in k.items(): + if type(j) is list: + for jj in j: + i_d[jj] = self.get_debug_info(self[i][jj]) + infos[i] = i_d + else: + infos[k] = self.get_debug_info(self[k]) + print(infos) + + def get_debug_info(self, v, dshape=True, dvalue=True, dtype=False): + info = [] + if dshape == True and hasattr(v, 'shape'): + info.append(v.shape) + if dvalue == True and hasattr(v, 'numpy'): + info.append(np.mean(np.abs(v.numpy()))) + if dtype == True: + info.append(type(v)) + return info