Unverified commit e9d7f8a8 authored by FDInSky, committed by GitHub

add rcnn's modeling part (#862)

add Faster RCNN / Mask RCNN / Cascade RCNN model architectures, modules and ops
add a basic debug function to BufferDict
Parent a62f6803
from . import architecture
from . import backbone
from . import head
from . import ops
from . import anchor
from .architecture import *
from .backbone import *
from .head import *
from .ops import *
from .anchor import *
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Layer
from paddle.fluid.dygraph.base import to_variable
from ppdet.core.workspace import register
from ppdet.modeling.ops import (AnchorGenerator, RPNAnchorTargetGenerator,
ProposalGenerator, ProposalTargetGenerator,
MaskTargetGenerator, DecodeClipNms)
# TODO: modify here into ppdet.modeling.ops like DecodeClipNms
from ppdet.py_op.post_process import mask_post_process
@register
class BBoxPostProcess(Layer):
def __init__(self,
decode=None,
clip=None,
nms=None,
decode_clip_nms=DecodeClipNms().__dict__):
super(BBoxPostProcess, self).__init__()
self.decode = decode
self.clip = clip
self.nms = nms
self.decode_clip_nms = decode_clip_nms
if isinstance(decode_clip_nms, dict):
self.decode_clip_nms = DecodeClipNms(**decode_clip_nms)
def __call__(self, inputs):
# TODO: split into 3 steps
# TODO: modify related ops for deploying
# decode
# clip
# nms
outs = self.decode_clip_nms(inputs['rpn_rois'], inputs['bbox_prob'],
inputs['bbox_delta'], inputs['im_info'])
outs = {"predicted_bbox_nums": outs[0], "predicted_bbox": outs[1]}
return outs
@register
class MaskPostProcess(object):
__shared__ = ['num_classes']
def __init__(self, num_classes=81):
super(MaskPostProcess, self).__init__()
self.num_classes = num_classes
def __call__(self, inputs):
# TODO: modify related ops for deploying
outs = mask_post_process(inputs['predicted_bbox_nums'].numpy(),
inputs['predicted_bbox'].numpy(),
inputs['mask_logits'].numpy(),
inputs['im_info'].numpy())
outs = {'predicted_mask': outs}
return outs
@register
class Anchor(object):
__inject__ = ['anchor_generator', 'anchor_target_generator']
def __init__(self,
anchor_type='rpn',
anchor_generator=AnchorGenerator().__dict__,
anchor_target_generator=RPNAnchorTargetGenerator().__dict__):
super(Anchor, self).__init__()
self.anchor_generator = anchor_generator
self.anchor_target_generator = anchor_target_generator
if isinstance(anchor_generator, dict):
self.anchor_generator = AnchorGenerator(**anchor_generator)
if isinstance(anchor_target_generator, dict):
self.anchor_target_generator = RPNAnchorTargetGenerator(
**anchor_target_generator)
def __call__(self, inputs):
outs = self.generate_anchors(inputs)
return outs
def generate_anchors(self, inputs):
# TODO: update here to use int to specify featmap size
outs = self.anchor_generator(inputs['rpn_feat'])
outs = {'anchor': outs[0], 'var': outs[1], 'anchor_module': self}
return outs
def generate_anchors_target(self, inputs):
# TODO: add yolo anchor targets
rpn_rois_score = fluid.layers.transpose(
inputs['rpn_rois_score'], perm=[0, 2, 3, 1])
rpn_rois_delta = fluid.layers.transpose(
inputs['rpn_rois_delta'], perm=[0, 2, 3, 1])
rpn_rois_score = fluid.layers.reshape(
x=rpn_rois_score, shape=(0, -1, 1))
rpn_rois_delta = fluid.layers.reshape(
x=rpn_rois_delta, shape=(0, -1, 4))
anchor = fluid.layers.reshape(inputs['anchor'], shape=(-1, 4))
#var = fluid.layers.reshape(inputs['var'], shape=(-1, 4))
score_pred, roi_pred, score_tgt, roi_tgt, roi_weight = self.anchor_target_generator(
bbox_pred=rpn_rois_delta,
cls_logits=rpn_rois_score,
anchor_box=anchor,
gt_boxes=inputs['gt_bbox'],
is_crowd=inputs['is_crowd'],
im_info=inputs['im_info'])
outs = {
'rpn_score_pred': score_pred,
'rpn_score_target': score_tgt,
'rpn_rois_pred': roi_pred,
'rpn_rois_target': roi_tgt,
'rpn_rois_weight': roi_weight
}
return outs
def post_process(self, ):
# TODO: whether move bbox post process to here
pass
@register
class Proposal(object):
__inject__ = [
'proposal_generator', 'proposal_target_generator', 'bbox_post_process'
]
def __init__(
self,
proposal_generator=ProposalGenerator().__dict__,
proposal_target_generator=ProposalTargetGenerator().__dict__,
bbox_post_process=BBoxPostProcess().__dict__, ):
super(Proposal, self).__init__()
self.proposal_generator = proposal_generator
self.proposal_target_generator = proposal_target_generator
self.bbox_post_process = bbox_post_process
if isinstance(proposal_generator, dict):
self.proposal_generator = ProposalGenerator(**proposal_generator)
if isinstance(proposal_target_generator, dict):
self.proposal_target_generator = ProposalTargetGenerator(
**proposal_target_generator)
if isinstance(bbox_post_process, dict):
self.bbox_post_process = BBoxPostProcess(**bbox_post_process)
def __call__(self, inputs, stage=0):
outs = {}
if stage == 0:
proposal_out = self.generate_proposal(inputs)
inputs.update(proposal_out)
if inputs['mode'] == 'train':
proposal_target_out = self.generate_proposal_target(inputs, stage)
outs.update(proposal_target_out)
return outs
def generate_proposal(self, inputs):
rpn_rois_prob = fluid.layers.sigmoid(
inputs['rpn_rois_score'], name='rpn_rois_prob')
outs = self.proposal_generator(
scores=rpn_rois_prob,
bbox_deltas=inputs['rpn_rois_delta'],
anchors=inputs['anchor'],
variances=inputs['var'],
im_info=inputs['im_info'],
mode=inputs['mode'])
outs = {
'rpn_rois': outs[0],
'rpn_rois_probs': outs[1],
'rpn_rois_nums': outs[2]
}
return outs
def generate_proposal_target(self, inputs, stage=0):
outs = self.proposal_target_generator(
rpn_rois=inputs['rpn_rois'],
rpn_rois_nums=inputs['rpn_rois_nums'],
gt_classes=inputs['gt_class'],
is_crowd=inputs['is_crowd'],
gt_boxes=inputs['gt_bbox'],
im_info=inputs['im_info'],
stage=stage)
outs = {
'rois': outs[0],
'labels_int32': outs[1],
'bbox_targets': outs[2],
'bbox_inside_weights': outs[3],
'bbox_outside_weights': outs[4],
'rois_nums': outs[5]
}
return outs
def post_process(self, inputs):
outs = self.bbox_post_process(inputs)
return outs
@register
class Mask(object):
__inject__ = ['mask_target_generator', 'mask_post_process']
def __init__(self,
mask_target_generator=MaskTargetGenerator().__dict__,
mask_post_process=MaskPostProcess().__dict__):
super(Mask, self).__init__()
self.mask_target_generator = mask_target_generator
self.mask_post_process = mask_post_process
if isinstance(mask_target_generator, dict):
self.mask_target_generator = MaskTargetGenerator(
**mask_target_generator)
if isinstance(mask_post_process, dict):
self.mask_post_process = MaskPostProcess(**mask_post_process)
def __call__(self, inputs):
outs = {}
if inputs['mode'] == 'train':
outs = self.generate_mask_target(inputs)
return outs
def generate_mask_target(self, inputs):
outs = self.mask_target_generator(
im_info=inputs['im_info'],
gt_classes=inputs['gt_class'],
is_crowd=inputs['is_crowd'],
gt_segms=inputs['gt_mask'],
rois=inputs['rois'],
rois_nums=inputs['rois_nums'],
labels_int32=inputs['labels_int32'], )
outs = {
'mask_rois': outs[0],
'rois_has_mask_int32': outs[1],
'mask_int32': outs[2]
}
return outs
def post_process(self, inputs):
outs = self.mask_post_process(inputs)
return outs
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
from . import meta_arch
from . import faster_rcnn
from . import mask_rcnn
from .meta_arch import *
from .faster_rcnn import *
from .mask_rcnn import *
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddle import fluid
from ppdet.core.workspace import register
from .meta_arch import BaseArch
__all__ = ['FasterRCNN']
@register
class FasterRCNN(BaseArch):
__category__ = 'architecture'
__inject__ = [
'anchor',
'proposal',
'backbone',
'rpn_head',
'bbox_head',
]
def __init__(self,
anchor,
proposal,
backbone,
rpn_head,
bbox_head,
rpn_only=False):
super(FasterRCNN, self).__init__()
self.anchor = anchor
self.proposal = proposal
self.backbone = backbone
self.rpn_head = rpn_head
self.bbox_head = bbox_head
self.rpn_only = rpn_only
def forward(self, inputs, inputs_keys, mode='train'):
self.gbd = self.build_inputs(inputs, inputs_keys)
self.gbd['mode'] = mode
# Backbone
bb_out = self.backbone(self.gbd)
self.gbd.update(bb_out)
# RPN
rpn_head_out = self.rpn_head(self.gbd)
self.gbd.update(rpn_head_out)
# Anchor
anchor_out = self.anchor(self.gbd)
self.gbd.update(anchor_out)
# Proposal BBox
proposal_out = self.proposal(self.gbd)
self.gbd.update(proposal_out)
# BBox Head
bbox_head_out = self.bbox_head(self.gbd)
self.gbd.update(bbox_head_out)
if self.gbd['mode'] == 'infer':
bbox_out = self.proposal.post_process(self.gbd)
self.gbd.update(bbox_out)
# result
if self.gbd['mode'] == 'train':
return self.loss(self.gbd)
elif self.gbd['mode'] == 'infer':
return self.infer(self.gbd)
else:
raise "Now, only support train or infer mode!"
def loss(self, inputs):
losses = []
rpn_cls_loss, rpn_reg_loss = self.rpn_head.loss(inputs)
bbox_cls_loss, bbox_reg_loss = self.bbox_head.loss(inputs)
losses = [rpn_cls_loss, rpn_reg_loss, bbox_cls_loss, bbox_reg_loss]
loss = fluid.layers.sum(losses)
out = {
'loss': loss,
'loss_rpn_cls': rpn_cls_loss,
'loss_rpn_reg': rpn_reg_loss,
'loss_bbox_cls': bbox_cls_loss,
'loss_bbox_reg': bbox_reg_loss,
}
return out
def infer(self, inputs):
outs = {
"bbox_nums": inputs['predicted_bbox_nums'].numpy(),
"bbox": inputs['predicted_bbox'].numpy(),
'im_id': inputs['im_id'].numpy(),
'im_shape': inputs['im_shape'].numpy()
}
return outs
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddle import fluid
from ppdet.core.workspace import register
from ppdet.utils.data_structure import BufferDict
from .meta_arch import BaseArch
__all__ = ['MaskRCNN']
@register
class MaskRCNN(BaseArch):
__category__ = 'architecture'
__inject__ = [
'anchor',
'proposal',
'mask',
'backbone',
'rpn_head',
'bbox_head',
'mask_head',
]
def __init__(self,
anchor,
proposal,
mask,
backbone,
rpn_head,
bbox_head,
mask_head,
rpn_only=False):
super(MaskRCNN, self).__init__()
self.anchor = anchor
self.proposal = proposal
self.mask = mask
self.backbone = backbone
self.rpn_head = rpn_head
self.bbox_head = bbox_head
self.mask_head = mask_head
def forward(self, inputs, inputs_keys, mode='train'):
self.gbd = self.build_inputs(inputs, inputs_keys)
self.gbd['mode'] = mode
# Backbone
bb_out = self.backbone(self.gbd)
self.gbd.update(bb_out)
# RPN
rpn_head_out = self.rpn_head(self.gbd)
self.gbd.update(rpn_head_out)
# Anchor
anchor_out = self.anchor(self.gbd)
self.gbd.update(anchor_out)
# Proposal BBox
proposal_out = self.proposal(self.gbd)
self.gbd.update(proposal_out)
# BBox Head
bbox_head_out = self.bbox_head(self.gbd)
self.gbd.update(bbox_head_out)
if self.gbd['mode'] == 'infer':
bbox_out = self.proposal.post_process(self.gbd)
self.gbd.update(bbox_out)
# Mask
mask_out = self.mask(self.gbd)
self.gbd.update(mask_out)
# Mask Head
mask_head_out = self.mask_head(self.gbd)
self.gbd.update(mask_head_out)
if self.gbd['mode'] == 'infer':
mask_out = self.mask.post_process(self.gbd)
self.gbd.update(mask_out)
# result
if self.gbd['mode'] == 'train':
return self.loss(self.gbd)
elif self.gbd['mode'] == 'infer':
            return self.infer(self.gbd)
        else:
            raise ValueError("Only 'train' or 'infer' mode is supported!")
def loss(self, inputs):
losses = []
rpn_cls_loss, rpn_reg_loss = self.rpn_head.loss(inputs)
bbox_cls_loss, bbox_reg_loss = self.bbox_head.loss(inputs)
mask_loss = self.mask_head.loss(inputs)
losses = [
rpn_cls_loss, rpn_reg_loss, bbox_cls_loss, bbox_reg_loss, mask_loss
]
loss = fluid.layers.sum(losses)
out = {
'loss': loss,
'loss_rpn_cls': rpn_cls_loss,
'loss_rpn_reg': rpn_reg_loss,
'loss_bbox_cls': bbox_cls_loss,
'loss_bbox_reg': bbox_reg_loss,
'loss_mask': mask_loss
}
return out
def infer(self, inputs):
outs = {
'bbox_nums': inputs['predicted_bbox_nums'].numpy(),
'bbox': inputs['predicted_bbox'].numpy(),
'mask': inputs['predicted_mask'].numpy(),
'im_id': inputs['im_id'].numpy(),
'im_shape': inputs['im_shape'].numpy()
}
        return outs
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from paddle import fluid
from paddle.fluid.dygraph import Layer
from paddle.fluid.dygraph.base import to_variable
from ppdet.core.workspace import register
from ppdet.utils.data_structure import BufferDict
__all__ = ['BaseArch']
@register
class BaseArch(Layer):
def __init__(self, *args, **kwargs):
super(BaseArch, self).__init__()
def forward(self, inputs, inputs_keys, mode='train'):
raise NotImplementedError("Should implement forward method!")
def loss(self, inputs):
raise NotImplementedError("Should implement loss method!")
def infer(self, inputs):
raise NotImplementedError("Should implement infer method!")
def build_inputs(self, inputs, inputs_keys):
gbd = BufferDict()
for i, k in enumerate(inputs_keys):
v = to_variable(np.array([x[i] for x in inputs]))
gbd.set(k, v)
return gbd
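
A minimal sketch of how build_inputs is meant to be fed (the feed field names and shapes below are illustrative assumptions, not fixed by this file): each sample in the batch is indexed by position, and inputs_keys names each position.

import numpy as np
from paddle import fluid

with fluid.dygraph.guard():
    arch = BaseArch()
    # hypothetical feed layout; the real field names come from the reader / config
    inputs_keys = ['image', 'im_info', 'im_id']
    batch = [(np.zeros((3, 224, 224), 'float32'),
              np.array([224., 224., 1.], 'float32'),
              np.array([0], 'int64'))]
    gbd = arch.build_inputs(batch, inputs_keys)
    gbd['mode'] = 'train'
    print(gbd['image'].shape)  # samples are stacked along axis 0 -> (1, 3, 224, 224)
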
from . import resnet
from .resnet import *
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Layer
from paddle.fluid.dygraph import Conv2D, Pool2D, BatchNorm
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Constant
from ppdet.core.workspace import register, serializable
class ConvBNLayer(Layer):
def __init__(self,
name_scope,
ch_in,
ch_out,
filter_size,
stride,
padding,
act='relu',
learning_rate=1.0):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
num_channels=ch_in,
num_filters=ch_out,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=1,
act=act,
param_attr=ParamAttr(
name=name_scope + "_weights", learning_rate=learning_rate),
bias_attr=ParamAttr(name=name_scope + "_bias"))
if name_scope == "conv1":
bn_name = "bn_" + name_scope
else:
bn_name = "bn" + name_scope[3:]
self._bn = BatchNorm(
num_channels=ch_out,
act=act,
param_attr=ParamAttr(name=bn_name + '_scale'),
bias_attr=ParamAttr(bn_name + '_offset'),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance',
is_test=True)
def forward(self, inputs):
x = self._conv(inputs)
out = self._bn(x)
return out
class ConvAffineLayer(Layer):
def __init__(self,
name_scope,
ch_in,
ch_out,
filter_size,
stride,
padding,
learning_rate=1.0,
act='relu'):
super(ConvAffineLayer, self).__init__()
self._conv = Conv2D(
num_channels=ch_in,
num_filters=ch_out,
filter_size=filter_size,
stride=stride,
padding=padding,
act=None,
param_attr=ParamAttr(
name=name_scope + "_weights", learning_rate=learning_rate),
bias_attr=False)
if name_scope == "conv1":
bn_name = "bn_" + name_scope
else:
bn_name = "bn" + name_scope[3:]
self.name_scope = name_scope
        self.scale = fluid.layers.create_parameter(
shape=[ch_out],
dtype='float32',
attr=ParamAttr(
name=bn_name + '_scale', learning_rate=0.),
default_initializer=Constant(1.))
self.bias = fluid.layers.create_parameter(
shape=[ch_out],
dtype='float32',
attr=ParamAttr(
bn_name + '_offset', learning_rate=0.),
default_initializer=Constant(0.))
self.act = act
def forward(self, inputs):
conv = self._conv(inputs)
out = fluid.layers.affine_channel(
x=conv, scale=self.scale, bias=self.bias)
if self.act == 'relu':
out = fluid.layers.relu(x=out)
return out
class BottleNeck(Layer):
def __init__(self,
name_scope,
ch_in,
ch_out,
stride,
shortcut=True,
learning_rate=1.0):
super(BottleNeck, self).__init__()
self.shortcut = shortcut
if not shortcut:
self.short = ConvBNLayer(
name_scope + "_branch1",
ch_in=ch_in,
ch_out=ch_out * 4,
filter_size=1,
stride=stride,
padding=0,
act=None,
learning_rate=learning_rate)
self.conv1 = ConvBNLayer(
name_scope + "_branch2a",
ch_in=ch_in,
ch_out=ch_out,
filter_size=1,
stride=stride,
padding=0,
learning_rate=learning_rate, )
self.conv2 = ConvBNLayer(
name_scope + "_branch2b",
ch_in=ch_out,
ch_out=ch_out,
filter_size=3,
stride=1,
padding=1,
learning_rate=learning_rate)
self.conv3 = ConvBNLayer(
name_scope + "_branch2c",
ch_in=ch_out,
ch_out=ch_out * 4,
filter_size=1,
stride=1,
padding=0,
learning_rate=learning_rate,
act=None)
self.name_scope = name_scope
def forward(self, inputs):
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
conv1 = self.conv1(inputs)
conv2 = self.conv2(conv1)
conv3 = self.conv3(conv2)
out = fluid.layers.elementwise_add(
x=short,
y=conv3,
act='relu',
name=self.name_scope + ".add.output.5")
return out
class Blocks(Layer):
def __init__(self,
name_scope,
ch_in,
ch_out,
count,
stride,
learning_rate=1.0):
super(Blocks, self).__init__()
self.blocks = []
for i in range(count):
if i == 0:
name = name_scope + "a"
self.stride = stride
self.shortcut = False
else:
name = name_scope + chr(ord("a") + i)
self.stride = 1
self.shortcut = True
block = self.add_sublayer(
name,
BottleNeck(
name,
ch_in=ch_in if i == 0 else ch_out * 4,
ch_out=ch_out,
stride=self.stride,
shortcut=self.shortcut,
learning_rate=learning_rate))
self.blocks.append(block)
shortcut = True
def forward(self, inputs):
res_out = self.blocks[0](inputs)
for block in self.blocks[1:]:
res_out = block(res_out)
return res_out
@register
@serializable
class ResNet(Layer):
def __init__(
self,
norm_type='bn',
depth=50,
feature_maps=4,
freeze_at=2, ):
super(ResNet, self).__init__()
if depth == 50:
blocks = [3, 4, 6, 3]
elif depth == 101:
blocks = [3, 4, 23, 3]
        elif depth == 152:
            blocks = [3, 8, 36, 3]
        else:
            raise ValueError(
                "depth {} not supported, expected one of 50, 101, 152".format(
                    depth))
self.conv = ConvBNLayer(
"conv1",
ch_in=3,
ch_out=64,
filter_size=7,
stride=2,
padding=3,
learning_rate=0.)
self.pool2d_max = Pool2D(
pool_type='max', pool_size=3, pool_stride=2, pool_padding=1)
self.stage2 = Blocks(
"res2",
ch_in=64,
ch_out=64,
count=blocks[0],
stride=1,
learning_rate=0.)
self.stage3 = Blocks(
"res3", ch_in=256, ch_out=128, count=blocks[1], stride=2)
self.stage4 = Blocks(
"res4", ch_in=512, ch_out=256, count=blocks[2], stride=2)
def forward(self, inputs):
x = inputs['image']
conv1 = self.conv(x)
        pool1 = self.pool2d_max(conv1)
        res2 = self.stage2(pool1)
res2.stop_gradient = True
res3 = self.stage3(res2)
res4 = self.stage4(res3)
outs = {'res2': res2, 'res3': res3, 'res4': res4}
return outs
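
A small sketch, assuming the backbone can be instantiated directly in dygraph mode outside the config system, showing the feature maps this ResNet exposes to the RCNN heads:

import numpy as np
from paddle import fluid

with fluid.dygraph.guard():
    backbone = ResNet(depth=50)
    image = fluid.dygraph.to_variable(
        np.random.rand(1, 3, 224, 224).astype('float32'))
    feats = backbone({'image': image})
    # res4 is the stride-16 feature consumed by the RPN head, e.g. (1, 1024, 14, 14)
    print(feats['res4'].shape)
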
from . import rpn_head
from . import bbox_head
from . import mask_head
from .rpn_head import *
from .bbox_head import *
from .mask_head import *
import paddle.fluid as fluid
from paddle.fluid.dygraph import Layer
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Normal, MSRA
from paddle.fluid.regularizer import L2Decay
from paddle.fluid.dygraph.nn import Conv2D, Pool2D
from ppdet.core.workspace import register
from ..backbone.resnet import Blocks
from ..ops import RoIExtractor
@register
class BBoxFeat(Layer):
__inject__ = ['roi_extractor']
def __init__(self,
feat_in=1024,
feat_out=512,
roi_extractor=RoIExtractor().__dict__,
stage=0):
super(BBoxFeat, self).__init__()
self.roi_extractor = roi_extractor
if isinstance(roi_extractor, dict):
self.roi_extractor = RoIExtractor(**roi_extractor)
if stage == 0:
postfix = ''
else:
postfix = '_' + str(stage)
self.res5 = Blocks(
"res5", ch_in=feat_in, ch_out=feat_out, count=3, stride=2)
self.res5_pool = fluid.dygraph.Pool2D(
pool_type='avg', global_pooling=True)
def forward(self, inputs):
if inputs['mode'] == 'train':
rois = inputs['rois']
rois_num = inputs['rois_nums']
elif inputs['mode'] == 'infer':
rois = inputs['rpn_rois']
rois_num = inputs['rpn_rois_nums']
else:
raise "BBoxFeat only support train or infer mode!"
rois_feat = self.roi_extractor(inputs['res4'], rois, rois_num)
# TODO: add others
y_res5 = self.res5(rois_feat)
y = self.res5_pool(y_res5)
y = fluid.layers.squeeze(y, axes=[2, 3])
outs = {
'rois_feat': rois_feat,
'res5': y_res5,
"bbox_feat": y,
'shared_res5_block': self.res5,
'shared_roi_extractor': self.roi_extractor
}
return outs
@register
class BBoxHead(Layer):
__shared__ = ['num_classes']
__inject__ = ['bbox_feat']
def __init__(self,
in_feat=2048,
num_classes=81,
bbox_feat=BBoxFeat().__dict__,
stage=0):
super(BBoxHead, self).__init__()
self.num_classes = num_classes
self.bbox_feat = bbox_feat
if isinstance(bbox_feat, dict):
self.bbox_feat = BBoxFeat(**bbox_feat)
if stage == 0:
postfix = ''
else:
postfix = '_' + str(stage)
self.bbox_score = fluid.dygraph.Linear(
input_dim=in_feat,
output_dim=1 * self.num_classes,
act=None,
param_attr=ParamAttr(
name='cls_score_w' + postfix,
initializer=Normal(
loc=0.0, scale=0.001)),
bias_attr=ParamAttr(
name='cls_score_b' + postfix,
learning_rate=2.,
regularizer=L2Decay(0.)))
self.bbox_delta = fluid.dygraph.Linear(
input_dim=in_feat,
output_dim=4 * self.num_classes,
act=None,
param_attr=ParamAttr(
name='bbox_pred_w' + postfix,
initializer=Normal(
loc=0.0, scale=0.01)),
bias_attr=ParamAttr(
name='bbox_pred_b' + postfix,
learning_rate=2.,
regularizer=L2Decay(0.)))
def forward(self, inputs):
outs = self.bbox_feat(inputs)
x = outs['bbox_feat']
bs = self.bbox_score(x)
bd = self.bbox_delta(x)
outs.update({'bbox_score': bs, 'bbox_delta': bd})
if inputs['mode'] == 'infer':
bbox_prob = fluid.layers.softmax(bs, use_cudnn=False)
outs['bbox_prob'] = bbox_prob
return outs
def loss(self, inputs):
# bbox cls
labels_int64 = fluid.layers.cast(
x=inputs['labels_int32'], dtype='int64')
labels_int64.stop_gradient = True
bbox_score = fluid.layers.reshape(inputs['bbox_score'],
(-1, self.num_classes))
loss_bbox_cls = fluid.layers.softmax_with_cross_entropy(
logits=bbox_score, label=labels_int64)
loss_bbox_cls = fluid.layers.reduce_mean(
loss_bbox_cls, name='loss_bbox_cls')
# bbox reg
loss_bbox_reg = fluid.layers.smooth_l1(
x=inputs['bbox_delta'],
y=inputs['bbox_targets'],
inside_weight=inputs['bbox_inside_weights'],
outside_weight=inputs['bbox_outside_weights'],
sigma=1.0)
loss_bbox_reg = fluid.layers.reduce_mean(
loss_bbox_reg, name='loss_bbox_loc')
return loss_bbox_cls, loss_bbox_reg
from functools import reduce

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Layer
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Normal, MSRA
from paddle.fluid.regularizer import L2Decay
from paddle.fluid.dygraph.nn import Conv2D, Pool2D
from ppdet.core.workspace import register
from ..ops import RoIExtractor
from ..backbone.resnet import Blocks
@register
class MaskFeat(Layer):
__inject__ = ['mask_roi_extractor']
def __init__(self,
feat_in=2048,
feat_out=256,
mask_roi_extractor=RoIExtractor().__dict__,
stage=0):
super(MaskFeat, self).__init__()
self.feat_in = feat_in
self.feat_out = feat_out
self.mask_roi_extractor = mask_roi_extractor
if isinstance(mask_roi_extractor, dict):
self.mask_roi_extractor = RoIExtractor(**mask_roi_extractor)
if stage == 0:
postfix = ''
else:
postfix = '_' + str(stage)
self.upsample = fluid.dygraph.Conv2DTranspose(
num_channels=self.feat_in,
num_filters=self.feat_out,
filter_size=2,
stride=2,
act='relu',
param_attr=ParamAttr(
name='conv5_mask_w' + postfix, initializer=MSRA(uniform=False)),
bias_attr=ParamAttr(
name='conv5_mask_b' + postfix,
learning_rate=2.,
regularizer=L2Decay(0.)))
def forward(self, inputs):
if inputs['mode'] == 'train':
x = inputs['res5']
rois_feat = fluid.layers.gather(x, inputs['rois_has_mask_int32'])
elif inputs['mode'] == 'infer':
rois = inputs['predicted_bbox'][:, 2:] * inputs['im_info'][:, 2]
rois_num = inputs['predicted_bbox_nums']
# TODO: optim here
if callable(inputs['shared_roi_extractor']):
rois_feat = inputs['shared_roi_extractor'](inputs['res4'], rois,
rois_num)
if callable(inputs['shared_res5_block']):
rois_feat = inputs['shared_res5_block'](rois_feat)
# upsample
y = self.upsample(rois_feat)
outs = {'mask_feat': y}
return outs
@register
class MaskHead(Layer):
__shared__ = ['num_classes']
__inject__ = ['mask_feat']
def __init__(self,
feat_in=256,
resolution=14,
num_classes=81,
mask_feat=MaskFeat().__dict__,
stage=0):
super(MaskHead, self).__init__()
self.feat_in = feat_in
self.resolution = resolution
self.num_classes = num_classes
self.mask_feat = mask_feat
if isinstance(mask_feat, dict):
self.mask_feat = MaskFeat(**mask_feat)
if stage == 0:
postfix = ''
else:
postfix = '_' + str(stage)
self.mask_fcn_logits = fluid.dygraph.Conv2D(
num_channels=self.feat_in,
num_filters=self.num_classes,
filter_size=1,
param_attr=ParamAttr(
name='mask_fcn_logits_w' + postfix,
initializer=MSRA(uniform=False)),
bias_attr=ParamAttr(
name='mask_fcn_logits_b' + postfix,
learning_rate=2.,
regularizer=L2Decay(0.0)))
def forward(self, inputs):
# feat
outs = self.mask_feat(inputs)
x = outs['mask_feat']
# logits
mask_logits = self.mask_fcn_logits(x)
if inputs['mode'] == 'infer':
pred_bbox = inputs['predicted_bbox']
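            # if predicted_bbox collapses to a single-element placeholder
            # (likely meaning no detections survived NMS), pass it through
            # as the mask output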
shape = reduce((lambda x, y: x * y), pred_bbox.shape)
shape = np.asarray(shape).reshape((1, 1))
ones = np.ones((1, 1), dtype=np.int32)
cond = (shape == ones).all()
if cond:
mask_logits = pred_bbox
outs['mask_logits'] = mask_logits
return outs
def loss(self, inputs):
# input needs (model_out, target)
reshape_dim = self.num_classes * self.resolution * self.resolution
mask_logits = fluid.layers.reshape(inputs['mask_logits'],
(-1, reshape_dim))
mask_label = fluid.layers.cast(x=inputs['mask_int32'], dtype='float32')
loss_mask = fluid.layers.sigmoid_cross_entropy_with_logits(
x=mask_logits, label=mask_label, ignore_index=-1, normalize=True)
loss_mask = fluid.layers.reduce_sum(loss_mask, name='loss_mask')
return loss_mask
import paddle.fluid as fluid
from paddle.fluid.dygraph import Layer
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Normal
from paddle.fluid.regularizer import L2Decay
from paddle.fluid.dygraph.nn import Conv2D
from ppdet.core.workspace import register
from ..ops import RPNAnchorTargetGenerator
@register
class RPNFeat(Layer):
def __init__(self, feat_in=1024, feat_out=1024):
super(RPNFeat, self).__init__()
        self.rpn_conv = fluid.dygraph.Conv2D(
            num_channels=feat_in,
            num_filters=feat_out,
filter_size=3,
stride=1,
padding=1,
act='relu',
param_attr=ParamAttr(
"conv_rpn_w", initializer=Normal(
loc=0., scale=0.01)),
bias_attr=ParamAttr(
"conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.)))
def forward(self, inputs):
x = inputs.get('res4')
y = self.rpn_conv(x)
outs = {'rpn_feat': y}
return outs
@register
class RPNHead(Layer):
__inject__ = ['rpn_feat']
def __init__(self, anchor_per_position=15, rpn_feat=RPNFeat().__dict__):
super(RPNHead, self).__init__()
self.anchor_per_position = anchor_per_position
self.rpn_feat = rpn_feat
if isinstance(rpn_feat, dict):
self.rpn_feat = RPNFeat(**rpn_feat)
# rpn roi classification scores
self.rpn_rois_score = fluid.dygraph.Conv2D(
num_channels=1024,
num_filters=1 * self.anchor_per_position,
filter_size=1,
stride=1,
padding=0,
act=None,
param_attr=ParamAttr(
name="rpn_cls_logits_w", initializer=Normal(
loc=0., scale=0.01)),
bias_attr=ParamAttr(
name="rpn_cls_logits_b",
learning_rate=2.,
regularizer=L2Decay(0.)))
# rpn roi bbox regression deltas
self.rpn_rois_delta = fluid.dygraph.Conv2D(
num_channels=1024,
num_filters=4 * self.anchor_per_position,
filter_size=1,
stride=1,
padding=0,
act=None,
param_attr=ParamAttr(
name="rpn_bbox_pred_w", initializer=Normal(
loc=0., scale=0.01)),
bias_attr=ParamAttr(
name="rpn_bbox_pred_b",
learning_rate=2.,
regularizer=L2Decay(0.)))
def forward(self, inputs):
outs = self.rpn_feat(inputs)
x = outs['rpn_feat']
rrs = self.rpn_rois_score(x)
rrd = self.rpn_rois_delta(x)
outs.update({'rpn_rois_score': rrs, 'rpn_rois_delta': rrd})
return outs
def loss(self, inputs):
if callable(inputs['anchor_module']):
rpn_targets = inputs['anchor_module'].generate_anchors_target(
inputs)
# cls loss
score_tgt = fluid.layers.cast(
x=rpn_targets['rpn_score_target'], dtype='float32')
rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits(
x=rpn_targets['rpn_score_pred'], label=score_tgt)
rpn_cls_loss = fluid.layers.reduce_mean(
rpn_cls_loss, name='loss_rpn_cls')
# reg loss
rpn_reg_loss = fluid.layers.smooth_l1(
x=rpn_targets['rpn_rois_pred'],
y=rpn_targets['rpn_rois_target'],
sigma=3.0,
inside_weight=rpn_targets['rpn_rois_weight'],
outside_weight=rpn_targets['rpn_rois_weight'])
rpn_reg_loss = fluid.layers.reduce_mean(
rpn_reg_loss, name='loss_rpn_reg')
return rpn_cls_loss, rpn_reg_loss
import numpy as np
from numbers import Integral
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import NumpyArrayInitializer
from ppdet.core.workspace import register, serializable
from ppdet.py_op.target import generate_rpn_anchor_target, generate_proposal_target, generate_mask_target
from ppdet.py_op.post_process import bbox_post_process
@register
@serializable
class AnchorGenerator(object):
def __init__(self,
anchor_sizes=[32, 64, 128, 256, 512],
aspect_ratios=[0.5, 1.0, 2.0],
stride=[16.0, 16.0],
variance=[1.0, 1.0, 1.0, 1.0]):
super(AnchorGenerator, self).__init__()
self.anchor_sizes = anchor_sizes
self.aspect_ratios = aspect_ratios
self.stride = stride
self.variance = variance
def __call__(self, inputs):
outs = fluid.layers.anchor_generator(
input=inputs,
anchor_sizes=self.anchor_sizes,
aspect_ratios=self.aspect_ratios,
stride=self.stride,
variance=self.variance)
return outs
@register
@serializable
class RPNAnchorTargetGenerator(object):
def __init__(self,
batch_size_per_im=256,
straddle_thresh=0.,
fg_fraction=0.5,
positive_overlap=0.7,
negative_overlap=0.3,
use_random=True):
super(RPNAnchorTargetGenerator, self).__init__()
self.batch_size_per_im = batch_size_per_im
self.straddle_thresh = straddle_thresh
self.fg_fraction = fg_fraction
self.positive_overlap = positive_overlap
self.negative_overlap = negative_overlap
self.use_random = use_random
def __call__(self, cls_logits, bbox_pred, anchor_box, gt_boxes, is_crowd,
im_info):
anchor_box = anchor_box.numpy()
gt_boxes = gt_boxes.numpy()
is_crowd = is_crowd.numpy()
im_info = im_info.numpy()
loc_indexes, score_indexes, tgt_labels, tgt_bboxes, bbox_inside_weights = generate_rpn_anchor_target(
anchor_box, gt_boxes, is_crowd, im_info, self.straddle_thresh,
self.batch_size_per_im, self.positive_overlap,
self.negative_overlap, self.fg_fraction, self.use_random)
loc_indexes = to_variable(loc_indexes)
score_indexes = to_variable(score_indexes)
tgt_labels = to_variable(tgt_labels)
tgt_bboxes = to_variable(tgt_bboxes)
bbox_inside_weights = to_variable(bbox_inside_weights)
loc_indexes.stop_gradient = True
score_indexes.stop_gradient = True
tgt_labels.stop_gradient = True
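        # flatten the raw predictions and gather only the sampled anchor
        # positions, so the RPN losses are computed on the sampled subset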
cls_logits = fluid.layers.reshape(x=cls_logits, shape=(-1, ))
bbox_pred = fluid.layers.reshape(x=bbox_pred, shape=(-1, 4))
pred_cls_logits = fluid.layers.gather(cls_logits, score_indexes)
pred_bbox_pred = fluid.layers.gather(bbox_pred, loc_indexes)
return pred_cls_logits, pred_bbox_pred, tgt_labels, tgt_bboxes, bbox_inside_weights
@register
@serializable
class ProposalGenerator(object):
__append_doc__ = True
def __init__(self,
train_pre_nms_top_n=12000,
train_post_nms_top_n=2000,
infer_pre_nms_top_n=6000,
infer_post_nms_top_n=1000,
nms_thresh=.5,
min_size=.1,
eta=1.,
return_rois_num=True):
super(ProposalGenerator, self).__init__()
self.train_pre_nms_top_n = train_pre_nms_top_n
self.train_post_nms_top_n = train_post_nms_top_n
self.infer_pre_nms_top_n = infer_pre_nms_top_n
self.infer_post_nms_top_n = infer_post_nms_top_n
self.nms_thresh = nms_thresh
self.min_size = min_size
self.eta = eta
self.return_rois_num = return_rois_num
def __call__(self,
scores,
bbox_deltas,
anchors,
variances,
im_info,
mode='train'):
pre_nms_top_n = self.train_pre_nms_top_n if mode == 'train' else self.infer_pre_nms_top_n
post_nms_top_n = self.train_post_nms_top_n if mode == 'train' else self.infer_post_nms_top_n
outs = fluid.layers.generate_proposals(
scores,
bbox_deltas,
im_info,
anchors,
variances,
pre_nms_top_n=pre_nms_top_n,
post_nms_top_n=post_nms_top_n,
nms_thresh=self.nms_thresh,
min_size=self.min_size,
eta=self.eta,
return_rois_num=self.return_rois_num)
return outs
@register
@serializable
class ProposalTargetGenerator(object):
__shared__ = ['num_classes']
def __init__(self,
batch_size_per_im=512,
fg_fraction=.25,
fg_thresh=[.5, ],
bg_thresh_hi=[.5, ],
bg_thresh_lo=[0., ],
bbox_reg_weights=[[0.1, 0.1, 0.2, 0.2]],
num_classes=81,
shuffle_before_sample=True,
is_cls_agnostic=False,
is_cascade_rcnn=False):
super(ProposalTargetGenerator, self).__init__()
self.batch_size_per_im = batch_size_per_im
self.fg_fraction = fg_fraction
self.fg_thresh = fg_thresh
self.bg_thresh_hi = bg_thresh_hi
self.bg_thresh_lo = bg_thresh_lo
self.bbox_reg_weights = bbox_reg_weights
self.num_classes = num_classes
self.use_random = shuffle_before_sample
        self.is_cls_agnostic = is_cls_agnostic
self.is_cascade_rcnn = is_cascade_rcnn
def __call__(self,
rpn_rois,
rpn_rois_nums,
gt_classes,
is_crowd,
gt_boxes,
im_info,
stage=0):
rpn_rois = rpn_rois.numpy()
rpn_rois_nums = rpn_rois_nums.numpy()
gt_classes = gt_classes.numpy()
gt_boxes = gt_boxes.numpy()
is_crowd = is_crowd.numpy()
im_info = im_info.numpy()
outs = generate_proposal_target(
rpn_rois, rpn_rois_nums, gt_classes, is_crowd, gt_boxes, im_info,
self.batch_size_per_im, self.fg_fraction, self.fg_thresh[stage],
self.bg_thresh_hi[stage], self.bg_thresh_lo[stage],
self.bbox_reg_weights[stage], self.num_classes, self.use_random,
self.is_cls_agnostic, self.is_cascade_rcnn)
outs = [to_variable(v) for v in outs]
for v in outs:
v.stop_gradient = True
return outs
@register
@serializable
class MaskTargetGenerator(object):
__shared__ = ['num_classes']
def __init__(self, num_classes=81, resolution=14):
super(MaskTargetGenerator, self).__init__()
self.num_classes = num_classes
self.resolution = resolution
def __call__(self, im_info, gt_classes, is_crowd, gt_segms, rois, rois_nums,
labels_int32):
im_info = im_info.numpy()
gt_classes = gt_classes.numpy()
is_crowd = is_crowd.numpy()
gt_segms = gt_segms.numpy()
rois = rois.numpy()
rois_nums = rois_nums.numpy()
labels_int32 = labels_int32.numpy()
outs = generate_mask_target(im_info, gt_classes, is_crowd, gt_segms,
rois, rois_nums, labels_int32,
self.num_classes, self.resolution)
outs = [to_variable(v) for v in outs]
for v in outs:
v.stop_gradient = True
return outs
@register
class RoIExtractor(object):
def __init__(self,
resolution=7,
spatial_scale=1. / 16,
sampling_ratio=0,
extractor_type='RoIPool'):
super(RoIExtractor, self).__init__()
if isinstance(resolution, Integral):
resolution = [resolution, resolution]
self.resolution = resolution
self.spatial_scale = spatial_scale
self.sampling_ratio = sampling_ratio
self.extractor_type = extractor_type
def __call__(self, feat, rois, rois_nums):
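        # build cumulative per-image offsets from the roi counts; this is the
        # lod-style layout expected by the rois_lod argument below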
cur_l = 0
new_nums = [cur_l]
rois_nums_np = rois_nums.numpy()
for l in rois_nums_np:
cur_l += l
new_nums.append(cur_l)
nums_t = to_variable(np.asarray(new_nums))
if self.extractor_type == 'RoIAlign':
rois_feat = fluid.layers.roi_align(
feat,
rois,
self.resolution[0],
self.resolution[1],
self.spatial_scale,
rois_lod=nums_t)
elif self.extractor_type == 'RoIPool':
rois_feat = fluid.layers.roi_pool(
feat,
rois,
self.resolution[0],
self.resolution[1],
self.spatial_scale,
                rois_lod=nums_t)
        else:
            raise ValueError("Unsupported extractor_type: {}".format(
                self.extractor_type))
        return rois_feat
@register
@serializable
class DecodeClipNms(object):
__shared__ = ['num_classes']
def __init__(
self,
num_classes=81,
keep_top_k=100,
score_threshold=0.05,
nms_threshold=0.5, ):
super(DecodeClipNms, self).__init__()
self.num_classes = num_classes
self.keep_top_k = keep_top_k
self.score_threshold = score_threshold
self.nms_threshold = nms_threshold
def __call__(self, bbox, bbox_prob, bbox_delta, img_info):
outs = bbox_post_process(bbox.numpy(),
bbox_prob.numpy(),
bbox_delta.numpy(),
img_info.numpy(), self.keep_top_k,
self.score_threshold, self.nms_threshold,
self.num_classes)
outs = [to_variable(v) for v in outs]
for v in outs:
v.stop_gradient = True
return outs
@register
@serializable
class AnchorGrid(object):
"""Generate anchor grid
Args:
image_size (int or list): input image size, may be a single integer or
list of [h, w]. Default: 512
min_level (int): min level of the feature pyramid. Default: 3
max_level (int): max level of the feature pyramid. Default: 7
anchor_base_scale: base anchor scale. Default: 4
num_scales: number of anchor scales. Default: 3
aspect_ratios: aspect ratios. default: [[1, 1], [1.4, 0.7], [0.7, 1.4]]
"""
def __init__(self,
image_size=512,
min_level=3,
max_level=7,
anchor_base_scale=4,
num_scales=3,
aspect_ratios=[[1, 1], [1.4, 0.7], [0.7, 1.4]]):
super(AnchorGrid, self).__init__()
if isinstance(image_size, Integral):
self.image_size = [image_size, image_size]
else:
self.image_size = image_size
for dim in self.image_size:
assert dim % 2 ** max_level == 0, \
"image size should be multiple of the max level stride"
self.min_level = min_level
self.max_level = max_level
self.anchor_base_scale = anchor_base_scale
self.num_scales = num_scales
self.aspect_ratios = aspect_ratios
@property
def base_cell(self):
if not hasattr(self, '_base_cell'):
self._base_cell = self.make_cell()
return self._base_cell
def make_cell(self):
scales = [2**(i / self.num_scales) for i in range(self.num_scales)]
scales = np.array(scales)
ratios = np.array(self.aspect_ratios)
ws = np.outer(scales, ratios[:, 0]).reshape(-1, 1)
hs = np.outer(scales, ratios[:, 1]).reshape(-1, 1)
anchors = np.hstack((-0.5 * ws, -0.5 * hs, 0.5 * ws, 0.5 * hs))
return anchors
def make_grid(self, stride):
cell = self.base_cell * stride * self.anchor_base_scale
x_steps = np.arange(stride // 2, self.image_size[1], stride)
y_steps = np.arange(stride // 2, self.image_size[0], stride)
offset_x, offset_y = np.meshgrid(x_steps, y_steps)
offset_x = offset_x.flatten()
offset_y = offset_y.flatten()
offsets = np.stack((offset_x, offset_y, offset_x, offset_y), axis=-1)
offsets = offsets[:, np.newaxis, :]
return (cell + offsets).reshape(-1, 4)
def generate(self):
return [
self.make_grid(2**l)
for l in range(self.min_level, self.max_level + 1)
]
def __call__(self):
if not hasattr(self, '_anchor_vars'):
anchor_vars = []
helper = LayerHelper('anchor_grid')
for idx, l in enumerate(range(self.min_level, self.max_level + 1)):
stride = 2**l
anchors = self.make_grid(stride)
var = helper.create_parameter(
attr=ParamAttr(name='anchors_{}'.format(idx)),
shape=anchors.shape,
dtype='float32',
stop_gradient=True,
default_initializer=NumpyArrayInitializer(anchors))
anchor_vars.append(var)
var.persistable = True
self._anchor_vars = anchor_vars
return self._anchor_vars
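
A short usage sketch for AnchorGrid (assuming it can be instantiated directly); generate() returns one dense (N, 4) anchor array per pyramid level:

grid = AnchorGrid(image_size=512, min_level=3, max_level=7)
anchors = grid.generate()
for level, a in zip(range(grid.min_level, grid.max_level + 1), anchors):
    # e.g. level 3: stride 8, 64 x 64 positions x 9 anchors -> (36864, 4)
    print(level, a.shape)
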
import numpy as np
class BufferDict(dict):
def __init__(self, **kwargs):
super(BufferDict, self).__init__(**kwargs)
def __getitem__(self, key):
if key in self.keys():
return super(BufferDict, self).__getitem__(key)
else:
raise Exception("The %s is not in global inputs dict" % key)
def __setitem__(self, key, value):
if key not in self.keys():
super(BufferDict, self).__setitem__(key, value)
else:
raise Exception("The %s is already in global inputs dict" % key)
def update(self, *args, **kwargs):
for k, v in dict(*args, **kwargs).items():
self[k] = v
def get(self, key):
return self.__getitem__(key)
def set(self, key, value):
self.__setitem__(key, value)
def debug(self, dshape=True, dtype=False, dvalue=False, name='all'):
if name == 'all':
ditems = self.items()
else:
            ditems = [(name, self.get(name))]
        for k, v in ditems:
            info = [k]
            if dshape and hasattr(v, 'shape'):
                info.append(v.shape)
            if dtype:
                info.append(type(v))
            if dvalue and hasattr(v, 'numpy'):
                info.append(np.mean(np.abs(v.numpy())))
            print(info)
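
A brief sketch of the BufferDict contract introduced here (keys are write-once, and debug walks the stored buffers), assuming a dygraph context so that tensor values can be summarized:

import numpy as np
from paddle import fluid
from paddle.fluid.dygraph.base import to_variable

with fluid.dygraph.guard():
    gbd = BufferDict()
    gbd.set('im_id', to_variable(np.array([[1]], 'int64')))
    gbd.update({'im_shape': to_variable(np.array([[800., 1333., 1.]], 'float32'))})
    # prints [key, shape] (plus mean(|value|) when dvalue=True) for each buffer
    gbd.debug(dshape=True, dvalue=True)
    # assigning to an existing key raises, which catches accidental overwrites
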