提交 6b5d7100 编写于 作者: Y Yuan Gao 提交者: wangguanzhong

add rpn recall function and class-aware rpn (#2814)

* add rpn recall function and class-aware rpn
上级 3dd4f349
...@@ -22,8 +22,8 @@ from paddle.fluid.initializer import Normal ...@@ -22,8 +22,8 @@ from paddle.fluid.initializer import Normal
from paddle.fluid.regularizer import L2Decay from paddle.fluid.regularizer import L2Decay
from ppdet.core.workspace import register from ppdet.core.workspace import register
from ppdet.modeling.ops import (AnchorGenerator, from ppdet.modeling.ops import (AnchorGenerator, RPNTargetAssign,
RPNTargetAssign, GenerateProposals) GenerateProposals)
__all__ = ['RPNTargetAssign', 'GenerateProposals', 'RPNHead', 'FPNRPNHead'] __all__ = ['RPNTargetAssign', 'GenerateProposals', 'RPNHead', 'FPNRPNHead']
...@@ -48,12 +48,14 @@ class RPNHead(object): ...@@ -48,12 +48,14 @@ class RPNHead(object):
anchor_generator=AnchorGenerator().__dict__, anchor_generator=AnchorGenerator().__dict__,
rpn_target_assign=RPNTargetAssign().__dict__, rpn_target_assign=RPNTargetAssign().__dict__,
train_proposal=GenerateProposals(12000, 2000).__dict__, train_proposal=GenerateProposals(12000, 2000).__dict__,
test_proposal=GenerateProposals().__dict__): test_proposal=GenerateProposals().__dict__,
num_classes=1):
super(RPNHead, self).__init__() super(RPNHead, self).__init__()
self.anchor_generator = anchor_generator self.anchor_generator = anchor_generator
self.rpn_target_assign = rpn_target_assign self.rpn_target_assign = rpn_target_assign
self.train_proposal = train_proposal self.train_proposal = train_proposal
self.test_proposal = test_proposal self.test_proposal = test_proposal
self.num_classes = num_classes
if isinstance(anchor_generator, dict): if isinstance(anchor_generator, dict):
self.anchor_generator = AnchorGenerator(**anchor_generator) self.anchor_generator = AnchorGenerator(**anchor_generator)
if isinstance(rpn_target_assign, dict): if isinstance(rpn_target_assign, dict):
...@@ -96,7 +98,7 @@ class RPNHead(object): ...@@ -96,7 +98,7 @@ class RPNHead(object):
# Proposal classification scores # Proposal classification scores
self.rpn_cls_score = fluid.layers.conv2d( self.rpn_cls_score = fluid.layers.conv2d(
rpn_conv, rpn_conv,
num_filters=num_anchor, num_filters=num_anchor * self.num_classes,
filter_size=1, filter_size=1,
stride=1, stride=1,
padding=0, padding=0,
...@@ -147,12 +149,27 @@ class RPNHead(object): ...@@ -147,12 +149,27 @@ class RPNHead(object):
body_feat = list(body_feats.values())[-1] body_feat = list(body_feats.values())[-1]
rpn_cls_score, rpn_bbox_pred = self._get_output(body_feat) rpn_cls_score, rpn_bbox_pred = self._get_output(body_feat)
rpn_cls_score_prob = fluid.layers.sigmoid( if self.num_classes == 1:
rpn_cls_score, name='rpn_cls_score_prob') rpn_cls_prob = fluid.layers.sigmoid(
rpn_cls_score, name='rpn_cls_prob')
else:
rpn_cls_score = fluid.layers.transpose(
rpn_cls_score, perm=[0, 2, 3, 1])
rpn_cls_score = fluid.layers.reshape(
rpn_cls_score, shape=(0, 0, 0, -1, self.num_classes))
rpn_cls_prob_tmp = fluid.layers.softmax(
rpn_cls_score, use_cudnn=False, name='rpn_cls_prob')
rpn_cls_prob_slice = fluid.layers.slice(
rpn_cls_prob_tmp, axes=[4], starts=[1],
ends=[self.num_classes])
rpn_cls_prob, _ = fluid.layers.topk(rpn_cls_prob_slice, 1)
rpn_cls_prob = fluid.layers.reshape(
rpn_cls_prob, shape=(0, 0, 0, -1))
rpn_cls_prob = fluid.layers.transpose(
rpn_cls_prob, perm=[0, 3, 1, 2])
prop_op = self.train_proposal if mode == 'train' else self.test_proposal prop_op = self.train_proposal if mode == 'train' else self.test_proposal
rpn_rois, rpn_roi_probs = prop_op( rpn_rois, rpn_roi_probs = prop_op(
scores=rpn_cls_score_prob, scores=rpn_cls_prob,
bbox_deltas=rpn_bbox_pred, bbox_deltas=rpn_bbox_pred,
im_info=im_info, im_info=im_info,
anchors=self.anchor, anchors=self.anchor,
...@@ -165,7 +182,8 @@ class RPNHead(object): ...@@ -165,7 +182,8 @@ class RPNHead(object):
rpn_bbox_pred = fluid.layers.transpose(rpn_bbox_pred, perm=[0, 2, 3, 1]) rpn_bbox_pred = fluid.layers.transpose(rpn_bbox_pred, perm=[0, 2, 3, 1])
anchor = fluid.layers.reshape(anchor, shape=(-1, 4)) anchor = fluid.layers.reshape(anchor, shape=(-1, 4))
anchor_var = fluid.layers.reshape(anchor_var, shape=(-1, 4)) anchor_var = fluid.layers.reshape(anchor_var, shape=(-1, 4))
rpn_cls_score = fluid.layers.reshape(x=rpn_cls_score, shape=(0, -1, 1)) rpn_cls_score = fluid.layers.reshape(
x=rpn_cls_score, shape=(0, -1, self.num_classes))
rpn_bbox_pred = fluid.layers.reshape(x=rpn_bbox_pred, shape=(0, -1, 4)) rpn_bbox_pred = fluid.layers.reshape(x=rpn_bbox_pred, shape=(0, -1, 4))
return rpn_cls_score, rpn_bbox_pred, anchor, anchor_var return rpn_cls_score, rpn_bbox_pred, anchor, anchor_var
...@@ -177,7 +195,7 @@ class RPNHead(object): ...@@ -177,7 +195,7 @@ class RPNHead(object):
return self._transform_input(self.rpn_cls_score, self.rpn_bbox_pred, return self._transform_input(self.rpn_cls_score, self.rpn_bbox_pred,
self.anchor, self.anchor_var) self.anchor, self.anchor_var)
def get_loss(self, im_info, gt_box, is_crowd): def get_loss(self, im_info, gt_box, is_crowd, gt_label=None):
""" """
Sample proposals and Calculate rpn loss. Sample proposals and Calculate rpn loss.
...@@ -196,20 +214,37 @@ class RPNHead(object): ...@@ -196,20 +214,37 @@ class RPNHead(object):
""" """
rpn_cls, rpn_bbox, anchor, anchor_var = self._get_loss_input() rpn_cls, rpn_bbox, anchor, anchor_var = self._get_loss_input()
score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \ if self.num_classes == 1:
self.rpn_target_assign( score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \
bbox_pred=rpn_bbox, self.rpn_target_assign(
cls_logits=rpn_cls, bbox_pred=rpn_bbox,
anchor_box=anchor, cls_logits=rpn_cls,
anchor_var=anchor_var, anchor_box=anchor,
gt_boxes=gt_box, anchor_var=anchor_var,
is_crowd=is_crowd, gt_boxes=gt_box,
im_info=im_info) is_crowd=is_crowd,
im_info=im_info)
score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32') score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32')
score_tgt.stop_gradient = True score_tgt.stop_gradient = True
rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits( rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits(
x=score_pred, label=score_tgt) x=score_pred, label=score_tgt)
else:
score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \
self.rpn_target_assign(
bbox_pred=rpn_bbox,
cls_logits=rpn_cls,
anchor_box=anchor,
anchor_var=anchor_var,
gt_boxes=gt_box,
gt_labels=gt_label,
is_crowd=is_crowd,
num_classes=self.num_classes,
im_info=im_info)
labels_int64 = fluid.layers.cast(x=score_tgt, dtype='int64')
labels_int64.stop_gradient = True
rpn_cls_loss = fluid.layers.softmax_with_cross_entropy(
logits=score_pred, label=labels_int64, numeric_stable_mode=True)
rpn_cls_loss = fluid.layers.reduce_mean( rpn_cls_loss = fluid.layers.reduce_mean(
rpn_cls_loss, name='loss_rpn_cls') rpn_cls_loss, name='loss_rpn_cls')
...@@ -261,13 +296,15 @@ class FPNRPNHead(RPNHead): ...@@ -261,13 +296,15 @@ class FPNRPNHead(RPNHead):
anchor_start_size=32, anchor_start_size=32,
num_chan=256, num_chan=256,
min_level=2, min_level=2,
max_level=6): max_level=6,
num_classes=1):
super(FPNRPNHead, self).__init__(anchor_generator, rpn_target_assign, super(FPNRPNHead, self).__init__(anchor_generator, rpn_target_assign,
train_proposal, test_proposal) train_proposal, test_proposal)
self.anchor_start_size = anchor_start_size self.anchor_start_size = anchor_start_size
self.num_chan = num_chan self.num_chan = num_chan
self.min_level = min_level self.min_level = min_level
self.max_level = max_level self.max_level = max_level
self.num_classes = num_classes
self.fpn_rpn_list = [] self.fpn_rpn_list = []
self.anchors_list = [] self.anchors_list = []
...@@ -319,9 +356,10 @@ class FPNRPNHead(RPNHead): ...@@ -319,9 +356,10 @@ class FPNRPNHead(RPNHead):
**(feat_lvl - self.min_level), ), **(feat_lvl - self.min_level), ),
stride=(2.**feat_lvl, 2.**feat_lvl)) stride=(2.**feat_lvl, 2.**feat_lvl))
cls_num_filters = num_anchors * self.num_classes
self.rpn_cls_score = fluid.layers.conv2d( self.rpn_cls_score = fluid.layers.conv2d(
input=conv_rpn_fpn, input=conv_rpn_fpn,
num_filters=num_anchors, num_filters=cls_num_filters,
filter_size=1, filter_size=1,
act=None, act=None,
name=cls_name, name=cls_name,
...@@ -366,19 +404,37 @@ class FPNRPNHead(RPNHead): ...@@ -366,19 +404,37 @@ class FPNRPNHead(RPNHead):
shape of (rois_num, 1). shape of (rois_num, 1).
""" """
rpn_cls_logits_fpn, rpn_bbox_pred_fpn = self._get_output(body_feat, rpn_cls_score_fpn, rpn_bbox_pred_fpn = self._get_output(body_feat,
feat_lvl) feat_lvl)
prop_op = self.train_proposal if mode == 'train' else self.test_proposal prop_op = self.train_proposal if mode == 'train' else self.test_proposal
rpn_cls_prob_fpn = fluid.layers.sigmoid( if self.num_classes == 1:
rpn_cls_logits_fpn, name='rpn_cls_probs_fpn' + str(feat_lvl)) rpn_cls_prob_fpn = fluid.layers.sigmoid(
rpn_rois_fpn, rpn_roi_probs_fpn = prop_op( rpn_cls_score_fpn, name='rpn_cls_prob_fpn' + str(feat_lvl))
else:
rpn_cls_score_fpn = fluid.layers.transpose(
rpn_cls_score_fpn, perm=[0, 2, 3, 1])
rpn_cls_score_fpn = fluid.layers.reshape(
rpn_cls_score_fpn, shape=(0, 0, 0, -1, self.num_classes))
rpn_cls_prob_fpn = fluid.layers.softmax(
rpn_cls_score_fpn,
use_cudnn=False,
name='rpn_cls_prob_fpn' + str(feat_lvl))
rpn_cls_prob_fpn = fluid.layers.slice(
rpn_cls_prob_fpn, axes=[4], starts=[1],
ends=[self.num_classes])
rpn_cls_prob_fpn, _ = fluid.layers.topk(rpn_cls_prob_fpn, 1)
rpn_cls_prob_fpn = fluid.layers.reshape(
rpn_cls_prob_fpn, shape=(0, 0, 0, -1))
rpn_cls_prob_fpn = fluid.layers.transpose(
rpn_cls_prob_fpn, perm=[0, 3, 1, 2])
rpn_rois_fpn, rpn_roi_prob_fpn = prop_op(
scores=rpn_cls_prob_fpn, scores=rpn_cls_prob_fpn,
bbox_deltas=rpn_bbox_pred_fpn, bbox_deltas=rpn_bbox_pred_fpn,
im_info=im_info, im_info=im_info,
anchors=self.anchors, anchors=self.anchors,
variances=self.anchor_var) variances=self.anchor_var)
return rpn_rois_fpn, rpn_roi_probs_fpn return rpn_rois_fpn, rpn_roi_prob_fpn
def get_proposals(self, fpn_feats, im_info, mode='train'): def get_proposals(self, fpn_feats, im_info, mode='train'):
""" """
......
...@@ -49,6 +49,7 @@ class CascadeRCNN(object): ...@@ -49,6 +49,7 @@ class CascadeRCNN(object):
roi_extractor='FPNRoIAlign', roi_extractor='FPNRoIAlign',
bbox_head='CascadeBBoxHead', bbox_head='CascadeBBoxHead',
bbox_assigner='CascadeBBoxAssigner', bbox_assigner='CascadeBBoxAssigner',
rpn_only=False,
fpn='FPN'): fpn='FPN'):
super(CascadeRCNN, self).__init__() super(CascadeRCNN, self).__init__()
assert fpn is not None, "cascade RCNN requires FPN" assert fpn is not None, "cascade RCNN requires FPN"
...@@ -58,6 +59,7 @@ class CascadeRCNN(object): ...@@ -58,6 +59,7 @@ class CascadeRCNN(object):
self.bbox_assigner = bbox_assigner self.bbox_assigner = bbox_assigner
self.roi_extractor = roi_extractor self.roi_extractor = roi_extractor
self.bbox_head = bbox_head self.bbox_head = bbox_head
self.rpn_only = rpn_only
# Cascade local cfg # Cascade local cfg
self.cls_agnostic_bbox_reg = 2 self.cls_agnostic_bbox_reg = 2
(brw0, brw1, brw2) = self.bbox_assigner.bbox_reg_weights (brw0, brw1, brw2) = self.bbox_assigner.bbox_reg_weights
...@@ -88,6 +90,12 @@ class CascadeRCNN(object): ...@@ -88,6 +90,12 @@ class CascadeRCNN(object):
if mode == 'train': if mode == 'train':
rpn_loss = self.rpn_head.get_loss(im_info, gt_box, is_crowd) rpn_loss = self.rpn_head.get_loss(im_info, gt_box, is_crowd)
else:
if self.rpn_only:
im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3])
im_scale = fluid.layers.sequence_expand(im_scale, rois)
rois = rois / im_scale
return {'proposal': rois}
proposal_list = [] proposal_list = []
roi_feat_list = [] roi_feat_list = []
......
...@@ -48,6 +48,7 @@ class FasterRCNN(object): ...@@ -48,6 +48,7 @@ class FasterRCNN(object):
roi_extractor, roi_extractor,
bbox_head='BBoxHead', bbox_head='BBoxHead',
bbox_assigner='BBoxAssigner', bbox_assigner='BBoxAssigner',
rpn_only=False,
fpn=None): fpn=None):
super(FasterRCNN, self).__init__() super(FasterRCNN, self).__init__()
self.backbone = backbone self.backbone = backbone
...@@ -56,6 +57,7 @@ class FasterRCNN(object): ...@@ -56,6 +57,7 @@ class FasterRCNN(object):
self.roi_extractor = roi_extractor self.roi_extractor = roi_extractor
self.bbox_head = bbox_head self.bbox_head = bbox_head
self.fpn = fpn self.fpn = fpn
self.rpn_only = rpn_only
def build(self, feed_vars, mode='train'): def build(self, feed_vars, mode='train'):
im = feed_vars['image'] im = feed_vars['image']
...@@ -90,7 +92,12 @@ class FasterRCNN(object): ...@@ -90,7 +92,12 @@ class FasterRCNN(object):
bbox_targets = outs[2] bbox_targets = outs[2]
bbox_inside_weights = outs[3] bbox_inside_weights = outs[3]
bbox_outside_weights = outs[4] bbox_outside_weights = outs[4]
else:
if self.rpn_only:
im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3])
im_scale = fluid.layers.sequence_expand(im_scale, rois)
rois = rois / im_scale
return {'proposal': rois}
if self.fpn is None: if self.fpn is None:
# in models without FPN, roi extractor only uses the last level of # in models without FPN, roi extractor only uses the last level of
# feature maps. And body_feat_names[-1] represents the name of # feature maps. And body_feat_names[-1] represents the name of
......
...@@ -51,6 +51,7 @@ class MaskRCNN(object): ...@@ -51,6 +51,7 @@ class MaskRCNN(object):
roi_extractor='RoIAlign', roi_extractor='RoIAlign',
mask_assigner='MaskAssigner', mask_assigner='MaskAssigner',
mask_head='MaskHead', mask_head='MaskHead',
rpn_only=False,
fpn=None): fpn=None):
super(MaskRCNN, self).__init__() super(MaskRCNN, self).__init__()
self.backbone = backbone self.backbone = backbone
...@@ -60,6 +61,7 @@ class MaskRCNN(object): ...@@ -60,6 +61,7 @@ class MaskRCNN(object):
self.bbox_head = bbox_head self.bbox_head = bbox_head
self.mask_assigner = mask_assigner self.mask_assigner = mask_assigner
self.mask_head = mask_head self.mask_head = mask_head
self.rpn_only = rpn_only
self.fpn = fpn self.fpn = fpn
def build(self, feed_vars, mode='train'): def build(self, feed_vars, mode='train'):
...@@ -130,7 +132,11 @@ class MaskRCNN(object): ...@@ -130,7 +132,11 @@ class MaskRCNN(object):
return loss return loss
else: else:
if self.rpn_only:
im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3])
im_scale = fluid.layers.sequence_expand(im_scale, rois)
rois = rois / im_scale
return {'proposal': rois}
if self.fpn is None: if self.fpn is None:
last_feat = body_feats[list(body_feats.keys())[-1]] last_feat = body_feats[list(body_feats.keys())[-1]]
roi_feat = self.roi_extractor(last_feat, rois) roi_feat = self.roi_extractor(last_feat, rois)
......
...@@ -30,7 +30,12 @@ import logging ...@@ -30,7 +30,12 @@ import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
__all__ = [ __all__ = [
'bbox_eval', 'mask_eval', 'bbox2out', 'mask2out', 'get_category_info' 'bbox_eval',
'mask_eval',
'bbox2out',
'mask2out',
'get_category_info',
'proposal_eval',
] ]
...@@ -42,6 +47,34 @@ def clip_bbox(bbox): ...@@ -42,6 +47,34 @@ def clip_bbox(bbox):
return xmin, ymin, xmax, ymax return xmin, ymin, xmax, ymax
def proposal_eval(results, anno_file, outfile, max_dets=(100, 300, 1000)):
    """Evaluate region proposals against a COCO annotation file.

    The proposals are converted with ``proposal2out``, dumped to ``outfile``
    as JSON, loaded back through the COCO API and scored with ``COCOeval``
    in 'bbox' mode, with category matching switched off (``useCats = 0``)
    and ``maxDets`` taken from ``max_dets``. The summary is printed by
    ``COCOeval.summarize``.

    Args:
        results: sequence of result dicts; the first one must contain the
            key 'proposal'.
        anno_file: path of the COCO annotation file handed to ``COCO``.
        outfile: path of the JSON file to write; must end with '.json'.
        max_dets: detection-count limits assigned to
            ``COCOeval.params.maxDets``.
    """
    assert 'proposal' in results[0]
    assert outfile.endswith('.json')

    proposals = proposal2out(results)
    assert len(proposals) > 0, (
        "The number of valid proposal detected is zero.\n "
        "Please use reasonable model and check input data.")

    # The COCO API loads detections from a file, so persist them first.
    with open(outfile, 'w') as json_file:
        json.dump(proposals, json_file)

    ground_truth = COCO(anno_file)

    logger.info("Start evaluate...")
    detections = ground_truth.loadRes(outfile)
    evaluator = COCOeval(ground_truth, detections, 'bbox')

    # Proposals carry no class information: evaluate them class-agnostically.
    evaluator.params.useCats = 0
    evaluator.params.maxDets = list(max_dets)

    evaluator.evaluate()
    evaluator.accumulate()
    evaluator.summarize()

    # flush coco evaluation result
    sys.stdout.flush()
def bbox_eval(results, anno_file, outfile, with_background=True): def bbox_eval(results, anno_file, outfile, with_background=True):
assert 'bbox' in results[0] assert 'bbox' in results[0]
assert outfile.endswith('.json') assert outfile.endswith('.json')
...@@ -96,6 +129,44 @@ def mask_eval(results, anno_file, outfile, resolution, thresh_binarize=0.5): ...@@ -96,6 +129,44 @@ def mask_eval(results, anno_file, outfile, resolution, thresh_binarize=0.5):
coco_ev.summarize() coco_ev.summarize()
def proposal2out(results, is_bbox_normalized=False):
    """Convert proposal outputs into a flat list of COCO-style records.

    Args:
        results: iterable of per-batch dicts. From each dict this function
            reads ``t['proposal'][0]`` (box rows ``xmin, ymin, xmax, ymax``),
            ``t['proposal'][1][0]`` (number of boxes belonging to each image
            of the batch, in order) and ``t['im_id'][0]`` (one row per image
            whose first element is the image id).
        is_bbox_normalized: when True, each box is passed through
            ``clip_bbox`` and width/height are plain differences; otherwise
            width/height use the inclusive-pixel ``+ 1`` convention.

    Returns:
        list of dicts with keys 'image_id', 'category_id' (always 1),
        'bbox' (``[xmin, ymin, w, h]``) and 'score' (always 1.0).
    """
    xywh_res = []
    for t in results:
        bboxes = t['proposal'][0]
        # Test for None first: the original order evaluated ``bboxes.shape``
        # before the None check, so a missing result raised AttributeError
        # instead of being skipped. A (1, 1) array is skipped as well, as in
        # the original code.
        if bboxes is None or bboxes.shape == (1, 1):
            continue
        lengths = t['proposal'][1][0]
        im_ids = np.array(t['im_id'][0])

        # k walks the flat box array; each image owns the next `num` rows.
        k = 0
        for i, num in enumerate(lengths):
            im_id = int(im_ids[i][0])
            for _ in range(num):
                xmin, ymin, xmax, ymax = bboxes[k].tolist()
                k += 1
                if is_bbox_normalized:
                    xmin, ymin, xmax, ymax = \
                        clip_bbox([xmin, ymin, xmax, ymax])
                    w = xmax - xmin
                    h = ymax - ymin
                else:
                    w = xmax - xmin + 1
                    h = ymax - ymin + 1

                xywh_res.append({
                    'image_id': im_id,
                    'category_id': 1,
                    'bbox': [xmin, ymin, w, h],
                    'score': 1.0
                })
    return xywh_res
def bbox2out(results, clsid2catid, is_bbox_normalized=False): def bbox2out(results, clsid2catid, is_bbox_normalized=False):
xywh_res = [] xywh_res = []
for t in results: for t in results:
......
...@@ -91,13 +91,19 @@ def eval_run(exe, compile_program, pyreader, keys, values, cls): ...@@ -91,13 +91,19 @@ def eval_run(exe, compile_program, pyreader, keys, values, cls):
def eval_results(results, feed, metric, resolution=None, output_file=None): def eval_results(results, feed, metric, resolution=None, output_file=None):
"""Evaluation for evaluation program results""" """Evaluation for evaluation program results"""
if metric == 'COCO': if metric == 'COCO':
from ppdet.utils.coco_eval import bbox_eval, mask_eval from ppdet.utils.coco_eval import proposal_eval, bbox_eval, mask_eval
anno_file = getattr(feed.dataset, 'annotation', None) anno_file = getattr(feed.dataset, 'annotation', None)
with_background = getattr(feed, 'with_background', True) with_background = getattr(feed, 'with_background', True)
output = 'bbox.json' if 'proposal' in results[0]:
if output_file: output = 'proposal.json'
output = '{}_bbox.json'.format(output_file) if output_file:
bbox_eval(results, anno_file, output, with_background) output = '{}_proposal.json'.format(output_file)
proposal_eval(results, anno_file, output)
if 'bbox' in results[0]:
output = 'bbox.json'
if output_file:
output = '{}_bbox.json'.format(output_file)
bbox_eval(results, anno_file, output, with_background)
if 'mask' in results[0]: if 'mask' in results[0]:
output = 'mask.json' output = 'mask.json'
if output_file: if output_file:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册