提交 6b5d7100 编写于 作者: Y Yuan Gao 提交者: wangguanzhong

add rpn recall function and class-aware rpn (#2814)

* add rpn recall function and class-aware rpn
上级 3dd4f349
......@@ -22,8 +22,8 @@ from paddle.fluid.initializer import Normal
from paddle.fluid.regularizer import L2Decay
from ppdet.core.workspace import register
from ppdet.modeling.ops import (AnchorGenerator,
RPNTargetAssign, GenerateProposals)
from ppdet.modeling.ops import (AnchorGenerator, RPNTargetAssign,
GenerateProposals)
__all__ = ['RPNTargetAssign', 'GenerateProposals', 'RPNHead', 'FPNRPNHead']
......@@ -48,12 +48,14 @@ class RPNHead(object):
anchor_generator=AnchorGenerator().__dict__,
rpn_target_assign=RPNTargetAssign().__dict__,
train_proposal=GenerateProposals(12000, 2000).__dict__,
test_proposal=GenerateProposals().__dict__):
test_proposal=GenerateProposals().__dict__,
num_classes=1):
super(RPNHead, self).__init__()
self.anchor_generator = anchor_generator
self.rpn_target_assign = rpn_target_assign
self.train_proposal = train_proposal
self.test_proposal = test_proposal
self.num_classes = num_classes
if isinstance(anchor_generator, dict):
self.anchor_generator = AnchorGenerator(**anchor_generator)
if isinstance(rpn_target_assign, dict):
......@@ -96,7 +98,7 @@ class RPNHead(object):
# Proposal classification scores
self.rpn_cls_score = fluid.layers.conv2d(
rpn_conv,
num_filters=num_anchor,
num_filters=num_anchor * self.num_classes,
filter_size=1,
stride=1,
padding=0,
......@@ -147,12 +149,27 @@ class RPNHead(object):
body_feat = list(body_feats.values())[-1]
rpn_cls_score, rpn_bbox_pred = self._get_output(body_feat)
rpn_cls_score_prob = fluid.layers.sigmoid(
rpn_cls_score, name='rpn_cls_score_prob')
if self.num_classes == 1:
rpn_cls_prob = fluid.layers.sigmoid(
rpn_cls_score, name='rpn_cls_prob')
else:
rpn_cls_score = fluid.layers.transpose(
rpn_cls_score, perm=[0, 2, 3, 1])
rpn_cls_score = fluid.layers.reshape(
rpn_cls_score, shape=(0, 0, 0, -1, self.num_classes))
rpn_cls_prob_tmp = fluid.layers.softmax(
rpn_cls_score, use_cudnn=False, name='rpn_cls_prob')
rpn_cls_prob_slice = fluid.layers.slice(
rpn_cls_prob_tmp, axes=[4], starts=[1],
ends=[self.num_classes])
rpn_cls_prob, _ = fluid.layers.topk(rpn_cls_prob_slice, 1)
rpn_cls_prob = fluid.layers.reshape(
rpn_cls_prob, shape=(0, 0, 0, -1))
rpn_cls_prob = fluid.layers.transpose(
rpn_cls_prob, perm=[0, 3, 1, 2])
prop_op = self.train_proposal if mode == 'train' else self.test_proposal
rpn_rois, rpn_roi_probs = prop_op(
scores=rpn_cls_score_prob,
scores=rpn_cls_prob,
bbox_deltas=rpn_bbox_pred,
im_info=im_info,
anchors=self.anchor,
......@@ -165,7 +182,8 @@ class RPNHead(object):
rpn_bbox_pred = fluid.layers.transpose(rpn_bbox_pred, perm=[0, 2, 3, 1])
anchor = fluid.layers.reshape(anchor, shape=(-1, 4))
anchor_var = fluid.layers.reshape(anchor_var, shape=(-1, 4))
rpn_cls_score = fluid.layers.reshape(x=rpn_cls_score, shape=(0, -1, 1))
rpn_cls_score = fluid.layers.reshape(
x=rpn_cls_score, shape=(0, -1, self.num_classes))
rpn_bbox_pred = fluid.layers.reshape(x=rpn_bbox_pred, shape=(0, -1, 4))
return rpn_cls_score, rpn_bbox_pred, anchor, anchor_var
......@@ -177,7 +195,7 @@ class RPNHead(object):
return self._transform_input(self.rpn_cls_score, self.rpn_bbox_pred,
self.anchor, self.anchor_var)
def get_loss(self, im_info, gt_box, is_crowd):
def get_loss(self, im_info, gt_box, is_crowd, gt_label=None):
"""
Sample proposals and Calculate rpn loss.
......@@ -196,20 +214,37 @@ class RPNHead(object):
"""
rpn_cls, rpn_bbox, anchor, anchor_var = self._get_loss_input()
score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \
self.rpn_target_assign(
bbox_pred=rpn_bbox,
cls_logits=rpn_cls,
anchor_box=anchor,
anchor_var=anchor_var,
gt_boxes=gt_box,
is_crowd=is_crowd,
im_info=im_info)
score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32')
score_tgt.stop_gradient = True
rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits(
x=score_pred, label=score_tgt)
if self.num_classes == 1:
score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \
self.rpn_target_assign(
bbox_pred=rpn_bbox,
cls_logits=rpn_cls,
anchor_box=anchor,
anchor_var=anchor_var,
gt_boxes=gt_box,
is_crowd=is_crowd,
im_info=im_info)
score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32')
score_tgt.stop_gradient = True
rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits(
x=score_pred, label=score_tgt)
else:
score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \
self.rpn_target_assign(
bbox_pred=rpn_bbox,
cls_logits=rpn_cls,
anchor_box=anchor,
anchor_var=anchor_var,
gt_boxes=gt_box,
gt_labels=gt_label,
is_crowd=is_crowd,
num_classes=self.num_classes,
im_info=im_info)
labels_int64 = fluid.layers.cast(x=score_tgt, dtype='int64')
labels_int64.stop_gradient = True
rpn_cls_loss = fluid.layers.softmax_with_cross_entropy(
logits=score_pred, label=labels_int64, numeric_stable_mode=True)
rpn_cls_loss = fluid.layers.reduce_mean(
rpn_cls_loss, name='loss_rpn_cls')
......@@ -261,13 +296,15 @@ class FPNRPNHead(RPNHead):
anchor_start_size=32,
num_chan=256,
min_level=2,
max_level=6):
max_level=6,
num_classes=1):
super(FPNRPNHead, self).__init__(anchor_generator, rpn_target_assign,
train_proposal, test_proposal)
self.anchor_start_size = anchor_start_size
self.num_chan = num_chan
self.min_level = min_level
self.max_level = max_level
self.num_classes = num_classes
self.fpn_rpn_list = []
self.anchors_list = []
......@@ -319,9 +356,10 @@ class FPNRPNHead(RPNHead):
**(feat_lvl - self.min_level), ),
stride=(2.**feat_lvl, 2.**feat_lvl))
cls_num_filters = num_anchors * self.num_classes
self.rpn_cls_score = fluid.layers.conv2d(
input=conv_rpn_fpn,
num_filters=num_anchors,
num_filters=cls_num_filters,
filter_size=1,
act=None,
name=cls_name,
......@@ -366,19 +404,37 @@ class FPNRPNHead(RPNHead):
shape of (rois_num, 1).
"""
rpn_cls_logits_fpn, rpn_bbox_pred_fpn = self._get_output(body_feat,
feat_lvl)
rpn_cls_score_fpn, rpn_bbox_pred_fpn = self._get_output(body_feat,
feat_lvl)
prop_op = self.train_proposal if mode == 'train' else self.test_proposal
rpn_cls_prob_fpn = fluid.layers.sigmoid(
rpn_cls_logits_fpn, name='rpn_cls_probs_fpn' + str(feat_lvl))
rpn_rois_fpn, rpn_roi_probs_fpn = prop_op(
if self.num_classes == 1:
rpn_cls_prob_fpn = fluid.layers.sigmoid(
rpn_cls_score_fpn, name='rpn_cls_prob_fpn' + str(feat_lvl))
else:
rpn_cls_score_fpn = fluid.layers.transpose(
rpn_cls_score_fpn, perm=[0, 2, 3, 1])
rpn_cls_score_fpn = fluid.layers.reshape(
rpn_cls_score_fpn, shape=(0, 0, 0, -1, self.num_classes))
rpn_cls_prob_fpn = fluid.layers.softmax(
rpn_cls_score_fpn,
use_cudnn=False,
name='rpn_cls_prob_fpn' + str(feat_lvl))
rpn_cls_prob_fpn = fluid.layers.slice(
rpn_cls_prob_fpn, axes=[4], starts=[1],
ends=[self.num_classes])
rpn_cls_prob_fpn, _ = fluid.layers.topk(rpn_cls_prob_fpn, 1)
rpn_cls_prob_fpn = fluid.layers.reshape(
rpn_cls_prob_fpn, shape=(0, 0, 0, -1))
rpn_cls_prob_fpn = fluid.layers.transpose(
rpn_cls_prob_fpn, perm=[0, 3, 1, 2])
rpn_rois_fpn, rpn_roi_prob_fpn = prop_op(
scores=rpn_cls_prob_fpn,
bbox_deltas=rpn_bbox_pred_fpn,
im_info=im_info,
anchors=self.anchors,
variances=self.anchor_var)
return rpn_rois_fpn, rpn_roi_probs_fpn
return rpn_rois_fpn, rpn_roi_prob_fpn
def get_proposals(self, fpn_feats, im_info, mode='train'):
"""
......
......@@ -49,6 +49,7 @@ class CascadeRCNN(object):
roi_extractor='FPNRoIAlign',
bbox_head='CascadeBBoxHead',
bbox_assigner='CascadeBBoxAssigner',
rpn_only=False,
fpn='FPN'):
super(CascadeRCNN, self).__init__()
assert fpn is not None, "cascade RCNN requires FPN"
......@@ -58,6 +59,7 @@ class CascadeRCNN(object):
self.bbox_assigner = bbox_assigner
self.roi_extractor = roi_extractor
self.bbox_head = bbox_head
self.rpn_only = rpn_only
# Cascade local cfg
self.cls_agnostic_bbox_reg = 2
(brw0, brw1, brw2) = self.bbox_assigner.bbox_reg_weights
......@@ -88,6 +90,12 @@ class CascadeRCNN(object):
if mode == 'train':
rpn_loss = self.rpn_head.get_loss(im_info, gt_box, is_crowd)
else:
if self.rpn_only:
im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3])
im_scale = fluid.layers.sequence_expand(im_scale, rois)
rois = rois / im_scale
return {'proposal': rois}
proposal_list = []
roi_feat_list = []
......
......@@ -48,6 +48,7 @@ class FasterRCNN(object):
roi_extractor,
bbox_head='BBoxHead',
bbox_assigner='BBoxAssigner',
rpn_only=False,
fpn=None):
super(FasterRCNN, self).__init__()
self.backbone = backbone
......@@ -56,6 +57,7 @@ class FasterRCNN(object):
self.roi_extractor = roi_extractor
self.bbox_head = bbox_head
self.fpn = fpn
self.rpn_only = rpn_only
def build(self, feed_vars, mode='train'):
im = feed_vars['image']
......@@ -90,7 +92,12 @@ class FasterRCNN(object):
bbox_targets = outs[2]
bbox_inside_weights = outs[3]
bbox_outside_weights = outs[4]
else:
if self.rpn_only:
im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3])
im_scale = fluid.layers.sequence_expand(im_scale, rois)
rois = rois / im_scale
return {'proposal': rois}
if self.fpn is None:
# in models without FPN, roi extractor only uses the last level of
# feature maps. And body_feat_names[-1] represents the name of
......
......@@ -51,6 +51,7 @@ class MaskRCNN(object):
roi_extractor='RoIAlign',
mask_assigner='MaskAssigner',
mask_head='MaskHead',
rpn_only=False,
fpn=None):
super(MaskRCNN, self).__init__()
self.backbone = backbone
......@@ -60,6 +61,7 @@ class MaskRCNN(object):
self.bbox_head = bbox_head
self.mask_assigner = mask_assigner
self.mask_head = mask_head
self.rpn_only = rpn_only
self.fpn = fpn
def build(self, feed_vars, mode='train'):
......@@ -130,7 +132,11 @@ class MaskRCNN(object):
return loss
else:
if self.rpn_only:
im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3])
im_scale = fluid.layers.sequence_expand(im_scale, rois)
rois = rois / im_scale
return {'proposal': rois}
if self.fpn is None:
last_feat = body_feats[list(body_feats.keys())[-1]]
roi_feat = self.roi_extractor(last_feat, rois)
......
......@@ -30,7 +30,12 @@ import logging
logger = logging.getLogger(__name__)
__all__ = [
'bbox_eval', 'mask_eval', 'bbox2out', 'mask2out', 'get_category_info'
'bbox_eval',
'mask_eval',
'bbox2out',
'mask2out',
'get_category_info',
'proposal_eval',
]
......@@ -42,6 +47,34 @@ def clip_bbox(bbox):
return xmin, ymin, xmax, ymax
def proposal_eval(results, anno_file, outfile, max_dets=(100, 300, 1000)):
    """Dump proposals to a COCO-format JSON file and run COCO bbox evaluation.

    Args:
        results (list): per-batch result dicts; the first entry must contain
            a 'proposal' key. The list is converted by ``proposal2out``.
        anno_file (str): annotation file handed to ``COCO``.
        outfile (str): path of the JSON file to write; must end in '.json'.
        max_dets (sequence): values assigned to ``params.maxDets`` of the
            evaluator (converted to a list).

    The evaluation is run with ``useCats = 0``; the summary is printed by
    ``COCOeval.summarize`` and stdout is flushed afterwards. Nothing is
    returned.
    """
    assert 'proposal' in results[0]
    assert outfile.endswith('.json')

    proposals = proposal2out(results)
    assert len(proposals) > 0, (
        "The number of valid proposal detected is zero.\n "
        "Please use reasonable model and check input data.")

    # Results are written to disk first because loadRes below is given the
    # file path rather than the in-memory list.
    with open(outfile, 'w') as out_file:
        json.dump(proposals, out_file)

    ground_truth = COCO(anno_file)
    logger.info("Start evaluate...")
    detections = ground_truth.loadRes(outfile)

    evaluator = COCOeval(ground_truth, detections, 'bbox')
    evaluator.params.useCats = 0
    evaluator.params.maxDets = list(max_dets)
    evaluator.evaluate()
    evaluator.accumulate()
    evaluator.summarize()

    # flush coco evaluation result
    sys.stdout.flush()
def bbox_eval(results, anno_file, outfile, with_background=True):
assert 'bbox' in results[0]
assert outfile.endswith('.json')
......@@ -96,6 +129,44 @@ def mask_eval(results, anno_file, outfile, resolution, thresh_binarize=0.5):
coco_ev.summarize()
def proposal2out(results, is_bbox_normalized=False):
    """Convert proposal outputs into a flat list of COCO-style records.

    Args:
        results (list): per-batch dicts. For each dict ``t``:
            ``t['proposal'][0]`` is indexed row-by-row as
            ``[xmin, ymin, xmax, ymax]`` boxes, ``t['proposal'][1][0]`` is the
            per-image box count, and ``t['im_id'][0]`` holds the image ids
            (read as ``im_ids[i][0]``).
        is_bbox_normalized (bool): when True, each box is passed through
            ``clip_bbox`` and width/height are plain differences; otherwise
            1 is added to both width and height (presumably an inclusive
            pixel-coordinate convention -- confirm with the caller).

    Returns:
        list[dict]: one record per box with keys 'image_id', 'category_id'
        (always 1), 'bbox' (``[xmin, ymin, w, h]``) and 'score' (always 1.0).
    """
    xywh_res = []
    for t in results:
        bboxes = t['proposal'][0]
        # The None test must come first: evaluating ``bboxes.shape`` on None
        # raises AttributeError before the None check is ever reached.
        # A (1, 1)-shaped array is skipped as well (presumably a placeholder
        # for "no proposals" -- confirm against the producer).
        if bboxes is None or bboxes.shape == (1, 1):
            continue

        lengths = t['proposal'][1][0]
        im_ids = np.array(t['im_id'][0])

        # k walks the flat box array; each image owns the next `num` rows.
        k = 0
        for i, num in enumerate(lengths):
            im_id = int(im_ids[i][0])
            for _ in range(num):
                xmin, ymin, xmax, ymax = bboxes[k].tolist()
                k += 1
                if is_bbox_normalized:
                    xmin, ymin, xmax, ymax = \
                        clip_bbox([xmin, ymin, xmax, ymax])
                    w = xmax - xmin
                    h = ymax - ymin
                else:
                    w = xmax - xmin + 1
                    h = ymax - ymin + 1

                xywh_res.append({
                    'image_id': im_id,
                    'category_id': 1,
                    'bbox': [xmin, ymin, w, h],
                    'score': 1.0
                })
    return xywh_res
def bbox2out(results, clsid2catid, is_bbox_normalized=False):
xywh_res = []
for t in results:
......
......@@ -91,13 +91,19 @@ def eval_run(exe, compile_program, pyreader, keys, values, cls):
def eval_results(results, feed, metric, resolution=None, output_file=None):
"""Evaluation for evaluation program results"""
if metric == 'COCO':
from ppdet.utils.coco_eval import bbox_eval, mask_eval
from ppdet.utils.coco_eval import proposal_eval, bbox_eval, mask_eval
anno_file = getattr(feed.dataset, 'annotation', None)
with_background = getattr(feed, 'with_background', True)
output = 'bbox.json'
if output_file:
output = '{}_bbox.json'.format(output_file)
bbox_eval(results, anno_file, output, with_background)
if 'proposal' in results[0]:
output = 'proposal.json'
if output_file:
output = '{}_proposal.json'.format(output_file)
proposal_eval(results, anno_file, output)
if 'bbox' in results[0]:
output = 'bbox.json'
if output_file:
output = '{}_bbox.json'.format(output_file)
bbox_eval(results, anno_file, output, with_background)
if 'mask' in results[0]:
output = 'mask.json'
if output_file:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册