From 6b5d7100aaed9249b396b80c436af849d99868b7 Mon Sep 17 00:00:00 2001 From: Yuan Gao Date: Tue, 23 Jul 2019 10:03:47 +0800 Subject: [PATCH] add rpn recall function and class-aware rpn (#2814) * add rpn recall function and class-aware rpn --- ppdet/modeling/anchor_heads/rpn_head.py | 120 ++++++++++++++----- ppdet/modeling/architectures/cascade_rcnn.py | 8 ++ ppdet/modeling/architectures/faster_rcnn.py | 9 +- ppdet/modeling/architectures/mask_rcnn.py | 8 +- ppdet/utils/coco_eval.py | 73 ++++++++++- ppdet/utils/eval_utils.py | 16 ++- 6 files changed, 194 insertions(+), 40 deletions(-) diff --git a/ppdet/modeling/anchor_heads/rpn_head.py b/ppdet/modeling/anchor_heads/rpn_head.py index 9495cbd8d..527fb948d 100644 --- a/ppdet/modeling/anchor_heads/rpn_head.py +++ b/ppdet/modeling/anchor_heads/rpn_head.py @@ -22,8 +22,8 @@ from paddle.fluid.initializer import Normal from paddle.fluid.regularizer import L2Decay from ppdet.core.workspace import register -from ppdet.modeling.ops import (AnchorGenerator, - RPNTargetAssign, GenerateProposals) +from ppdet.modeling.ops import (AnchorGenerator, RPNTargetAssign, + GenerateProposals) __all__ = ['RPNTargetAssign', 'GenerateProposals', 'RPNHead', 'FPNRPNHead'] @@ -48,12 +48,14 @@ class RPNHead(object): anchor_generator=AnchorGenerator().__dict__, rpn_target_assign=RPNTargetAssign().__dict__, train_proposal=GenerateProposals(12000, 2000).__dict__, - test_proposal=GenerateProposals().__dict__): + test_proposal=GenerateProposals().__dict__, + num_classes=1): super(RPNHead, self).__init__() self.anchor_generator = anchor_generator self.rpn_target_assign = rpn_target_assign self.train_proposal = train_proposal self.test_proposal = test_proposal + self.num_classes = num_classes if isinstance(anchor_generator, dict): self.anchor_generator = AnchorGenerator(**anchor_generator) if isinstance(rpn_target_assign, dict): @@ -96,7 +98,7 @@ class RPNHead(object): # Proposal classification scores self.rpn_cls_score = fluid.layers.conv2d( rpn_conv, - num_filters=num_anchor, + num_filters=num_anchor * self.num_classes, filter_size=1, stride=1, padding=0, @@ -147,12 +149,27 @@ class RPNHead(object): body_feat = list(body_feats.values())[-1] rpn_cls_score, rpn_bbox_pred = self._get_output(body_feat) - rpn_cls_score_prob = fluid.layers.sigmoid( - rpn_cls_score, name='rpn_cls_score_prob') - + if self.num_classes == 1: + rpn_cls_prob = fluid.layers.sigmoid( + rpn_cls_score, name='rpn_cls_prob') + else: + rpn_cls_score = fluid.layers.transpose( + rpn_cls_score, perm=[0, 2, 3, 1]) + rpn_cls_score = fluid.layers.reshape( + rpn_cls_score, shape=(0, 0, 0, -1, self.num_classes)) + rpn_cls_prob_tmp = fluid.layers.softmax( + rpn_cls_score, use_cudnn=False, name='rpn_cls_prob') + rpn_cls_prob_slice = fluid.layers.slice( + rpn_cls_prob_tmp, axes=[4], starts=[1], + ends=[self.num_classes]) + rpn_cls_prob, _ = fluid.layers.topk(rpn_cls_prob_slice, 1) + rpn_cls_prob = fluid.layers.reshape( + rpn_cls_prob, shape=(0, 0, 0, -1)) + rpn_cls_prob = fluid.layers.transpose( + rpn_cls_prob, perm=[0, 3, 1, 2]) prop_op = self.train_proposal if mode == 'train' else self.test_proposal rpn_rois, rpn_roi_probs = prop_op( - scores=rpn_cls_score_prob, + scores=rpn_cls_prob, bbox_deltas=rpn_bbox_pred, im_info=im_info, anchors=self.anchor, @@ -165,7 +182,8 @@ class RPNHead(object): rpn_bbox_pred = fluid.layers.transpose(rpn_bbox_pred, perm=[0, 2, 3, 1]) anchor = fluid.layers.reshape(anchor, shape=(-1, 4)) anchor_var = fluid.layers.reshape(anchor_var, shape=(-1, 4)) - rpn_cls_score = fluid.layers.reshape(x=rpn_cls_score, shape=(0, -1, 1)) + rpn_cls_score = fluid.layers.reshape( + x=rpn_cls_score, shape=(0, -1, self.num_classes)) rpn_bbox_pred = fluid.layers.reshape(x=rpn_bbox_pred, shape=(0, -1, 4)) return rpn_cls_score, rpn_bbox_pred, anchor, anchor_var @@ -177,7 +195,7 @@ class RPNHead(object): return self._transform_input(self.rpn_cls_score, self.rpn_bbox_pred, self.anchor, self.anchor_var) - def get_loss(self, im_info, gt_box, is_crowd): + def get_loss(self, im_info, gt_box, is_crowd, gt_label=None): """ Sample proposals and Calculate rpn loss. @@ -196,20 +214,37 @@ class RPNHead(object): """ rpn_cls, rpn_bbox, anchor, anchor_var = self._get_loss_input() - score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \ - self.rpn_target_assign( - bbox_pred=rpn_bbox, - cls_logits=rpn_cls, - anchor_box=anchor, - anchor_var=anchor_var, - gt_boxes=gt_box, - is_crowd=is_crowd, - im_info=im_info) - - score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32') - score_tgt.stop_gradient = True - rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits( - x=score_pred, label=score_tgt) + if self.num_classes == 1: + score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \ + self.rpn_target_assign( + bbox_pred=rpn_bbox, + cls_logits=rpn_cls, + anchor_box=anchor, + anchor_var=anchor_var, + gt_boxes=gt_box, + is_crowd=is_crowd, + im_info=im_info) + score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32') + score_tgt.stop_gradient = True + rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits( + x=score_pred, label=score_tgt) + else: + score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \ + self.rpn_target_assign( + bbox_pred=rpn_bbox, + cls_logits=rpn_cls, + anchor_box=anchor, + anchor_var=anchor_var, + gt_boxes=gt_box, + gt_labels=gt_label, + is_crowd=is_crowd, + num_classes=self.num_classes, + im_info=im_info) + labels_int64 = fluid.layers.cast(x=score_tgt, dtype='int64') + labels_int64.stop_gradient = True + rpn_cls_loss = fluid.layers.softmax_with_cross_entropy( + logits=score_pred, label=labels_int64, numeric_stable_mode=True) + rpn_cls_loss = fluid.layers.reduce_mean( rpn_cls_loss, name='loss_rpn_cls') @@ -261,13 +296,15 @@ class FPNRPNHead(RPNHead): anchor_start_size=32, num_chan=256, min_level=2, - max_level=6): + max_level=6, + num_classes=1): super(FPNRPNHead, self).__init__(anchor_generator, rpn_target_assign, train_proposal, test_proposal) self.anchor_start_size = anchor_start_size self.num_chan = num_chan self.min_level = min_level self.max_level = max_level + self.num_classes = num_classes self.fpn_rpn_list = [] self.anchors_list = [] @@ -319,9 +356,10 @@ class FPNRPNHead(RPNHead): **(feat_lvl - self.min_level), ), stride=(2.**feat_lvl, 2.**feat_lvl)) + cls_num_filters = num_anchors * self.num_classes self.rpn_cls_score = fluid.layers.conv2d( input=conv_rpn_fpn, - num_filters=num_anchors, + num_filters=cls_num_filters, filter_size=1, act=None, name=cls_name, @@ -366,19 +404,37 @@ class FPNRPNHead(RPNHead): shape of (rois_num, 1). """ - rpn_cls_logits_fpn, rpn_bbox_pred_fpn = self._get_output(body_feat, - feat_lvl) + rpn_cls_score_fpn, rpn_bbox_pred_fpn = self._get_output(body_feat, + feat_lvl) prop_op = self.train_proposal if mode == 'train' else self.test_proposal - rpn_cls_prob_fpn = fluid.layers.sigmoid( - rpn_cls_logits_fpn, name='rpn_cls_probs_fpn' + str(feat_lvl)) - rpn_rois_fpn, rpn_roi_probs_fpn = prop_op( + if self.num_classes == 1: + rpn_cls_prob_fpn = fluid.layers.sigmoid( + rpn_cls_score_fpn, name='rpn_cls_prob_fpn' + str(feat_lvl)) + else: + rpn_cls_score_fpn = fluid.layers.transpose( + rpn_cls_score_fpn, perm=[0, 2, 3, 1]) + rpn_cls_score_fpn = fluid.layers.reshape( + rpn_cls_score_fpn, shape=(0, 0, 0, -1, self.num_classes)) + rpn_cls_prob_fpn = fluid.layers.softmax( + rpn_cls_score_fpn, + use_cudnn=False, + name='rpn_cls_prob_fpn' + str(feat_lvl)) + rpn_cls_prob_fpn = fluid.layers.slice( + rpn_cls_prob_fpn, axes=[4], starts=[1], + ends=[self.num_classes]) + rpn_cls_prob_fpn, _ = fluid.layers.topk(rpn_cls_prob_fpn, 1) + rpn_cls_prob_fpn = fluid.layers.reshape( + rpn_cls_prob_fpn, shape=(0, 0, 0, -1)) + rpn_cls_prob_fpn = fluid.layers.transpose( + rpn_cls_prob_fpn, perm=[0, 3, 1, 2]) + rpn_rois_fpn, rpn_roi_prob_fpn = prop_op( scores=rpn_cls_prob_fpn, bbox_deltas=rpn_bbox_pred_fpn, im_info=im_info, anchors=self.anchors, variances=self.anchor_var) - return rpn_rois_fpn, rpn_roi_probs_fpn + return rpn_rois_fpn, rpn_roi_prob_fpn def get_proposals(self, fpn_feats, im_info, mode='train'): """ diff --git a/ppdet/modeling/architectures/cascade_rcnn.py b/ppdet/modeling/architectures/cascade_rcnn.py index 7fe999e5c..42b940b3d 100644 --- a/ppdet/modeling/architectures/cascade_rcnn.py +++ b/ppdet/modeling/architectures/cascade_rcnn.py @@ -49,6 +49,7 @@ class CascadeRCNN(object): roi_extractor='FPNRoIAlign', bbox_head='CascadeBBoxHead', bbox_assigner='CascadeBBoxAssigner', + rpn_only=False, fpn='FPN'): super(CascadeRCNN, self).__init__() assert fpn is not None, "cascade RCNN requires FPN" @@ -58,6 +59,7 @@ class CascadeRCNN(object): self.bbox_assigner = bbox_assigner self.roi_extractor = roi_extractor self.bbox_head = bbox_head + self.rpn_only = rpn_only # Cascade local cfg self.cls_agnostic_bbox_reg = 2 (brw0, brw1, brw2) = self.bbox_assigner.bbox_reg_weights @@ -88,6 +90,12 @@ class CascadeRCNN(object): if mode == 'train': rpn_loss = self.rpn_head.get_loss(im_info, gt_box, is_crowd) + else: + if self.rpn_only: + im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3]) + im_scale = fluid.layers.sequence_expand(im_scale, rois) + rois = rois / im_scale + return {'proposal': rois} proposal_list = [] roi_feat_list = [] diff --git a/ppdet/modeling/architectures/faster_rcnn.py b/ppdet/modeling/architectures/faster_rcnn.py index 641ae0708..1968289fd 100644 --- a/ppdet/modeling/architectures/faster_rcnn.py +++ b/ppdet/modeling/architectures/faster_rcnn.py @@ -48,6 +48,7 @@ class FasterRCNN(object): roi_extractor, bbox_head='BBoxHead', bbox_assigner='BBoxAssigner', + rpn_only=False, fpn=None): super(FasterRCNN, self).__init__() self.backbone = backbone @@ -56,6 +57,7 @@ class FasterRCNN(object): self.roi_extractor = roi_extractor self.bbox_head = bbox_head self.fpn = fpn + self.rpn_only = rpn_only def build(self, feed_vars, mode='train'): im = feed_vars['image'] @@ -90,7 +92,12 @@ class FasterRCNN(object): bbox_targets = outs[2] bbox_inside_weights = outs[3] bbox_outside_weights = outs[4] - + else: + if self.rpn_only: + im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3]) + im_scale = fluid.layers.sequence_expand(im_scale, rois) + rois = rois / im_scale + return {'proposal': rois} if self.fpn is None: # in models without FPN, roi extractor only uses the last level of # feature maps. And body_feat_names[-1] represents the name of diff --git a/ppdet/modeling/architectures/mask_rcnn.py b/ppdet/modeling/architectures/mask_rcnn.py index ce6b1a6d7..49a4c0aa9 100644 --- a/ppdet/modeling/architectures/mask_rcnn.py +++ b/ppdet/modeling/architectures/mask_rcnn.py @@ -51,6 +51,7 @@ class MaskRCNN(object): roi_extractor='RoIAlign', mask_assigner='MaskAssigner', mask_head='MaskHead', + rpn_only=False, fpn=None): super(MaskRCNN, self).__init__() self.backbone = backbone @@ -60,6 +61,7 @@ class MaskRCNN(object): self.bbox_head = bbox_head self.mask_assigner = mask_assigner self.mask_head = mask_head + self.rpn_only = rpn_only self.fpn = fpn def build(self, feed_vars, mode='train'): @@ -130,7 +132,11 @@ class MaskRCNN(object): return loss else: - + if self.rpn_only: + im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3]) + im_scale = fluid.layers.sequence_expand(im_scale, rois) + rois = rois / im_scale + return {'proposal': rois} if self.fpn is None: last_feat = body_feats[list(body_feats.keys())[-1]] roi_feat = self.roi_extractor(last_feat, rois) diff --git a/ppdet/utils/coco_eval.py b/ppdet/utils/coco_eval.py index f71f38ff4..9959bf36b 100644 --- a/ppdet/utils/coco_eval.py +++ b/ppdet/utils/coco_eval.py @@ -30,7 +30,12 @@ import logging logger = logging.getLogger(__name__) __all__ = [ - 'bbox_eval', 'mask_eval', 'bbox2out', 'mask2out', 'get_category_info' + 'bbox_eval', + 'mask_eval', + 'bbox2out', + 'mask2out', + 'get_category_info', + 'proposal_eval', ] @@ -42,6 +47,34 @@ def clip_bbox(bbox): return xmin, ymin, xmax, ymax +def proposal_eval(results, anno_file, outfile, max_dets=(100, 300, 1000)): + assert 'proposal' in results[0] + assert outfile.endswith('.json') + + xywh_results = proposal2out(results) + assert len( + xywh_results) > 0, "The number of valid proposal detected is zero.\n \ + Please use reasonable model and check input data." + + with open(outfile, 'w') as f: + json.dump(xywh_results, f) + + coco_gt = COCO(anno_file) + + logger.info("Start evaluate...") + coco_dt = coco_gt.loadRes(outfile) + coco_ev = COCOeval(coco_gt, coco_dt, 'bbox') + + coco_ev.params.useCats = 0 + coco_ev.params.maxDets = list(max_dets) + + coco_ev.evaluate() + coco_ev.accumulate() + coco_ev.summarize() + # flush coco evaluation result + sys.stdout.flush() + + def bbox_eval(results, anno_file, outfile, with_background=True): assert 'bbox' in results[0] assert outfile.endswith('.json') @@ -96,6 +129,44 @@ def mask_eval(results, anno_file, outfile, resolution, thresh_binarize=0.5): coco_ev.summarize() +def proposal2out(results, is_bbox_normalized=False): + xywh_res = [] + for t in results: + bboxes = t['proposal'][0] + lengths = t['proposal'][1][0] + im_ids = np.array(t['im_id'][0]) + if bboxes.shape == (1, 1) or bboxes is None: + continue + + k = 0 + for i in range(len(lengths)): + num = lengths[i] + im_id = int(im_ids[i][0]) + for j in range(num): + dt = bboxes[k] + xmin, ymin, xmax, ymax = dt.tolist() + + if is_bbox_normalized: + xmin, ymin, xmax, ymax = \ + clip_bbox([xmin, ymin, xmax, ymax]) + w = xmax - xmin + h = ymax - ymin + else: + w = xmax - xmin + 1 + h = ymax - ymin + 1 + + bbox = [xmin, ymin, w, h] + coco_res = { + 'image_id': im_id, + 'category_id': 1, + 'bbox': bbox, + 'score': 1.0 + } + xywh_res.append(coco_res) + k += 1 + return xywh_res + + def bbox2out(results, clsid2catid, is_bbox_normalized=False): xywh_res = [] for t in results: diff --git a/ppdet/utils/eval_utils.py b/ppdet/utils/eval_utils.py index de013617e..618a6adee 100644 --- a/ppdet/utils/eval_utils.py +++ b/ppdet/utils/eval_utils.py @@ -91,13 +91,19 @@ def eval_run(exe, compile_program, pyreader, keys, values, cls): def eval_results(results, feed, metric, resolution=None, output_file=None): """Evaluation for evaluation program results""" if metric == 'COCO': - from ppdet.utils.coco_eval import bbox_eval, mask_eval + from ppdet.utils.coco_eval import proposal_eval, bbox_eval, mask_eval anno_file = getattr(feed.dataset, 'annotation', None) with_background = getattr(feed, 'with_background', True) - output = 'bbox.json' - if output_file: - output = '{}_bbox.json'.format(output_file) - bbox_eval(results, anno_file, output, with_background) + if 'proposal' in results[0]: + output = 'proposal.json' + if output_file: + output = '{}_proposal.json'.format(output_file) + proposal_eval(results, anno_file, output) + if 'bbox' in results[0]: + output = 'bbox.json' + if output_file: + output = '{}_bbox.json'.format(output_file) + bbox_eval(results, anno_file, output, with_background) if 'mask' in results[0]: output = 'mask.json' if output_file: -- GitLab