Refine cascade models (#3219)

* refine cascade models

Refine cascade models (#3219)
* refine cascade models
2ef93ad2 · wangguanzhong · GitHub · 19122bd5 · 2ef93ad2 · 2ef93ad2
4 changed files
--- a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_mask_rcnn.py
+++ b/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_mask_rcnn.py
@@ -53,6 +53,7 @@ class CascadeMaskRCNN(object):
                 bbox_assigner='CascadeBBoxAssigner',
                 mask_assigner='MaskAssigner',
                 mask_head='MaskHead',
+                 rpn_only=False,
                 fpn='FPN'):
        super(CascadeMaskRCNN, self).__init__()
        assert fpn is not None, "cascade RCNN requires FPN"
@@ -64,6 +65,7 @@ class CascadeMaskRCNN(object):
        self.bbox_head = bbox_head
        self.mask_assigner = mask_assigner
        self.mask_head = mask_head
+        self.rpn_only = rpn_only
        # Cascade local cfg
        self.cls_agnostic_bbox_reg = 2
        (brw0, brw1, brw2) = self.bbox_assigner.bbox_reg_weights
@@ -191,8 +193,9 @@ class CascadeMaskRCNN(object):
                roi_feat = self.roi_extractor(body_feats, rois, spatial_scale)
            bbox_pred = self.bbox_head.get_prediction(
-                im_info, roi_feat_list, rcnn_pred_list, proposal_list,
+                im_info, feed_vars['im_shape'], roi_feat_list, rcnn_pred_list,
-                self.cascade_bbox_reg_weights, self.cls_agnostic_bbox_reg)
+                proposal_list, self.cascade_bbox_reg_weights,
+                self.cls_agnostic_bbox_reg)
            bbox_pred = bbox_pred['bbox']
@@ -204,7 +207,11 @@ class CascadeMaskRCNN(object):
            cond = fluid.layers.less_than(x=bbox_size, y=size)
            mask_pred = fluid.layers.create_global_var(
-                shape=[1], value=0.0, dtype='float32', persistable=False)
+                shape=[1],
+                value=0.0,
+                dtype='float32',
+                persistable=False,
+                name='mask_pred')
            with fluid.layers.control_flow.Switch() as switch:
                with switch.case(cond):

--- a/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_rcnn.py
+++ b/PaddleCV/PaddleDetection/ppdet/modeling/architectures/cascade_rcnn.py
@@ -72,7 +72,19 @@ class CascadeRCNN(object):
    def build(self, feed_vars, mode='train'):
        im = feed_vars['image']
+        assert mode in ['train', 'test'], \
+            "only 'train' and 'test' mode is supported"
+        if mode == 'train':
+            required_fields = [
+                'gt_label', 'gt_box', 'gt_mask', 'is_crowd', 'im_info'
+            ]
+        else:
+            required_fields = ['im_shape', 'im_info']
+        for var in required_fields:
+            assert var in feed_vars, \
+                "{} has no {} field".format(feed_vars, var)
        im_info = feed_vars['im_info']
        if mode == 'train':
            gt_box = feed_vars['gt_box']
            is_crowd = feed_vars['is_crowd']
@@ -92,7 +104,8 @@ class CascadeRCNN(object):
            rpn_loss = self.rpn_head.get_loss(im_info, gt_box, is_crowd)
        else:
            if self.rpn_only:
-                im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3])
+                im_scale = fluid.layers.slice(
+                    im_info, [1], starts=[2], ends=[3])
                im_scale = fluid.layers.sequence_expand(im_scale, rois)
                rois = rois / im_scale
                return {'proposal': rois}
@@ -143,8 +156,9 @@ class CascadeRCNN(object):
            return loss
        else:
            pred = self.bbox_head.get_prediction(
-                im_info, roi_feat_list, rcnn_pred_list, proposal_list,
+                im_info, feed_vars['im_shape'], roi_feat_list, rcnn_pred_list,
-                self.cascade_bbox_reg_weights, self.cls_agnostic_bbox_reg)
+                proposal_list, self.cascade_bbox_reg_weights,
+                self.cls_agnostic_bbox_reg)
            return pred
    def _decode_box(self, proposals, bbox_pred, curr_stage):

--- a/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/bbox_head.py
+++ b/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/bbox_head.py
@@ -274,11 +274,14 @@ class BBoxHead(object):
        Get prediction bounding box in test stage.
        Args:
+            roi_feat (Variable): RoI feature from RoIExtractor.
            rois (Variable): Output of generate_proposals in rpn head.
            im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the
                number of input images, each element consists of im_height,
                im_width, im_scale.
-            cls_score (Variable), bbox_pred(Variable): Output of get_output.
+            im_shape (Variable): Actual shape of original image with shape
+                [B, 3]. B is the number of images, each element consists of
+                original_height, original_width, 1.
        Returns:
            pred_result(Variable): Prediction result with shape [N, 6]. Each

--- a/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/cascade_head.py
+++ b/PaddleCV/PaddleDetection/ppdet/modeling/roi_heads/cascade_head.py
@@ -139,6 +139,7 @@ class CascadeBBoxHead(object):
    def get_prediction(self,
                       im_info,
+                       im_shape,
                       roi_feat_list,
                       rcnn_pred_list,
                       proposal_list,
@@ -151,6 +152,9 @@ class CascadeBBoxHead(object):
            im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the
                number of input images, each element consists
                of im_height, im_width, im_scale.
+            im_shape (Variable): Actual shape of original image with shape
+                [B, 3]. B is the number of images, each element consists of
+                original_height, original_width, 1.
            rois_feat_list (List): RoI feature from RoIExtractor.
            rcnn_pred_list (Variable): Cascade rcnn's head's output
                including bbox_pred and cls_score
@@ -197,7 +201,8 @@ class CascadeBBoxHead(object):
            # only use fg box delta to decode box
            bbox_pred_new = fluid.layers.slice(
                bbox_pred_new, axes=[1], starts=[1], ends=[2])
-            bbox_pred_new = fluid.layers.expand(bbox_pred_new, [1, self.num_classes, 1])
+            bbox_pred_new = fluid.layers.expand(bbox_pred_new,
+                                                [1, self.num_classes, 1])
        decoded_box = fluid.layers.box_coder(
            prior_box=proposals_boxes,
            prior_box_var=bbox_reg_w,
@@ -206,8 +211,7 @@ class CascadeBBoxHead(object):
            box_normalized=False,
            axis=1)
-        # TODO: notice detectron use img.shape
+        box_out = fluid.layers.box_clip(input=decoded_box, im_info=im_shape)
-        box_out = fluid.layers.box_clip(input=decoded_box, im_info=im_info)
        pred_result = self.nms(bboxes=box_out, scores=boxes_cls_prob_mean)
        return {"bbox": pred_result}