add generate_proposals (#1636)

a3364163 · wangguanzhong · GitHub · 36fac289 · a3364163 · a3364163
显示空白变更内容
内联并排

Showing 206 additions and 2 deletions

ppdet/modeling/ops.py ppdet/modeling/ops.py +142 -2

ppdet/modeling/tests/test_ops.py ppdet/modeling/tests/test_ops.py +64 -0

未找到文件。
--- a/ppdet/modeling/ops.py
+++ b/ppdet/modeling/ops.py
@@ -29,7 +29,7 @@ __all__ = [
    'roi_align',
    'prior_box',
    'anchor_generator',
-    #'generate_proposals',
+    'generate_proposals',
    'iou_similarity',
    'box_coder',
    'yolo_box',
@@ -1031,7 +1031,6 @@ def multiclass_nms(bboxes,
    helper = LayerHelper('multiclass_nms3', **locals())

    if in_dygraph_mode():
-        assert rois_num is not None, "rois_num should not be None in dygraph mode."
        attrs = ('background_label', background_label, 'score_threshold',
                 score_threshold, 'nms_top_k', nms_top_k, 'nms_threshold',
                 nms_threshold, 'keep_top_k', keep_top_k, 'nms_eta', nms_eta,
@@ -1368,3 +1367,144 @@ def box_coder(prior_box,
        attrs=attrs,
        outputs={"OutputBox": output_box})
    return output_box
+
+
+def generate_proposals(scores,
+                       bbox_deltas,
+                       im_shape,
+                       anchors,
+                       variances,
+                       pre_nms_top_n=6000,
+                       post_nms_top_n=1000,
+                       nms_thresh=0.5,
+                       min_size=0.1,
+                       eta=1.0,
+                       return_rois_num=False,
+                       name=None):
+    """
+    **Generate proposals for Faster-RCNN**
+    This operation proposes RoIs according to each box's
+    probability of being a foreground object, where
+    the boxes are calculated from anchors. ``bbox_deltas`` and ``scores``
+    are the outputs of the RPN. The final proposals
+    can be used to train the detection net.
+    To generate proposals, this operation performs the following steps:
+    1. Transpose and reshape scores and bbox_deltas to sizes
+       (H*W*A, 1) and (H*W*A, 4).
+    2. Calculate box locations as proposal candidates.
+    3. Clip boxes to the image.
+    4. Remove predicted boxes with small area.
+    5. Apply NMS to get the final proposals as output.
+    Args:
+        scores(Tensor): A 4-D Tensor with shape [N, A, H, W] representing
+            the probability of each box being an object.
+            N is batch size, A is number of anchors, H and W are height and
+            width of the feature map. The data type must be float32.
+        bbox_deltas(Tensor): A 4-D Tensor with shape [N, 4*A, H, W]
+            representing the difference between predicted box location and
+            anchor location. The data type must be float32.
+        im_shape(Tensor): A 2-D Tensor with shape [N, 2] representing H, W, the
+            original image size or input size. The data type can be float32 or
+            float64.
+        anchors(Tensor): A 4-D Tensor representing the anchors with a layout
+            of [H, W, A, 4]. H and W are height and width of the feature map,
+            A is the number of anchors at each position. Each anchor is
+            in (xmin, ymin, xmax, ymax) format and unnormalized. The data type must be float32.
+        variances(Tensor): A 4-D Tensor. The expanded variances of anchors with a layout of
+            [H, W, A, 4]. Each variance is in
+            (xcenter, ycenter, w, h) format. The data type must be float32.
+        pre_nms_top_n(int): Number of total bboxes to be kept per
+            image before NMS. `6000` by default.
+        post_nms_top_n(int): Number of total bboxes to be kept per
+            image after NMS. `1000` by default.
+        nms_thresh(float): Threshold in NMS. `0.5` by default.
+        min_size(float): Remove predicted boxes with either height or
+            width < min_size. `0.1` by default.
+        eta(float): Applied in adaptive NMS: if the adaptive threshold is > 0.5,
+            `adaptive_threshold = adaptive_threshold * eta` in each iteration. `1.0` by default.
+        return_rois_num(bool): When True, additionally return a 1-D Tensor with shape [N, ] holding the number
+            of RoIs of each image in the batch, where N is the number of images. For example, values [4, 5] mean
+            the first image has 4 RoIs and the second has 5. It is only used in rcnn models. Must be True in
+            dygraph mode (an assertion enforces this). 'False' by default.
+        name(str, optional): For detailed information, please refer
+            to :ref:`api_guide_Name`. Usually name does not need to be set and
+            is None by default.
+
+    Returns:
+        tuple: ``(rpn_rois, rpn_roi_probs)``, or ``(rpn_rois, rpn_roi_probs, rpn_rois_num)`` when ``return_rois_num`` is True (always the 3-tuple in dygraph mode).
+        - **rpn_rois**: The generated RoIs. 2-D Tensor with shape ``[N, 4]`` while ``N`` is the number of RoIs. In static-graph mode its data type is that of ``bbox_deltas``.
+        - **rpn_roi_probs**: The scores of generated RoIs. 2-D Tensor with shape ``[N, 1]`` while ``N`` is the number of RoIs. The data type is the same as ``scores``.
+        - **rpn_rois_num**: The number of RoIs per image; created as an int32 variable with ``stop_gradient = True`` in static-graph mode.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            from ppdet.modeling import ops
+            paddle.enable_static()
+            scores = paddle.static.data(name='scores', shape=[None, 4, 5, 5], dtype='float32')
+            bbox_deltas = paddle.static.data(name='bbox_deltas', shape=[None, 16, 5, 5], dtype='float32')
+            im_shape = paddle.static.data(name='im_shape', shape=[None, 2], dtype='float32')
+            anchors = paddle.static.data(name='anchors', shape=[5, 5, 4, 4], dtype='float32')
+            variances = paddle.static.data(name='variances', shape=[5, 5, 4, 4], dtype='float32')
+            rois, roi_probs = ops.generate_proposals(scores, bbox_deltas,
+                         im_shape, anchors, variances)
+    """
+    if in_dygraph_mode():  # dygraph path: call the op directly and return early
+        assert return_rois_num, "return_rois_num should be True in dygraph mode."
+        attrs = ('pre_nms_topN', pre_nms_top_n, 'post_nms_topN', post_nms_top_n,  # flat (name, value, ...) sequence unpacked into the op call below
+                 'nms_thresh', nms_thresh, 'min_size', min_size, 'eta', eta)
+        rpn_rois, rpn_roi_probs, rpn_rois_num = core.ops.generate_proposals_v2(
+            scores, bbox_deltas, im_shape, anchors, variances, *attrs)
+        return rpn_rois, rpn_roi_probs, rpn_rois_num
+
+    helper = LayerHelper('generate_proposals_v2', **locals())  # static-graph path (reached only when not in dygraph mode)
+
+    check_variable_and_dtype(scores, 'scores', ['float32'],
+                             'generate_proposals_v2')
+    check_variable_and_dtype(bbox_deltas, 'bbox_deltas', ['float32'],
+                             'generate_proposals_v2')
+    check_variable_and_dtype(im_shape, 'im_shape', ['float32', 'float64'],
+                             'generate_proposals_v2')
+    check_variable_and_dtype(anchors, 'anchors', ['float32'],
+                             'generate_proposals_v2')
+    check_variable_and_dtype(variances, 'variances', ['float32'],
+                             'generate_proposals_v2')
+
+    rpn_rois = helper.create_variable_for_type_inference(
+        dtype=bbox_deltas.dtype)
+    rpn_roi_probs = helper.create_variable_for_type_inference(
+        dtype=scores.dtype)
+    outputs = {
+        'RpnRois': rpn_rois,
+        'RpnRoiProbs': rpn_roi_probs,
+    }
+    if return_rois_num:  # the RpnRoisNum output is only registered on request
+        rpn_rois_num = helper.create_variable_for_type_inference(dtype='int32')
+        rpn_rois_num.stop_gradient = True
+        outputs['RpnRoisNum'] = rpn_rois_num
+
+    helper.append_op(
+        type="generate_proposals_v2",
+        inputs={
+            'Scores': scores,
+            'BboxDeltas': bbox_deltas,
+            'ImShape': im_shape,
+            'Anchors': anchors,
+            'Variances': variances
+        },
+        attrs={
+            'pre_nms_topN': pre_nms_top_n,
+            'post_nms_topN': post_nms_top_n,
+            'nms_thresh': nms_thresh,
+            'min_size': min_size,
+            'eta': eta
+        },
+        outputs=outputs)
+    rpn_rois.stop_gradient = True
+    rpn_roi_probs.stop_gradient = True
+
+    if return_rois_num:  # return arity follows the flag in static-graph mode
+        return rpn_rois, rpn_roi_probs, rpn_rois_num
+    else:
+        return rpn_rois, rpn_roi_probs
--- a/ppdet/modeling/tests/test_ops.py
+++ b/ppdet/modeling/tests/test_ops.py
@@ -764,5 +764,69 @@ class TestBoxCoder(LayerTest):
                              prior_box_var, target_box)


+class TestGenerateProposals(LayerTest):  # checks that ops.generate_proposals gives identical results in static-graph and dygraph mode
+    def test_generate_proposals(self):
+        scores_np = np.random.rand(2, 3, 4, 4).astype('float32')  # random scores; the same arrays feed both modes
+        bbox_deltas_np = np.random.rand(2, 12, 4, 4).astype('float32')  # second dim is 4x that of scores_np
+        im_shape_np = np.array([[8, 8], [6, 6]]).astype('float32')  # one row per batch item
+        anchors_np = np.reshape(np.arange(4 * 4 * 3 * 4),
+                                [4, 4, 3, 4]).astype('float32')
+        variances_np = np.ones((4, 4, 3, 4)).astype('float32')  # same shape as anchors_np
+
+        with self.static_graph():  # build the op from placeholders and fetch its three outputs
+            scores = paddle.static.data(
+                name='scores', shape=[2, 3, 4, 4], dtype='float32')
+            bbox_deltas = paddle.static.data(
+                name='bbox_deltas', shape=[2, 12, 4, 4], dtype='float32')
+            im_shape = paddle.static.data(
+                name='im_shape', shape=[2, 2], dtype='float32')
+            anchors = paddle.static.data(
+                name='anchors', shape=[4, 4, 3, 4], dtype='float32')
+            variances = paddle.static.data(
+                name='var', shape=[4, 4, 3, 4], dtype='float32')
+            rois, roi_probs, rois_num = ops.generate_proposals(
+                scores,
+                bbox_deltas,
+                im_shape,
+                anchors,
+                variances,
+                pre_nms_top_n=10,
+                post_nms_top_n=5,
+                return_rois_num=True)
+            rois_stat, roi_probs_stat, rois_num_stat = self.get_static_graph_result(
+                feed={  # keys must match the `name=` of the placeholders above
+                    'scores': scores_np,
+                    'bbox_deltas': bbox_deltas_np,
+                    'im_shape': im_shape_np,
+                    'anchors': anchors_np,
+                    'var': variances_np
+                },
+                fetch_list=[rois, roi_probs, rois_num],
+                with_lod=True)
+
+        with self.dynamic_graph():  # run the same inputs and arguments through the dygraph branch
+            scores_dy = base.to_variable(scores_np)
+            bbox_deltas_dy = base.to_variable(bbox_deltas_np)
+            im_shape_dy = base.to_variable(im_shape_np)
+            anchors_dy = base.to_variable(anchors_np)
+            variances_dy = base.to_variable(variances_np)
+            rois, roi_probs, rois_num = ops.generate_proposals(
+                scores_dy,
+                bbox_deltas_dy,
+                im_shape_dy,
+                anchors_dy,
+                variances_dy,
+                pre_nms_top_n=10,
+                post_nms_top_n=5,
+                return_rois_num=True)  # generate_proposals asserts this flag is True in dygraph mode
+            rois_dy = rois.numpy()
+            roi_probs_dy = roi_probs.numpy()
+            rois_num_dy = rois_num.numpy()
+
+        self.assertTrue(np.array_equal(np.array(rois_stat), rois_dy))  # exact equality, no tolerance
+        self.assertTrue(np.array_equal(np.array(roi_probs_stat), roi_probs_dy))
+        self.assertTrue(np.array_equal(np.array(rois_num_stat), rois_num_dy))
+
+
 if __name__ == '__main__':
    unittest.main()