diff --git a/ppdet/modeling/ops.py b/ppdet/modeling/ops.py index 613b3235a6e10d76bd63eea97f7caca17299fea5..457ef1088d83be9de9d594c33ef625c5f3c9dee3 100644 --- a/ppdet/modeling/ops.py +++ b/ppdet/modeling/ops.py @@ -29,7 +29,7 @@ __all__ = [ 'roi_align', 'prior_box', 'anchor_generator', - #'generate_proposals', + 'generate_proposals', 'iou_similarity', 'box_coder', 'yolo_box', @@ -1031,7 +1031,6 @@ def multiclass_nms(bboxes, helper = LayerHelper('multiclass_nms3', **locals()) if in_dygraph_mode(): - assert rois_num is not None, "rois_num should not be None in dygraph mode." attrs = ('background_label', background_label, 'score_threshold', score_threshold, 'nms_top_k', nms_top_k, 'nms_threshold', nms_threshold, 'keep_top_k', keep_top_k, 'nms_eta', nms_eta, @@ -1368,3 +1367,144 @@ def box_coder(prior_box, attrs=attrs, outputs={"OutputBox": output_box}) return output_box + + +def generate_proposals(scores, + bbox_deltas, + im_shape, + anchors, + variances, + pre_nms_top_n=6000, + post_nms_top_n=1000, + nms_thresh=0.5, + min_size=0.1, + eta=1.0, + return_rois_num=False, + name=None): + """ + **Generate proposal Faster-RCNN** + This operation proposes RoIs according to each box with their + probability to be a foreground object and + the box can be calculated by anchors. Bbox_deltais and scores + to be an object are the output of RPN. Final proposals + could be used to train detection net. + For generating proposals, this operation performs following steps: + 1. Transposes and resizes scores and bbox_deltas in size of + (H*W*A, 1) and (H*W*A, 4) + 2. Calculate box locations as proposals candidates. + 3. Clip boxes to image + 4. Remove predicted boxes with small area. + 5. Apply NMS to get final proposals as output. + Args: + scores(Tensor): A 4-D Tensor with shape [N, A, H, W] represents + the probability for each box to be an object. + N is batch size, A is number of anchors, H and W are height and + width of the feature map. The data type must be float32. + bbox_deltas(Tensor): A 4-D Tensor with shape [N, 4*A, H, W] + represents the difference between predicted box location and + anchor location. The data type must be float32. + im_shape(Tensor): A 2-D Tensor with shape [N, 2] represents H, W, the + origin image size or input size. The data type can be float32 or + float64. + anchors(Tensor): A 4-D Tensor represents the anchors with a layout + of [H, W, A, 4]. H and W are height and width of the feature map, + num_anchors is the box count of each position. Each anchor is + in (xmin, ymin, xmax, ymax) format an unnormalized. The data type must be float32. + variances(Tensor): A 4-D Tensor. The expanded variances of anchors with a layout of + [H, W, num_priors, 4]. Each variance is in + (xcenter, ycenter, w, h) format. The data type must be float32. + pre_nms_top_n(float): Number of total bboxes to be kept per + image before NMS. The data type must be float32. `6000` by default. + post_nms_top_n(float): Number of total bboxes to be kept per + image after NMS. The data type must be float32. `1000` by default. + nms_thresh(float): Threshold in NMS. The data type must be float32. `0.5` by default. + min_size(float): Remove predicted boxes with either height or + width < min_size. The data type must be float32. `0.1` by default. + eta(float): Apply in adaptive NMS, if adaptive `threshold > 0.5`, + `adaptive_threshold = adaptive_threshold * eta` in each iteration. + return_rois_num(bool): When setting True, it will return a 1D Tensor with shape [N, ] that includes Rois's + num of each image in one batch. The N is the image's num. For example, the tensor has values [4,5] that represents + the first image has 4 Rois, the second image has 5 Rois. It only used in rcnn model. + 'False' by default. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + + Returns: + tuple: + A tuple with format ``(rpn_rois, rpn_roi_probs)``. + - **rpn_rois**: The generated RoIs. 2-D Tensor with shape ``[N, 4]`` while ``N`` is the number of RoIs. The data type is the same as ``scores``. + - **rpn_roi_probs**: The scores of generated RoIs. 2-D Tensor with shape ``[N, 1]`` while ``N`` is the number of RoIs. The data type is the same as ``scores``. + + Examples: + .. code-block:: python + + import paddle + from ppdet.modeling import ops + paddle.enable_static() + scores = paddle.static.data(name='scores', shape=[None, 4, 5, 5], dtype='float32') + bbox_deltas = paddle.static.data(name='bbox_deltas', shape=[None, 16, 5, 5], dtype='float32') + im_shape = paddle.static.data(name='im_shape', shape=[None, 2], dtype='float32') + anchors = paddle.static.data(name='anchors', shape=[None, 5, 4, 4], dtype='float32') + variances = paddle.static.data(name='variances', shape=[None, 5, 10, 4], dtype='float32') + rois, roi_probs = ops.generate_proposals(scores, bbox_deltas, + im_shape, anchors, variances) + """ + if in_dygraph_mode(): + assert return_rois_num, "return_rois_num should be True in dygraph mode." + attrs = ('pre_nms_topN', pre_nms_top_n, 'post_nms_topN', post_nms_top_n, + 'nms_thresh', nms_thresh, 'min_size', min_size, 'eta', eta) + rpn_rois, rpn_roi_probs, rpn_rois_num = core.ops.generate_proposals_v2( + scores, bbox_deltas, im_shape, anchors, variances, *attrs) + return rpn_rois, rpn_roi_probs, rpn_rois_num + + helper = LayerHelper('generate_proposals_v2', **locals()) + + check_variable_and_dtype(scores, 'scores', ['float32'], + 'generate_proposals_v2') + check_variable_and_dtype(bbox_deltas, 'bbox_deltas', ['float32'], + 'generate_proposals_v2') + check_variable_and_dtype(im_shape, 'im_shape', ['float32', 'float64'], + 'generate_proposals_v2') + check_variable_and_dtype(anchors, 'anchors', ['float32'], + 'generate_proposals_v2') + check_variable_and_dtype(variances, 'variances', ['float32'], + 'generate_proposals_v2') + + rpn_rois = helper.create_variable_for_type_inference( + dtype=bbox_deltas.dtype) + rpn_roi_probs = helper.create_variable_for_type_inference( + dtype=scores.dtype) + outputs = { + 'RpnRois': rpn_rois, + 'RpnRoiProbs': rpn_roi_probs, + } + if return_rois_num: + rpn_rois_num = helper.create_variable_for_type_inference(dtype='int32') + rpn_rois_num.stop_gradient = True + outputs['RpnRoisNum'] = rpn_rois_num + + helper.append_op( + type="generate_proposals_v2", + inputs={ + 'Scores': scores, + 'BboxDeltas': bbox_deltas, + 'ImShape': im_shape, + 'Anchors': anchors, + 'Variances': variances + }, + attrs={ + 'pre_nms_topN': pre_nms_top_n, + 'post_nms_topN': post_nms_top_n, + 'nms_thresh': nms_thresh, + 'min_size': min_size, + 'eta': eta + }, + outputs=outputs) + rpn_rois.stop_gradient = True + rpn_roi_probs.stop_gradient = True + + if return_rois_num: + return rpn_rois, rpn_roi_probs, rpn_rois_num + else: + return rpn_rois, rpn_roi_probs diff --git a/ppdet/modeling/tests/test_ops.py b/ppdet/modeling/tests/test_ops.py index e6295f69b6d0ede75d4ebc2c49e681aab4bfb88e..1fb8b83a2f4a65bebf4fdbf0fbf02bd383224fc6 100644 --- a/ppdet/modeling/tests/test_ops.py +++ b/ppdet/modeling/tests/test_ops.py @@ -764,5 +764,69 @@ class TestBoxCoder(LayerTest): prior_box_var, target_box) +class TestGenerateProposals(LayerTest): + def test_generate_proposals(self): + scores_np = np.random.rand(2, 3, 4, 4).astype('float32') + bbox_deltas_np = np.random.rand(2, 12, 4, 4).astype('float32') + im_shape_np = np.array([[8, 8], [6, 6]]).astype('float32') + anchors_np = np.reshape(np.arange(4 * 4 * 3 * 4), + [4, 4, 3, 4]).astype('float32') + variances_np = np.ones((4, 4, 3, 4)).astype('float32') + + with self.static_graph(): + scores = paddle.static.data( + name='scores', shape=[2, 3, 4, 4], dtype='float32') + bbox_deltas = paddle.static.data( + name='bbox_deltas', shape=[2, 12, 4, 4], dtype='float32') + im_shape = paddle.static.data( + name='im_shape', shape=[2, 2], dtype='float32') + anchors = paddle.static.data( + name='anchors', shape=[4, 4, 3, 4], dtype='float32') + variances = paddle.static.data( + name='var', shape=[4, 4, 3, 4], dtype='float32') + rois, roi_probs, rois_num = ops.generate_proposals( + scores, + bbox_deltas, + im_shape, + anchors, + variances, + pre_nms_top_n=10, + post_nms_top_n=5, + return_rois_num=True) + rois_stat, roi_probs_stat, rois_num_stat = self.get_static_graph_result( + feed={ + 'scores': scores_np, + 'bbox_deltas': bbox_deltas_np, + 'im_shape': im_shape_np, + 'anchors': anchors_np, + 'var': variances_np + }, + fetch_list=[rois, roi_probs, rois_num], + with_lod=True) + + with self.dynamic_graph(): + scores_dy = base.to_variable(scores_np) + bbox_deltas_dy = base.to_variable(bbox_deltas_np) + im_shape_dy = base.to_variable(im_shape_np) + anchors_dy = base.to_variable(anchors_np) + variances_dy = base.to_variable(variances_np) + rois, roi_probs, rois_num = ops.generate_proposals( + scores_dy, + bbox_deltas_dy, + im_shape_dy, + anchors_dy, + variances_dy, + pre_nms_top_n=10, + post_nms_top_n=5, + return_rois_num=True) + rois_dy = rois.numpy() + roi_probs_dy = roi_probs.numpy() + rois_num_dy = rois_num.numpy() + + self.assertTrue(np.array_equal(np.array(rois_stat), rois_dy)) + self.assertTrue(np.array_equal(np.array(roi_probs_stat), roi_probs_dy)) + self.assertTrue(np.array_equal(np.array(rois_num_stat), rois_num_dy)) + + if __name__ == '__main__': unittest.main()