From 8fc1cf6097062373db6874f6343bf27ab0f2780f Mon Sep 17 00:00:00 2001 From: shangliang Xu Date: Wed, 27 Jul 2022 21:53:34 +0800 Subject: [PATCH] add matrix_nms in python/paddle/vision/ops.py (#44357) --- .../tests/unittests/test_matrix_nms_op.py | 62 +++++-- .../fluid/tests/unittests/test_ops_nms.py | 16 ++ python/paddle/vision/ops.py | 153 ++++++++++++++++-- 3 files changed, 201 insertions(+), 30 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_matrix_nms_op.py b/python/paddle/fluid/tests/unittests/test_matrix_nms_op.py index 2e73e4d782..c85b715b0c 100644 --- a/python/paddle/fluid/tests/unittests/test_matrix_nms_op.py +++ b/python/paddle/fluid/tests/unittests/test_matrix_nms_op.py @@ -19,6 +19,7 @@ import copy from op_test import OpTest import paddle.fluid as fluid from paddle.fluid import Program, program_guard +import paddle def softmax(x): @@ -237,22 +238,22 @@ class TestMatrixNMSOpGaussian(TestMatrixNMSOp): class TestMatrixNMSError(unittest.TestCase): def test_errors(self): - with program_guard(Program(), Program()): - M = 1200 - N = 7 - C = 21 - BOX_SIZE = 4 - nms_top_k = 400 - keep_top_k = 200 - score_threshold = 0.01 - post_threshold = 0. - - boxes_np = np.random.random((M, C, BOX_SIZE)).astype('float32') - scores = np.random.random((N * M, C)).astype('float32') - scores = np.apply_along_axis(softmax, 1, scores) - scores = np.reshape(scores, (N, M, C)) - scores_np = np.transpose(scores, (0, 2, 1)) + M = 1200 + N = 7 + C = 21 + BOX_SIZE = 4 + nms_top_k = 400 + keep_top_k = 200 + score_threshold = 0.01 + post_threshold = 0. 
+ boxes_np = np.random.random((M, C, BOX_SIZE)).astype('float32') + scores = np.random.random((N * M, C)).astype('float32') + scores = np.apply_along_axis(softmax, 1, scores) + scores = np.reshape(scores, (N, M, C)) + scores_np = np.transpose(scores, (0, 2, 1)) + + with program_guard(Program(), Program()): boxes_data = fluid.data(name='bboxes', shape=[M, C, BOX_SIZE], dtype='float32') @@ -268,6 +269,12 @@ class TestMatrixNMSError(unittest.TestCase): keep_top_k=keep_top_k, score_threshold=score_threshold, post_threshold=post_threshold) + paddle.vision.ops.matrix_nms(bboxes=boxes_np, + scores=scores_data, + nms_top_k=nms_top_k, + keep_top_k=keep_top_k, + score_threshold=score_threshold, + post_threshold=post_threshold) def test_scores_Variable(): # the scores type must be Variable @@ -277,6 +284,12 @@ class TestMatrixNMSError(unittest.TestCase): keep_top_k=keep_top_k, score_threshold=score_threshold, post_threshold=post_threshold) + paddle.vision.ops.matrix_nms(bboxes=boxes_data, + scores=scores_np, + nms_top_k=nms_top_k, + keep_top_k=keep_top_k, + score_threshold=score_threshold, + post_threshold=post_threshold) def test_empty(): # when all score are lower than threshold @@ -289,6 +302,15 @@ class TestMatrixNMSError(unittest.TestCase): post_threshold=post_threshold) except Exception as e: self.fail(e) + try: + paddle.vision.ops.matrix_nms(bboxes=boxes_data, + scores=scores_data, + nms_top_k=nms_top_k, + keep_top_k=keep_top_k, + score_threshold=10., + post_threshold=post_threshold) + except Exception as e: + self.fail(e) def test_coverage(): # cover correct workflow @@ -301,6 +323,16 @@ class TestMatrixNMSError(unittest.TestCase): post_threshold=post_threshold) except Exception as e: self.fail(e) + try: + paddle.vision.ops.matrix_nms( + bboxes=boxes_data, + scores=scores_data, + nms_top_k=nms_top_k, + keep_top_k=keep_top_k, + score_threshold=score_threshold, + post_threshold=post_threshold) + except Exception as e: + self.fail(e) self.assertRaises(TypeError, 
test_bboxes_Variable) self.assertRaises(TypeError, test_scores_Variable) diff --git a/python/paddle/fluid/tests/unittests/test_ops_nms.py b/python/paddle/fluid/tests/unittests/test_ops_nms.py index c775a47bd2..3d6f2b717f 100644 --- a/python/paddle/fluid/tests/unittests/test_ops_nms.py +++ b/python/paddle/fluid/tests/unittests/test_ops_nms.py @@ -197,6 +197,22 @@ class TestOpsNMS(unittest.TestCase): "origin out: {}\n inference model out: {}\n".format( origin, res)) + def test_matrix_nms_dynamic(self): + for device in self.devices: + for dtype in self.dtypes: + boxes, scores, category_idxs, categories = gen_args( + self.num_boxes, dtype) + scores = np.random.rand(1, 4, self.num_boxes).astype(dtype) + paddle.set_device(device) + out = paddle.vision.ops.matrix_nms( + paddle.to_tensor(boxes).unsqueeze(0), + paddle.to_tensor(scores), + self.threshold, + post_threshold=0., + nms_top_k=400, + keep_top_k=100, + ) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/vision/ops.py b/python/paddle/vision/ops.py index cdb8417b6b..aef90bb140 100644 --- a/python/paddle/vision/ops.py +++ b/python/paddle/vision/ops.py @@ -24,21 +24,10 @@ from paddle.common_ops_import import * from paddle import _C_ops __all__ = [ #noqa - 'yolo_loss', - 'yolo_box', - 'deform_conv2d', - 'DeformConv2D', - 'distribute_fpn_proposals', - 'generate_proposals', - 'read_file', - 'decode_jpeg', - 'roi_pool', - 'RoIPool', - 'psroi_pool', - 'PSRoIPool', - 'roi_align', - 'RoIAlign', - 'nms', + 'yolo_loss', 'yolo_box', 'deform_conv2d', 'DeformConv2D', + 'distribute_fpn_proposals', 'generate_proposals', 'read_file', + 'decode_jpeg', 'roi_pool', 'RoIPool', 'psroi_pool', 'PSRoIPool', + 'roi_align', 'RoIAlign', 'nms', 'matrix_nms' ] @@ -1802,3 +1791,137 @@ def generate_proposals(scores, rpn_rois_num = None return rpn_rois, rpn_roi_probs, rpn_rois_num + + +def matrix_nms(bboxes, + scores, + score_threshold, + post_threshold, + nms_top_k, + keep_top_k, + use_gaussian=False, + gaussian_sigma=2., + 
background_label=0, + normalized=True, + return_index=False, + return_rois_num=True, + name=None): + """ + This operator does matrix non maximum suppression (NMS). + First selects a subset of candidate bounding boxes that have higher scores + than score_threshold (if provided), then the top k candidates are selected if + nms_top_k is larger than -1. Scores of the remaining candidates are then + decayed according to the Matrix NMS scheme. + After the NMS step, at most keep_top_k number of total bboxes are to be kept + per image if keep_top_k is larger than -1. + Args: + bboxes (Tensor): A 3-D Tensor with shape [N, M, 4] represents the + predicted locations of M bounding boxes, + N is the batch size. Each bounding box has four + coordinate values and the layout is + [xmin, ymin, xmax, ymax], when box size equals to 4. + The data type is float32 or float64. + scores (Tensor): A 3-D Tensor with shape [N, C, M] + represents the predicted confidence predictions. + N is the batch size, C is the class number, M is + number of bounding boxes. For each category there + are total M scores which correspond to M bounding + boxes. Please note, M is equal to the 2nd dimension + of BBoxes. The data type is float32 or float64. + score_threshold (float): Threshold to filter out bounding boxes with + low confidence score. + post_threshold (float): Threshold to filter out bounding boxes with + low confidence score AFTER decaying. + nms_top_k (int): Maximum number of detections to be kept according to + the confidences after the filtering detections based + on score_threshold. + keep_top_k (int): Number of total bboxes to be kept per image after NMS + step. -1 means keeping all bboxes after NMS step. + use_gaussian (bool): Use Gaussian as the decay function. Default: False + gaussian_sigma (float): Sigma for Gaussian decay function. Default: 2.0 + background_label (int): The index of background label, the background + label will be ignored. 
If set to -1, then all + categories will be considered. Default: 0 + normalized (bool): Whether detections are normalized. Default: True + return_index(bool): Whether to return the selected index. Default: False + return_rois_num(bool): Whether to return rois_num. Default: True + name(str): Name of the matrix nms op. Default: None. + Returns: + A tuple with three elements: (Out, RoisNum, Index). RoisNum is None + if return_rois_num is False, and Index is None if return_index is False. + Out (Tensor): A 2-D Tensor with shape [No, 6] containing the + detection results. + Each row has 6 values: [label, confidence, xmin, ymin, xmax, ymax] + Index (Tensor): A 2-D Tensor with shape [No, 1] containing the + selected indices, which are absolute values across batches. + rois_num (Tensor): A 1-D Tensor with shape [N] containing + the number of detected boxes in each image. + Examples: + .. code-block:: python + import paddle + from paddle.vision.ops import matrix_nms + boxes = paddle.rand([4, 1, 4]) + boxes[..., 2] = boxes[..., 0] + boxes[..., 2] + boxes[..., 3] = boxes[..., 1] + boxes[..., 3] + scores = paddle.rand([4, 80, 1]) + out = matrix_nms(bboxes=boxes, scores=scores, background_label=0, + score_threshold=0.5, post_threshold=0.1, + nms_top_k=400, keep_top_k=200, normalized=False) + """ + check_variable_and_dtype(bboxes, 'BBoxes', ['float32', 'float64'], + 'matrix_nms') + check_variable_and_dtype(scores, 'Scores', ['float32', 'float64'], + 'matrix_nms') + check_type(score_threshold, 'score_threshold', float, 'matrix_nms') + check_type(post_threshold, 'post_threshold', float, 'matrix_nms') + check_type(nms_top_k, 'nums_top_k', int, 'matrix_nms') + check_type(keep_top_k, 'keep_top_k', int, 'matrix_nms') + check_type(normalized, 'normalized', bool, 'matrix_nms') + check_type(use_gaussian, 'use_gaussian', bool, 'matrix_nms') + check_type(gaussian_sigma, 'gaussian_sigma', float, 'matrix_nms') + check_type(background_label, 'background_label', int, 'matrix_nms') + + if in_dygraph_mode(): + 
attrs = ('background_label', background_label, 'score_threshold', + score_threshold, 'post_threshold', post_threshold, 'nms_top_k', + nms_top_k, 'gaussian_sigma', gaussian_sigma, 'use_gaussian', + use_gaussian, 'keep_top_k', keep_top_k, 'normalized', + normalized) + out, index, rois_num = _C_ops.matrix_nms(bboxes, scores, *attrs) + if not return_index: + index = None + if not return_rois_num: + rois_num = None + return out, rois_num, index + else: + helper = LayerHelper('matrix_nms', **locals()) + output = helper.create_variable_for_type_inference(dtype=bboxes.dtype) + index = helper.create_variable_for_type_inference(dtype='int32') + outputs = {'Out': output, 'Index': index} + if return_rois_num: + rois_num = helper.create_variable_for_type_inference(dtype='int32') + outputs['RoisNum'] = rois_num + + helper.append_op(type="matrix_nms", + inputs={ + 'BBoxes': bboxes, + 'Scores': scores + }, + attrs={ + 'background_label': background_label, + 'score_threshold': score_threshold, + 'post_threshold': post_threshold, + 'nms_top_k': nms_top_k, + 'gaussian_sigma': gaussian_sigma, + 'use_gaussian': use_gaussian, + 'keep_top_k': keep_top_k, + 'normalized': normalized + }, + outputs=outputs) + output.stop_gradient = True + + if not return_index: + index = None + if not return_rois_num: + rois_num = None + return output, rois_num, index -- GitLab