diff --git a/python/paddle/fluid/contrib/layers/nn.py b/python/paddle/fluid/contrib/layers/nn.py index bd8ea98e414a3ed40ab1191a76f435197add528c..5f20b66d0f433ae60e1bae816593a8ed0c709897 100644 --- a/python/paddle/fluid/contrib/layers/nn.py +++ b/python/paddle/fluid/contrib/layers/nn.py @@ -30,6 +30,7 @@ __all__ = [ 'var_conv_2d', 'match_matrix_tensor', 'tree_conv', + 'multiclass_nms2', ] @@ -427,3 +428,138 @@ def tree_conv(nodes_vector, else: pre_activation = out return helper.append_activation(pre_activation) + + +def multiclass_nms2(bboxes, + scores, + score_threshold, + nms_top_k, + keep_top_k, + nms_threshold=0.3, + normalized=True, + nms_eta=1., + background_label=0, + return_index=False, + name=None): + """ + **Multiclass NMS2** + + This operator is to do multi-class non maximum suppression (NMS) on + boxes and scores. + + In the NMS step, this operator greedily selects a subset of detection bounding + boxes that have high scores larger than score_threshold, if providing this + threshold, then selects the largest nms_top_k confidence scores if nms_top_k + is larger than -1. Then this operator prunes away boxes that have high IOU + (intersection over union) overlap with already selected boxes by adaptive + threshold NMS based on parameters of nms_threshold and nms_eta. + + After NMS step, at most keep_top_k number of total bboxes are to be kept + per image if keep_top_k is larger than -1. + + Args: + bboxes (Variable): Two types of bboxes are supported: + 1. (Tensor) A 3-D Tensor with shape + [N, M, 4 or 8 16 24 32] represents the + predicted locations of M bounding boxes, + N is the batch size. Each bounding box has four + coordinate values and the layout is + [xmin, ymin, xmax, ymax], when box size equals to 4. + 2. (LoDTensor) A 3-D Tensor with shape [M, C, 4] + M is the number of bounding boxes, C is the + class number + scores (Variable): Two types of scores are supported: + 1. 
(Tensor) A 3-D Tensor with shape [N, C, M] + represents the predicted confidence scores. + N is the batch size, C is the class number, M is + the number of bounding boxes. For each category there + are total M scores which correspond to the M bounding + boxes. Please note, M is equal to the 2nd dimension + of BBoxes. + 2. (LoDTensor) A 2-D LoDTensor with shape [M, C]. + M is the number of bbox, C is the class number. + In this case, input BBoxes should be the second + case with shape [M, C, 4]. + background_label (int): The index of background label, the background + label will be ignored. If set to -1, then all + categories will be considered. Default: 0 + score_threshold (float): Threshold to filter out bounding boxes with + low confidence score. If not provided, + consider all boxes. + nms_top_k (int): Maximum number of detections to be kept according to + the confidences after filtering detections based + on score_threshold. + nms_threshold (float): The threshold to be used in NMS. Default: 0.3 + nms_eta (float): The parameter for adaptive threshold NMS. Default: 1.0 + keep_top_k (int): Number of total bboxes to be kept per image after NMS + step. -1 means keeping all bboxes after NMS step. + normalized (bool): Whether detections are normalized. Default: True + return_index(bool): Whether to return the selected index. Default: False + name(str): Name of the multiclass nms op. Default: None. + + Returns: + A tuple with two Variables: (Out, Index) if return_index is True, + otherwise only the Variable Out is returned. + + Out: A 2-D LoDTensor with shape [No, 6] represents the detections. + Each row has 6 values: [label, confidence, xmin, ymin, xmax, ymax] + or A 2-D LoDTensor with shape [No, 10] represents the detections. + Each row has 10 values: [label, confidence, x1, y1, x2, y2, x3, y3, + x4, y4]. No is the total number of detections. + + If no image has any detected result, all elements in LoD will be + 0, and output tensor is empty (None). 
+ + Index: Only returned when return_index is True. A 2-D LoDTensor with + shape [No, 1] represents the selected indices, whose type is integer. + The index is the absolute index across batches. No is the same number + as Out. If the index is used to gather other attributes such as age, + one needs to reshape the input(N, M, 1) to (N * M, 1) first, where + N is the batch size and M is the number of boxes. + + + Examples: + .. code-block:: python + + + import paddle.fluid as fluid + boxes = fluid.layers.data(name='bboxes', shape=[81, 4], + dtype='float32', lod_level=1) + scores = fluid.layers.data(name='scores', shape=[81], + dtype='float32', lod_level=1) + out, index = fluid.contrib.multiclass_nms2(bboxes=boxes, + scores=scores, + background_label=0, + score_threshold=0.5, + nms_top_k=400, + nms_threshold=0.3, + keep_top_k=200, + normalized=False, + return_index=True) + """ + helper = LayerHelper('multiclass_nms2', **locals()) + + output = helper.create_variable_for_type_inference(dtype=bboxes.dtype) + index = helper.create_variable_for_type_inference(dtype='int') + helper.append_op( + type="multiclass_nms2", + inputs={'BBoxes': bboxes, + 'Scores': scores}, + attrs={ + 'background_label': background_label, + 'score_threshold': score_threshold, + 'nms_top_k': nms_top_k, + 'nms_threshold': nms_threshold, + 'nms_eta': nms_eta, + 'keep_top_k': keep_top_k, + 'nms_eta': nms_eta, + 'normalized': normalized + }, + outputs={'Out': output, + 'Index': index}) + output.stop_gradient = True + index.stop_gradient = True + + if return_index: + return output, index + return output diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index 8bc56f5f637a62a88a35e9e3fc350d468478ec45..e84510292f26a2732a70695b08948fcdf2c22cc7 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -53,7 +53,6 @@ __all__ = [ 'yolo_box', 'box_clip', 'multiclass_nms', - 'multiclass_nms2', 'retinanet_detection_output', 
'distribute_fpn_proposals', 'box_decoder_and_assign', @@ -3148,141 +3147,6 @@ def multiclass_nms(bboxes, return output -def multiclass_nms2(bboxes, - scores, - score_threshold, - nms_top_k, - keep_top_k, - nms_threshold=0.3, - normalized=True, - nms_eta=1., - background_label=0, - return_index=False, - name=None): - """ - **Multiclass NMS2** - - This operator is to do multi-class non maximum suppression (NMS) on - boxes and scores. - - In the NMS step, this operator greedily selects a subset of detection bounding - boxes that have high scores larger than score_threshold, if providing this - threshold, then selects the largest nms_top_k confidences scores if nms_top_k - is larger than -1. Then this operator pruns away boxes that have high IOU - (intersection over union) overlap with already selected boxes by adaptive - threshold NMS based on parameters of nms_threshold and nms_eta. - - Aftern NMS step, at most keep_top_k number of total bboxes are to be kept - per image if keep_top_k is larger than -1. - - Args: - bboxes (Variable): Two types of bboxes are supported: - 1. (Tensor) A 3-D Tensor with shape - [N, M, 4 or 8 16 24 32] represents the - predicted locations of M bounding bboxes, - N is the batch size. Each bounding box has four - coordinate values and the layout is - [xmin, ymin, xmax, ymax], when box size equals to 4. - 2. (LoDTensor) A 3-D Tensor with shape [M, C, 4] - M is the number of bounding boxes, C is the - class number - scores (Variable): Two types of scores are supported: - 1. (Tensor) A 3-D Tensor with shape [N, C, M] - represents the predicted confidence predictions. - N is the batch size, C is the class number, M is - number of bounding boxes. For each category there - are total M scores which corresponding M bounding - boxes. Please note, M is equal to the 2nd dimension - of BBoxes. - 2. (LoDTensor) A 2-D LoDTensor with shape [M, C]. - M is the number of bbox, C is the class number. 
- In this case, input BBoxes should be the second - case with shape [M, C, 4]. - background_label (int): The index of background label, the background - label will be ignored. If set to -1, then all - categories will be considered. Default: 0 - score_threshold (float): Threshold to filter out bounding boxes with - low confidence score. If not provided, - consider all boxes. - nms_top_k (int): Maximum number of detections to be kept according to - the confidences aftern the filtering detections based - on score_threshold. - nms_threshold (float): The threshold to be used in NMS. Default: 0.3 - nms_eta (float): The threshold to be used in NMS. Default: 1.0 - keep_top_k (int): Number of total bboxes to be kept per image after NMS - step. -1 means keeping all bboxes after NMS step. - normalized (bool): Whether detections are normalized. Default: True - return_index(bool): Whether return selected index. Default: False - name(str): Name of the multiclass nms op. Default: None. - - Returns: - A tuple with two Variables: (Out, Index) if return_index is True, - otherwise, a tuple with one Variable(Out) is returned. - - Out: A 2-D LoDTensor with shape [No, 6] represents the detections. - Each row has 6 values: [label, confidence, xmin, ymin, xmax, ymax] - or A 2-D LoDTensor with shape [No, 10] represents the detections. - Each row has 10 values: [label, confidence, x1, y1, x2, y2, x3, y3, - x4, y4]. No is the total number of detections. - - If all images have not detected results, all elements in LoD will be - 0, and output tensor is empty (None). - - Index: Only return when return_index is True. A 2-D LoDTensor with - shape [No, 1] represents the selected index which type is Integer. - The index is the absolute value cross batches. No is the same number - as Out. If the index is used to gather other attribute such as age, - one needs to reshape the input(N, M, 1) to (N * M, 1) as first, where - N is the batch size and M is the number of boxes. - - - Examples: - .. 
code-block:: python - - - import paddle.fluid as fluid - boxes = fluid.layers.data(name='bboxes', shape=[81, 4], - dtype='float32', lod_level=1) - scores = fluid.layers.data(name='scores', shape=[81], - dtype='float32', lod_level=1) - out, index = fluid.layers.multiclass_nms2(bboxes=boxes, - scores=scores, - background_label=0, - score_threshold=0.5, - nms_top_k=400, - nms_threshold=0.3, - keep_top_k=200, - normalized=False, - return_index=True) - """ - helper = LayerHelper('multiclass_nms2', **locals()) - - output = helper.create_variable_for_type_inference(dtype=bboxes.dtype) - index = helper.create_variable_for_type_inference(dtype='int') - helper.append_op( - type="multiclass_nms2", - inputs={'BBoxes': bboxes, - 'Scores': scores}, - attrs={ - 'background_label': background_label, - 'score_threshold': score_threshold, - 'nms_top_k': nms_top_k, - 'nms_threshold': nms_threshold, - 'nms_eta': nms_eta, - 'keep_top_k': keep_top_k, - 'nms_eta': nms_eta, - 'normalized': normalized - }, - outputs={'Out': output, - 'Index': index}) - output.stop_gradient = True - index.stop_gradient = True - - if return_index: - return output, index - return output - - def distribute_fpn_proposals(fpn_rois, min_level, max_level, diff --git a/python/paddle/fluid/tests/test_detection.py b/python/paddle/fluid/tests/test_detection.py index 9215267dfbf875d32394250afd40be5f9b0874f5..cb798c8ed595d13dd8ff5e33323d6e796aaac6f9 100644 --- a/python/paddle/fluid/tests/test_detection.py +++ b/python/paddle/fluid/tests/test_detection.py @@ -557,8 +557,9 @@ class TestMulticlassNMS2(unittest.TestCase): bboxes = layers.data( name='bboxes', shape=[-1, 10, 4], dtype='float32') scores = layers.data(name='scores', shape=[-1, 10], dtype='float32') - output = layers.multiclass_nms2(bboxes, scores, 0.3, 400, 200, 0.7) - output2, index = layers.multiclass_nms2( + output = fluid.contrib.multiclass_nms2(bboxes, scores, 0.3, 400, + 200, 0.7) + output2, index = fluid.contrib.multiclass_nms2( bboxes, scores, 0.3, 400, 
200, 0.7, return_index=True) self.assertIsNotNone(output) self.assertIsNotNone(output2)