cherry-pick, move nms2 to contrib, test=release/1.6 (#20710)

c2f86f95 · wangguanzhong · GitHub · 8fb760da · c2f86f95 · c2f86f95
3 changed files
--- a/python/paddle/fluid/contrib/layers/nn.py
+++ b/python/paddle/fluid/contrib/layers/nn.py
@@ -30,6 +30,7 @@ __all__ = [
    'var_conv_2d',
    'match_matrix_tensor',
    'tree_conv',
+    'multiclass_nms2',
 ]
@@ -427,3 +428,138 @@ def tree_conv(nodes_vector,
    else:
        pre_activation = out
    return helper.append_activation(pre_activation)
+def multiclass_nms2(bboxes,
+                    scores,
+                    score_threshold,
+                    nms_top_k,
+                    keep_top_k,
+                    nms_threshold=0.3,
+                    normalized=True,
+                    nms_eta=1.,
+                    background_label=0,
+                    return_index=False,
+                    name=None):
+    """
+    **Multiclass NMS2**
+
+    This operator is to do multi-class non maximum suppression (NMS) on
+    boxes and scores.
+
+    In the NMS step, this operator greedily selects a subset of detection bounding
+    boxes that have high scores larger than score_threshold, if providing this
+    threshold, then selects the largest nms_top_k confidences scores if nms_top_k
+    is larger than -1. Then this operator prunes away boxes that have high IOU
+    (intersection over union) overlap with already selected boxes by adaptive
+    threshold NMS based on parameters of nms_threshold and nms_eta.
+
+    After the NMS step, at most keep_top_k number of total bboxes are to be kept
+    per image if keep_top_k is larger than -1.
+
+    Args:
+        bboxes (Variable): Two types of bboxes are supported:
+                           1. (Tensor) A 3-D Tensor with shape
+                           [N, M, 4 or 8 16 24 32] represents the
+                           predicted locations of M bounding boxes,
+                           N is the batch size. Each bounding box has four
+                           coordinate values and the layout is
+                           [xmin, ymin, xmax, ymax], when box size equals to 4.
+                           2. (LoDTensor) A 3-D Tensor with shape [M, C, 4]
+                           M is the number of bounding boxes, C is the
+                           class number
+        scores (Variable): Two types of scores are supported:
+                           1. (Tensor) A 3-D Tensor with shape [N, C, M]
+                           represents the predicted confidence predictions.
+                           N is the batch size, C is the class number, M is
+                           number of bounding boxes. For each category there
+                           are total M scores which corresponding M bounding
+                           boxes. Please note, M is equal to the 2nd dimension
+                           of BBoxes.
+                           2. (LoDTensor) A 2-D LoDTensor with shape [M, C].
+                           M is the number of bbox, C is the class number.
+                           In this case, input BBoxes should be the second
+                           case with shape [M, C, 4].
+        background_label (int): The index of background label, the background
+                                label will be ignored. If set to -1, then all
+                                categories will be considered. Default: 0
+        score_threshold (float): Threshold to filter out bounding boxes with
+                                 low confidence score. If not provided,
+                                 consider all boxes.
+        nms_top_k (int): Maximum number of detections to be kept according to
+                         the confidences after the filtering detections based
+                         on score_threshold.
+        nms_threshold (float): The threshold to be used in NMS. Default: 0.3
+        nms_eta (float): The parameter of the adaptive threshold NMS, used
+                         together with nms_threshold. Default: 1.0
+        keep_top_k (int): Number of total bboxes to be kept per image after NMS
+                          step. -1 means keeping all bboxes after NMS step.
+        normalized (bool): Whether detections are normalized. Default: True
+        return_index(bool): Whether return selected index. Default: False
+        name(str): Name of the multiclass nms op. Default: None.
+
+    Returns:
+        A tuple with two Variables: (Out, Index) if return_index is True,
+        otherwise only the Variable Out is returned (not wrapped in a tuple).
+
+        Out: A 2-D LoDTensor with shape [No, 6] represents the detections.
+        Each row has 6 values: [label, confidence, xmin, ymin, xmax, ymax]
+        or A 2-D LoDTensor with shape [No, 10] represents the detections.
+        Each row has 10 values: [label, confidence, x1, y1, x2, y2, x3, y3,
+        x4, y4]. No is the total number of detections.
+        If all images have not detected results, all elements in LoD will be
+        0, and output tensor is empty (None).
+
+        Index: Only return when return_index is True. A 2-D LoDTensor with
+        shape [No, 1] represents the selected index which type is Integer.
+        The index is the absolute value across batches. No is the same number
+        as Out. If the index is used to gather other attribute such as age,
+        one needs to reshape the input(N, M, 1) to (N * M, 1) as first, where
+        N is the batch size and M is the number of boxes.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle.fluid as fluid
+            boxes = fluid.layers.data(name='bboxes', shape=[81, 4],
+                                      dtype='float32', lod_level=1)
+            scores = fluid.layers.data(name='scores', shape=[81],
+                                      dtype='float32', lod_level=1)
+            out, index = fluid.contrib.multiclass_nms2(bboxes=boxes,
+                                              scores=scores,
+                                              background_label=0,
+                                              score_threshold=0.5,
+                                              nms_top_k=400,
+                                              nms_threshold=0.3,
+                                              keep_top_k=200,
+                                              normalized=False,
+                                              return_index=True)
+    """
+    helper = LayerHelper('multiclass_nms2', **locals())
+
+    output = helper.create_variable_for_type_inference(dtype=bboxes.dtype)
+    # The Index output is always wired to the op; it is only handed back to
+    # the caller when return_index is True.
+    index = helper.create_variable_for_type_inference(dtype='int')
+    helper.append_op(
+        type="multiclass_nms2",
+        inputs={'BBoxes': bboxes,
+                'Scores': scores},
+        attrs={
+            'background_label': background_label,
+            'score_threshold': score_threshold,
+            'nms_top_k': nms_top_k,
+            'nms_threshold': nms_threshold,
+            'nms_eta': nms_eta,
+            'keep_top_k': keep_top_k,
+            'normalized': normalized
+        },
+        outputs={'Out': output,
+                 'Index': index})
+    # Neither output takes part in back-propagation.
+    output.stop_gradient = True
+    index.stop_gradient = True
+
+    if return_index:
+        return output, index
+    return output
--- a/python/paddle/fluid/layers/detection.py
+++ b/python/paddle/fluid/layers/detection.py
@@ -53,7 +53,6 @@ __all__ = [
    'yolo_box',
    'box_clip',
    'multiclass_nms',
-    'multiclass_nms2',
    'retinanet_detection_output',
    'distribute_fpn_proposals',
    'box_decoder_and_assign',
@@ -3148,141 +3147,6 @@ def multiclass_nms(bboxes,
    return output
-def multiclass_nms2(bboxes,
-                    scores,
-                    score_threshold,
-                    nms_top_k,
-                    keep_top_k,
-                    nms_threshold=0.3,
-                    normalized=True,
-                    nms_eta=1.,
-                    background_label=0,
-                    return_index=False,
-                    name=None):
-    """
-    **Multiclass NMS2**
-    This operator is to do multi-class non maximum suppression (NMS) on
-    boxes and scores.
-    In the NMS step, this operator greedily selects a subset of detection bounding
-    boxes that have high scores larger than score_threshold, if providing this
-    threshold, then selects the largest nms_top_k confidences scores if nms_top_k
-    is larger than -1. Then this operator pruns away boxes that have high IOU
-    (intersection over union) overlap with already selected boxes by adaptive
-    threshold NMS based on parameters of nms_threshold and nms_eta.
-    Aftern NMS step, at most keep_top_k number of total bboxes are to be kept
-    per image if keep_top_k is larger than -1.
-    Args:
-        bboxes (Variable): Two types of bboxes are supported:
-                           1. (Tensor) A 3-D Tensor with shape
-                           [N, M, 4 or 8 16 24 32] represents the
-                           predicted locations of M bounding bboxes,
-                           N is the batch size. Each bounding box has four
-                           coordinate values and the layout is 
-                           [xmin, ymin, xmax, ymax], when box size equals to 4.
-                           2. (LoDTensor) A 3-D Tensor with shape [M, C, 4]
-                           M is the number of bounding boxes, C is the 
-                           class number   
-        scores (Variable): Two types of scores are supported:
-                           1. (Tensor) A 3-D Tensor with shape [N, C, M]
-                           represents the predicted confidence predictions.
-                           N is the batch size, C is the class number, M is 
-                           number of bounding boxes. For each category there 
-                           are total M scores which corresponding M bounding
-                           boxes. Please note, M is equal to the 2nd dimension
-                           of BBoxes.
-                           2. (LoDTensor) A 2-D LoDTensor with shape [M, C].
-                           M is the number of bbox, C is the class number.
-                           In this case, input BBoxes should be the second
-                           case with shape [M, C, 4].
-        background_label (int): The index of background label, the background 
-                                label will be ignored. If set to -1, then all
-                                categories will be considered. Default: 0
-        score_threshold (float): Threshold to filter out bounding boxes with
-                                 low confidence score. If not provided, 
-                                 consider all boxes.
-        nms_top_k (int): Maximum number of detections to be kept according to
-                         the confidences aftern the filtering detections based
-                         on score_threshold.
-        nms_threshold (float): The threshold to be used in NMS. Default: 0.3
-        nms_eta (float): The threshold to be used in NMS. Default: 1.0
-        keep_top_k (int): Number of total bboxes to be kept per image after NMS
-                          step. -1 means keeping all bboxes after NMS step.
-        normalized (bool): Whether detections are normalized. Default: True
-        return_index(bool): Whether return selected index. Default: False
-        name(str): Name of the multiclass nms op. Default: None.
-    Returns:
-        A tuple with two Variables: (Out, Index) if return_index is True,
-        otherwise, a tuple with one Variable(Out) is returned. 
-        Out: A 2-D LoDTensor with shape [No, 6] represents the detections. 
-        Each row has 6 values: [label, confidence, xmin, ymin, xmax, ymax] 
-        or A 2-D LoDTensor with shape [No, 10] represents the detections. 
-        Each row has 10 values: [label, confidence, x1, y1, x2, y2, x3, y3, 
-        x4, y4]. No is the total number of detections. 
-        If all images have not detected results, all elements in LoD will be
-        0, and output tensor is empty (None).
-        Index: Only return when return_index is True. A 2-D LoDTensor with 
-        shape [No, 1] represents the selected index which type is Integer. 
-        The index is the absolute value cross batches. No is the same number 
-        as Out. If the index is used to gather other attribute such as age, 
-        one needs to reshape the input(N, M, 1) to (N * M, 1) as first, where 
-        N is the batch size and M is the number of boxes.
-    Examples:
-        .. code-block:: python
-            import paddle.fluid as fluid
-            boxes = fluid.layers.data(name='bboxes', shape=[81, 4],
-                                      dtype='float32', lod_level=1)
-            scores = fluid.layers.data(name='scores', shape=[81],
-                                      dtype='float32', lod_level=1)
-            out, index = fluid.layers.multiclass_nms2(bboxes=boxes,
-                                              scores=scores,
-                                              background_label=0,
-                                              score_threshold=0.5,
-                                              nms_top_k=400,
-                                              nms_threshold=0.3,
-                                              keep_top_k=200,
-                                              normalized=False,
-                                              return_index=True)
-    """
-    helper = LayerHelper('multiclass_nms2', **locals())
-    output = helper.create_variable_for_type_inference(dtype=bboxes.dtype)
-    index = helper.create_variable_for_type_inference(dtype='int')
-    helper.append_op(
-        type="multiclass_nms2",
-        inputs={'BBoxes': bboxes,
-                'Scores': scores},
-        attrs={
-            'background_label': background_label,
-            'score_threshold': score_threshold,
-            'nms_top_k': nms_top_k,
-            'nms_threshold': nms_threshold,
-            'nms_eta': nms_eta,
-            'keep_top_k': keep_top_k,
-            'nms_eta': nms_eta,
-            'normalized': normalized
-        },
-        outputs={'Out': output,
-                 'Index': index})
-    output.stop_gradient = True
-    index.stop_gradient = True
-    if return_index:
-        return output, index
-    return output
 def distribute_fpn_proposals(fpn_rois,
                             min_level,
                             max_level,

--- a/python/paddle/fluid/tests/test_detection.py
+++ b/python/paddle/fluid/tests/test_detection.py
@@ -557,8 +557,9 @@ class TestMulticlassNMS2(unittest.TestCase):
            bboxes = layers.data(
                name='bboxes', shape=[-1, 10, 4], dtype='float32')
            scores = layers.data(name='scores', shape=[-1, 10], dtype='float32')
-            output = layers.multiclass_nms2(bboxes, scores, 0.3, 400, 200, 0.7)
+            output = fluid.contrib.multiclass_nms2(bboxes, scores, 0.3, 400,
-            output2, index = layers.multiclass_nms2(
+                                                   200, 0.7)
+            output2, index = fluid.contrib.multiclass_nms2(
                bboxes, scores, 0.3, 400, 200, 0.7, return_index=True)
            self.assertIsNotNone(output)
            self.assertIsNotNone(output2)