modified multiclass_nms example (#19553)

test=develop, test=document_preview

modified multiclass_nms example (#19553)
test=develop, test=document_preview
7a867063 · xiaoting · lvmengsi · 57f0f0f2 · 7a867063 · 7a867063
隐藏空白更改
内联并排

Showing with 24 addition and 2 deletion

paddle/fluid/API.spec paddle/fluid/API.spec +1 -1

python/paddle/fluid/layers/detection.py python/paddle/fluid/layers/detection.py +23 -1

未找到文件。
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -410,7 +410,7 @@ paddle.fluid.layers.polygon_box_transform (ArgSpec(args=['input', 'name'], varar
 paddle.fluid.layers.yolov3_loss (ArgSpec(args=['x', 'gt_box', 'gt_label', 'anchors', 'anchor_mask', 'class_num', 'ignore_thresh', 'downsample_ratio', 'gt_score', 'use_label_smooth', 'name'], varargs=None, keywords=None, defaults=(None, True, None)), ('document', '400403175718d5a632402cdae88b01b8'))
 paddle.fluid.layers.yolo_box (ArgSpec(args=['x', 'img_size', 'anchors', 'class_num', 'conf_thresh', 'downsample_ratio', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ed56ff21536ca5c8ad418d0cfaf6a7b9'))
 paddle.fluid.layers.box_clip (ArgSpec(args=['input', 'im_info', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '9ddee76cb808db83768bf68010e39b2b'))
-paddle.fluid.layers.multiclass_nms (ArgSpec(args=['bboxes', 'scores', 'score_threshold', 'nms_top_k', 'keep_top_k', 'nms_threshold', 'normalized', 'nms_eta', 'background_label', 'name'], varargs=None, keywords=None, defaults=(0.3, True, 1.0, 0, None)), ('document', '76d74056e9eedcacf013d8e3b115cbd3'))
+paddle.fluid.layers.multiclass_nms (ArgSpec(args=['bboxes', 'scores', 'score_threshold', 'nms_top_k', 'keep_top_k', 'nms_threshold', 'normalized', 'nms_eta', 'background_label', 'name'], varargs=None, keywords=None, defaults=(0.3, True, 1.0, 0, None)), ('document', '51a388c4d067ea93a6a60492db40c7af'))
 paddle.fluid.layers.retinanet_detection_output (ArgSpec(args=['bboxes', 'scores', 'anchors', 'im_info', 'score_threshold', 'nms_top_k', 'keep_top_k', 'nms_threshold', 'nms_eta'], varargs=None, keywords=None, defaults=(0.05, 1000, 100, 0.3, 1.0)), ('document', '078d28607ce261a0cba2b965a79f6bb8'))
 paddle.fluid.layers.distribute_fpn_proposals (ArgSpec(args=['fpn_rois', 'min_level', 'max_level', 'refer_level', 'refer_scale', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6c023b9401214ae387a8b2d92638e5e4'))
 paddle.fluid.layers.box_decoder_and_assign (ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'box_score', 'box_clip', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '3619a7847709f5868f5e929065947b38'))

--- a/python/paddle/fluid/layers/detection.py
+++ b/python/paddle/fluid/layers/detection.py
@@ -2694,6 +2694,28 @@ def multiclass_nms(bboxes,
    Aftern NMS step, at most keep_top_k number of total bboxes are to be kept
    per image if keep_top_k is larger than -1.
+    See below for an example:
+    .. code-block:: text
+        if:
+            box1.data = (2.0, 3.0, 7.0, 5.0) format is (xmin, ymin, xmax, ymax)
+            box1.scores = (0.7, 0.2, 0.4)  which is (label0.score=0.7, label1.score=0.2, label2.cores=0.4)
+            box2.data = (3.0, 4.0, 8.0, 5.0)
+            box2.score = (0.3, 0.3, 0.1)
+            nms_threshold = 0.3
+            background_label = 0
+            score_threshold = 0
+        Then:
+            iou = 4/11 > 0.3
+            out.data = [[1, 0.3, 3.0, 4.0, 8.0, 5.0],    
+                         [2, 0.4, 2.0, 3.0, 7.0, 5.0]]
+            Out format is (label, confidence, xmin, ymin, xmax, ymax)
    Args:
        bboxes (Variable): Two types of bboxes are supported:
                           1. (Tensor) A 3-D Tensor with shape
@@ -2734,7 +2756,7 @@ def multiclass_nms(bboxes,
        name(str): Name of the multiclass nms op. Default: None.
    Returns:
-        Out: A 2-D LoDTensor with shape [No, 6] represents the detections.
+        Out(Variable): A 2-D LoDTensor with shape [No, 6] represents the detections.
             Each row has 6 values: [label, confidence, xmin, ymin, xmax, ymax]
             or A 2-D LoDTensor with shape [No, 10] represents the detections.
             Each row has 10 values: