diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 14d19f79aa4c630e752d2757587fe663c890ad3e..55b65e0d9af28c846a960df65eccf53194c17f1a 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -328,7 +328,7 @@ paddle.fluid.layers.iou_similarity (ArgSpec(args=['x', 'y', 'name'], varargs=Non paddle.fluid.layers.box_coder (ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name', 'axis'], varargs=None, keywords=None, defaults=('encode_center_size', True, None, 0)), ('document', '032d0f4b7d8f6235ee5d91e473344f0e')) paddle.fluid.layers.polygon_box_transform (ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '0e5ac2507723a0b5adec473f9556799b')) paddle.fluid.layers.yolov3_loss (ArgSpec(args=['x', 'gtbox', 'gtlabel', 'anchors', 'anchor_mask', 'class_num', 'ignore_thresh', 'downsample_ratio', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '991e934c3e09abf0edec7c9c978b4691')) -paddle.fluid.layers.yolo_box (ArgSpec(args=['x', 'img_size', 'anchors', 'class_num', 'conf_thresh', 'downsample_ratio', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '991e934c3e09abf0edec7c9c978b4691')) +paddle.fluid.layers.yolo_box (ArgSpec(args=['x', 'img_size', 'anchors', 'class_num', 'conf_thresh', 'downsample_ratio', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '170091cef6ebfcba6e54c55b496d0021')) paddle.fluid.layers.box_clip (ArgSpec(args=['input', 'im_info', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '397e9e02b451d99c56e20f268fa03f2e')) paddle.fluid.layers.multiclass_nms (ArgSpec(args=['bboxes', 'scores', 'score_threshold', 'nms_top_k', 'keep_top_k', 'nms_threshold', 'normalized', 'nms_eta', 'background_label', 'name'], varargs=None, keywords=None, defaults=(0.3, True, 1.0, 0, None)), ('document', 'ca7d1107b6c5d2d6d8221039a220fde0')) paddle.fluid.layers.distribute_fpn_proposals 
(ArgSpec(args=['fpn_rois', 'min_level', 'max_level', 'refer_level', 'refer_scale', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '7bb011ec26bace2bc23235aa4a17647d')) diff --git a/paddle/fluid/operators/detection/yolo_box_op.cc b/paddle/fluid/operators/detection/yolo_box_op.cc index e6cf3f58ddcc8e03ed2902f6936f122441d32a7c..6d8dac38f7f485de06907e9759bc0f48136fcee8 100644 --- a/paddle/fluid/operators/detection/yolo_box_op.cc +++ b/paddle/fluid/operators/detection/yolo_box_op.cc @@ -83,7 +83,7 @@ class YoloBoxOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("ImgSize", "The image size tensor of YoloBox operator, " "This is a 2-D tensor with shape of [N, 2]. This tensor holds " - "height and width of each input image using for resize output " + "height and width of each input image used for resizing output " "box in input image scale."); AddOutput("Boxes", "The output tensor of detection boxes of YoloBox operator, " @@ -117,9 +117,9 @@ class YoloBoxOpMaker : public framework::OpProtoAndCheckerMaker { The output of previous network is in shape [N, C, H, W], while H and W should be the same, H and W specify the grid size, each grid point predict given number boxes, this given number, which following will be represented as S, - is specified by the number of anchors, In the second dimension(the channel - dimension), C should be equal to S * (class_num + 5), class_num is the object - category number of source dataset(such as 80 in coco dataset), so in the + is specified by the number of anchors. In the second dimension(the channel + dimension), C should be equal to S * (5 + class_num), class_num is the object + category number of source dataset(such as 80 in coco dataset), so the second(channel) dimension, apart from 4 box location coordinates x, y, w, h, also includes confidence score of the box and class one-hot key of each anchor box. 
@@ -143,10 +143,10 @@ class YoloBoxOpMaker : public framework::OpProtoAndCheckerMaker { in the equation above, :math:`c_x, c_y` is the left top corner of current grid and :math:`p_w, p_h` is specified by anchors. - The logistic regression value of the 5rd channel of each anchor prediction boxes - represent the confidence score of each prediction box, and the logistic + The logistic regression value of the 5th channel of each anchor prediction boxes + represents the confidence score of each prediction box, and the logistic regression value of the last :attr:`class_num` channels of each anchor prediction - boxes represent the classifcation scores. Boxes with confidence scores less than + boxes represents the classification scores. Boxes with confidence scores less than :attr:`conf_thresh` should be ignored, and box final scores is the product of confidence scores and classification scores.