diff --git a/paddle/fluid/operators/detection/yolo_box_op.cc b/paddle/fluid/operators/detection/yolo_box_op.cc index 6cc9b241c6d167a7d348d97d643c41eb3259a72a..e6cf3f58ddcc8e03ed2902f6936f122441d32a7c 100644 --- a/paddle/fluid/operators/detection/yolo_box_op.cc +++ b/paddle/fluid/operators/detection/yolo_box_op.cc @@ -75,25 +75,25 @@ class YoloBoxOpMaker : public framework::OpProtoAndCheckerMaker { void Make() override { AddInput("X", "The input tensor of YoloBox operator, " - "This is a 4-D tensor with shape of [N, C, H, W]." - "H and W should be same, and the second dimension(C) stores" - "box locations, confidence score and classification one-hot" - "keys of each anchor box. Generally, X should be the output" + "This is a 4-D tensor with shape of [N, C, H, W]. " + "H and W should be same, and the second dimension(C) stores " + "box locations, confidence score and classification one-hot " + "keys of each anchor box. Generally, X should be the output " "of YOLOv3 network."); AddInput("ImgSize", "The image size tensor of YoloBox operator, " - "This is a 2-D tensor with shape of [N, 2]. This tensor holds" - "height and width of each input image using for resize output" + "This is a 2-D tensor with shape of [N, 2]. 
This tensor holds " + "height and width of each input image used to resize the output " "box in input image scale."); AddOutput("Boxes", "The output tensor of detection boxes of YoloBox operator, " - "This is a 3-D tensor with shape of [N, M, 4], N is the" - "batch num, M is output box number, and the 3rd dimension" + "This is a 3-D tensor with shape of [N, M, 4], N is the " + "batch num, M is output box number, and the 3rd dimension " "stores [xmin, ymin, xmax, ymax] coordinates of boxes."); AddOutput("Scores", - "The output tensor ofdetection boxes scores of YoloBox" - "operator, This is a 3-D tensor with shape of [N, M, C]," - "N is the batch num, M is output box number, C is the" + "The output tensor of detection boxes scores of YoloBox " + "operator, This is a 3-D tensor with shape of [N, M, C], " + "N is the batch num, M is output box number, C is the " "class number."); AddAttr("class_num", "The number of classes to predict."); @@ -107,30 +107,31 @@ class YoloBoxOpMaker : public framework::OpProtoAndCheckerMaker { "and thrid YoloBox operators.") .SetDefault(32); AddAttr("conf_thresh", - "The confidence scores threshold of detection boxes." - "boxes with confidence scores under threshold should" + "The confidence scores threshold of detection boxes. " + "Boxes with confidence scores under threshold should " "be ignored.") .SetDefault(0.01); AddComment(R"DOC( This operator generate YOLO detection boxes from output of YOLOv3 network. The output of previous network is in shape [N, C, H, W], while H and W - should be the same, specify the grid size, each grid point predict given - number boxes, this given number is specified by anchors, it should be - half anchors length, which following will be represented as S. 
In the - second dimension(the channel dimension), C should be S * (class_num + 5), - class_num is the box categoriy number of source dataset(such as coco), - so in the second dimension, stores 4 box location coordinates x, y, w, h - and confidence score of the box and class one-hot key of each anchor box. - - While the 4 location coordinates if :math:`tx, ty, tw, th`, the box - predictions correspnd to: + should be the same, H and W specify the grid size, each grid point predicts + given number boxes, this given number, which following will be represented as S, + is specified by the number of anchors. In the second dimension(the channel + dimension), C should be equal to S * (class_num + 5), class_num is the object + category number of source dataset(such as 80 in coco dataset), so in the + second(channel) dimension, apart from 4 box location coordinates x, y, w, h, + also includes confidence score of the box and class one-hot key of each anchor + box. + + Assume the 4 location coordinates are :math:`t_x, t_y, t_w, t_h`, the box + predictions should be as follows: $$ - b_x = \sigma(t_x) + c_x + b_x = \\sigma(t_x) + c_x $$ $$ - b_y = \sigma(t_y) + c_y + b_y = \\sigma(t_y) + c_y $$ $$ b_w = p_w e^{t_w} @@ -139,14 +140,14 @@ class YoloBoxOpMaker : public framework::OpProtoAndCheckerMaker { b_h = p_h e^{t_h} $$ - While :math:`c_x, c_y` is the left top corner of current grid and - :math:`p_w, p_h` is specified by anchors. + In the equation above, :math:`c_x, c_y` is the left top corner of current grid + and :math:`p_w, p_h` is specified by anchors. - The logistic scores of the 5rd channel of each anchor prediction boxes - represent the confidence score of each prediction scores, and the logistic - scores of the last class_num channels of each anchor prediction boxes - represent the classifcation scores. 
Boxes with confidence scores less than - conf_thresh should be ignored, and box final scores is the product of + The logistic regression value of the 5th channel of each anchor prediction boxes + represent the confidence score of each prediction box, and the logistic + regression value of the last :attr:`class_num` channels of each anchor prediction + boxes represent the classification scores. Boxes with confidence scores less than + :attr:`conf_thresh` should be ignored, and box final score is the product of confidence scores and classification scores. )DOC"); diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index aca5f0f1d64b7d76298f4985094b34679111ee8e..6cfd852fa5e226c223a2fa05332d4d72c586848e 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -628,10 +628,12 @@ def yolo_box(x, class_num (int): ${class_num_comment} conf_thresh (float): ${conf_thresh_comment} downsample_ratio (int): ${downsample_ratio_comment} - name (string): the name of yolov3 loss + name (string): the name of yolo box layer Returns: - Variable: A 1-D tensor with shape [1], the value of yolov3 loss + Variable: A 3-D tensor with shape [N, M, 4], the coordinates of boxes, + and a 3-D tensor with shape [N, M, C], the classification scores + of boxes. Raises: TypeError: Input x of yolov_box must be Variable @@ -640,6 +642,7 @@ def yolo_box(x, TypeError: Attr conf_thresh of yolo box must be a float number Examples: + .. code-block:: python x = fluid.layers.data(name='x', shape=[255, 13, 13], dtype='float32')