diff --git a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc index d7a53f1bef98ecda3ba7b36323678a11a632a15c..c5b2f97b139ed701d86451d13b46af7bccb8b5cf 100644 --- a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc +++ b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc @@ -439,31 +439,88 @@ class GenerateProposalLabelsKernel : public framework::OpKernel { class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - // TODO(buxingyuan): Add Document - AddInput("RpnRois", "RpnRois."); - AddInput("GtClasses", "GtClasses."); - AddInput("IsCrowd", "IsCrowd."); - AddInput("GtBoxes", "GtBoxes."); - AddInput("ImInfo", "ImInfo."); - - AddOutput("Rois", "Rois."); - AddOutput("LabelsInt32", "LabelsInt32."); - AddOutput("BboxTargets", "BboxTargets."); - AddOutput("BboxInsideWeights", "BboxInsideWeights."); - AddOutput("BboxOutsideWeights", "BboxOutsideWeights."); - - AddAttr("batch_size_per_im", "batch_size_per_im"); - AddAttr("fg_fraction", "fg_fraction"); - AddAttr("fg_thresh", "fg_thresh"); - AddAttr("bg_thresh_hi", "bg_thresh_hi"); - AddAttr("bg_thresh_lo", "bg_thresh_lo"); - AddAttr>("bbox_reg_weights", "bbox_reg_weights"); - AddAttr("class_nums", "class_nums"); - AddAttr("use_random", "use_random").SetDefault(true); + AddInput( + "RpnRois", + "(LoDTensor), This input is a 2D LoDTensor with shape [N, 4]. " + "N is the number of the GenerateProposalOp's output, " + "each element is a bounding box with [xmin, ymin, xmax, ymax] format."); + AddInput("GtClasses", + "(LoDTensor), This input is a 2D LoDTensor with shape [M, 1]. " + "M is the number of groundtruth, " + "each element is a class label of groundtruth."); + AddInput( + "IsCrowd", + "(LoDTensor), This input is a 2D LoDTensor with shape [M, 1]. " + "M is the number of groundtruth, " + "each element is a flag indicates whether a groundtruth is crowd."); + AddInput( + "GtBoxes", + "(LoDTensor), This input is a 2D LoDTensor with shape [M, 4]. " + "M is the number of groundtruth, " + "each element is a bounding box with [xmin, ymin, xmax, ymax] format."); + AddInput("ImInfo", + "(Tensor), This input is a 2D Tensor with shape [B, 3]. " + "B is the number of input images, " + "each element consists of im_height, im_width, im_scale."); + + AddOutput( + "Rois", + "(LoDTensor), This output is a 2D LoDTensor with shape [P, 4]. " + "P usuall equal to batch_size_per_im * batch_size, " + "each element is a bounding box with [xmin, ymin, xmax, ymax] format."); + AddOutput("LabelsInt32", + "(LoDTensor), This output is a 2D LoDTensor with shape [P], " + "each element repersents a class label of a roi"); + AddOutput("BboxTargets", + "(LoDTensor), This output is a 2D LoDTensor with shape [P, 4 * " + "class_nums], " + "each element repersents a box label of a roi"); + AddOutput( + "BboxInsideWeights", + "(LoDTensor), This output is a 2D LoDTensor with shape [P, 4 * " + "class_nums], " + "each element indicates whether a box should contribute to loss."); + AddOutput( + "BboxOutsideWeights", + "(LoDTensor), This output is a 2D LoDTensor with shape [P, 4 * " + "class_nums], " + "each element indicates whether a box should contribute to loss."); + + AddAttr("batch_size_per_im", "Batch size of rois per images."); + AddAttr("fg_fraction", + "Foreground fraction in total batch_size_per_im."); + AddAttr( + "fg_thresh", + "Overlap threshold which is used to chose foreground sample."); + AddAttr("bg_thresh_hi", + "Overlap threshold upper bound which is used to chose " + "background sample."); + AddAttr("bg_thresh_lo", + "Overlap threshold lower bound which is used to chose " + "background sample."); + AddAttr>("bbox_reg_weights", "Box regression weights."); + AddAttr("class_nums", "Class number."); + AddAttr( + "use_random", + "Use random sampling to choose foreground and background boxes.") + .SetDefault(true); AddComment(R"DOC( -Generate Proposals Labels Operator. -)DOC"); +This operator can be, for given the GenerateProposalOp output bounding boxes and groundtruth, +to sample foregroud boxes and background boxes, and compute loss target. + +RpnRois is the output boxes of RPN and was processed by generate_proposal_op, these boxes +were combined with groundtruth boxes and sampled according to batch_size_per_im and fg_fraction, +If an instance with a groundtruth overlap greater than fg_thresh, then it was considered as a foregroud sample. +If an instance with a groundtruth overlap greater than bg_thresh_lo and lower than bg_thresh_hi, +then it was considered as a background sample. +After all foregroud and background boxes are chosen (so called Rois), +then we apply random sampling to make sure +the number of foregroud boxes is no more than batch_size_per_im * fg_fraction. + +For each box in Rois, we assign the classification (class label) and regression targets (box label) to it. +Finally BboxInsideWeights and BboxOutsideWeights are used to specify whether it would contribute to training loss. + )DOC"); } }; diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index 1cfcbbb9c1614f21848e62cce79befc673e1739c..cc107fc749c20ac37f199321c297b7e429666a1a 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -1413,7 +1413,36 @@ def generate_proposal_labels(rpn_rois, use_random=True): """ ** Generate proposal labels Faster-RCNN ** - TODO(buxingyuan): Add Document + This operator can be, for given the GenerateProposalOp output bounding boxes and groundtruth, + to sample foregroud boxes and background boxes, and compute loss target. + + RpnRois is the output boxes of RPN and was processed by generate_proposal_op, these boxes + were combined with groundtruth boxes and sampled according to batch_size_per_im and fg_fraction, + If an instance with a groundtruth overlap greater than fg_thresh, then it was considered as a foregroud sample. + If an instance with a groundtruth overlap greater than bg_thresh_lo and lower than bg_thresh_hi, + then it was considered as a background sample. + After all foregroud and background boxes are chosen (so called Rois), + then we apply random sampling to make sure + the number of foregroud boxes is no more than batch_size_per_im * fg_fraction. + + For each box in Rois, we assign the classification (class label) and regression targets (box label) to it. + Finally BboxInsideWeights and BboxOutsideWeights are used to specify whether it would contribute to training loss. + + Args: + rpn_rois(Variable): A 2-D LoDTensor with shape [N, 4]. N is the number of the GenerateProposalOp's output, each element is a bounding box with [xmin, ymin, xmax, ymax] format. + gt_classes(Variable): A 2-D LoDTensor with shape [M, 1]. M is the number of groundtruth, each element is a class label of groundtruth. + is_crowd(Variable): A 2-D LoDTensor with shape [M, 1]. M is the number of groundtruth, each element is a flag indicates whether a groundtruth is crowd. + gt_boxes(Variable): A 2-D LoDTensor with shape [M, 4]. M is the number of groundtruth, each element is a bounding box with [xmin, ymin, xmax, ymax] format. + im_info(Variable): A 2-D LoDTensor with shape [B, 3]. B is the number of input images, each element consists of im_height, im_width, im_scale. + + batch_size_per_im(int): Batch size of rois per images. + fg_fraction(float): Foreground fraction in total batch_size_per_im. + fg_thresh(float): Overlap threshold which is used to chose foreground sample. + bg_thresh_hi(float): Overlap threshold upper bound which is used to chose background sample. + bg_thresh_lo(float): Overlap threshold lower bound which is used to chose background sample. + bbox_reg_weights(list|tuple): Box regression weights. + class_nums(int): Class number. + use_random(bool): Use random sampling to choose foreground and background boxes. """ helper = LayerHelper('generate_proposal_labels', **locals()) @@ -1472,7 +1501,7 @@ def generate_proposals(scores, eta=1.0, name=None): """ - ** Generate proposal labels Faster-RCNN ** + ** Generate proposal Faster-RCNN ** This operation proposes RoIs according to each box with their probability to be a foreground object and the box can be calculated by anchors. Bbox_deltais and scores to be an object are the output of RPN. Final proposals