Merge pull request #13971 from sefira/FasterOpDoc

generate proposal labels doc

Merge pull request #13971 from sefira/FasterOpDoc
generate proposal labels doc
cb27a921 · qingqing01 · GitHub · 3c957af1 · 6c1d74bb · cb27a921
Showing with 111 additions and 25 deletions

paddle/fluid/operators/detection/generate_proposal_labels_op.cc .../fluid/operators/detection/generate_proposal_labels_op.cc +80 -23

python/paddle/fluid/layers/detection.py python/paddle/fluid/layers/detection.py +31 -2

未找到文件。
--- a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc
+++ b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc
@@ -439,31 +439,88 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
 class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
-    // TODO(buxingyuan): Add Document
+    AddInput(
-    AddInput("RpnRois", "RpnRois.");
+        "RpnRois",
-    AddInput("GtClasses", "GtClasses.");
+        "(LoDTensor), This input is a 2D LoDTensor with shape [N, 4]. "
-    AddInput("IsCrowd", "IsCrowd.");
+        "N is the number of the GenerateProposalOp's output, "
-    AddInput("GtBoxes", "GtBoxes.");
+        "each element is a bounding box with [xmin, ymin, xmax, ymax] format.");
-    AddInput("ImInfo", "ImInfo.");
+    AddInput("GtClasses",
+             "(LoDTensor), This input is a 2D LoDTensor with shape [M, 1]. "
-    AddOutput("Rois", "Rois.");
+             "M is the number of groundtruth, "
-    AddOutput("LabelsInt32", "LabelsInt32.");
+             "each element is a class label of groundtruth.");
-    AddOutput("BboxTargets", "BboxTargets.");
+    AddInput(
-    AddOutput("BboxInsideWeights", "BboxInsideWeights.");
+        "IsCrowd",
-    AddOutput("BboxOutsideWeights", "BboxOutsideWeights.");
+        "(LoDTensor), This input is a 2D LoDTensor with shape [M, 1]. "
+        "M is the number of groundtruth, "
-    AddAttr<int>("batch_size_per_im", "batch_size_per_im");
+        "each element is a flag indicating whether a groundtruth is crowd.");
-    AddAttr<float>("fg_fraction", "fg_fraction");
+    AddInput(
-    AddAttr<float>("fg_thresh", "fg_thresh");
+        "GtBoxes",
-    AddAttr<float>("bg_thresh_hi", "bg_thresh_hi");
+        "(LoDTensor), This input is a 2D LoDTensor with shape [M, 4]. "
-    AddAttr<float>("bg_thresh_lo", "bg_thresh_lo");
+        "M is the number of groundtruth, "
-    AddAttr<std::vector<float>>("bbox_reg_weights", "bbox_reg_weights");
+        "each element is a bounding box with [xmin, ymin, xmax, ymax] format.");
-    AddAttr<int>("class_nums", "class_nums");
+    AddInput("ImInfo",
-    AddAttr<bool>("use_random", "use_random").SetDefault(true);
+             "(Tensor), This input is a 2D Tensor with shape [B, 3]. "
+             "B is the number of input images, "
+             "each element consists of im_height, im_width, im_scale.");
+    AddOutput(
+        "Rois",
+        "(LoDTensor), This output is a 2D LoDTensor with shape [P, 4]. "
+        "P usually equals batch_size_per_im * batch_size, "
+        "each element is a bounding box with [xmin, ymin, xmax, ymax] format.");
+    AddOutput("LabelsInt32",
+              "(LoDTensor), This output is a 2D LoDTensor with shape [P], "
+              "each element represents a class label of a roi");
+    AddOutput("BboxTargets",
+              "(LoDTensor), This output is a 2D LoDTensor with shape [P, 4 * "
+              "class_nums], "
+              "each element represents a box label of a roi");
+    AddOutput(
+        "BboxInsideWeights",
+        "(LoDTensor), This output is a 2D LoDTensor with shape [P, 4 * "
+        "class_nums], "
+        "each element indicates whether a box should contribute to loss.");
+    AddOutput(
+        "BboxOutsideWeights",
+        "(LoDTensor), This output is a 2D LoDTensor with shape [P, 4 * "
+        "class_nums], "
+        "each element indicates whether a box should contribute to loss.");
+    AddAttr<int>("batch_size_per_im", "Batch size of rois per images.");
+    AddAttr<float>("fg_fraction",
+                   "Foreground fraction in total batch_size_per_im.");
+    AddAttr<float>(
+        "fg_thresh",
+        "Overlap threshold which is used to choose foreground sample.");
+    AddAttr<float>("bg_thresh_hi",
+                   "Overlap threshold upper bound which is used to choose "
+                   "background sample.");
+    AddAttr<float>("bg_thresh_lo",
+                   "Overlap threshold lower bound which is used to choose "
+                   "background sample.");
+    AddAttr<std::vector<float>>("bbox_reg_weights", "Box regression weights.");
+    AddAttr<int>("class_nums", "Class number.");
+    AddAttr<bool>(
+        "use_random",
+        "Use random sampling to choose foreground and background boxes.")
+        .SetDefault(true);
    AddComment(R"DOC(
-Generate Proposals Labels Operator.
+Given the bounding boxes output by GenerateProposalOp and the groundtruth,
-)DOC");
+this operator samples foreground boxes and background boxes, and computes the loss targets.
+RpnRois is the output boxes of RPN and was processed by generate_proposal_op, these boxes
+were combined with groundtruth boxes and sampled according to batch_size_per_im and fg_fraction,
+If an instance has a groundtruth overlap greater than fg_thresh, it is considered a foreground sample.
+If an instance has a groundtruth overlap greater than bg_thresh_lo and lower than bg_thresh_hi,
+it is considered a background sample.
+After all foreground and background boxes are chosen (so called Rois),
+then we apply random sampling to make sure
+the number of foreground boxes is no more than batch_size_per_im * fg_fraction.
+For each box in Rois, we assign the classification (class label) and regression targets (box label) to it.
+Finally BboxInsideWeights and BboxOutsideWeights are used to specify whether it would contribute to training loss.
+    )DOC");
  }
 };

--- a/python/paddle/fluid/layers/detection.py
+++ b/python/paddle/fluid/layers/detection.py
@@ -1424,7 +1424,36 @@ def generate_proposal_labels(rpn_rois,
                             use_random=True):
    """
    ** Generate proposal labels Faster-RCNN **
-    TODO(buxingyuan): Add Document
+    Given the bounding boxes output by GenerateProposalOp and the groundtruth,
+    this operator samples foreground boxes and background boxes, and computes the loss targets.
+    RpnRois is the output boxes of RPN and was processed by generate_proposal_op, these boxes
+    were combined with groundtruth boxes and sampled according to batch_size_per_im and fg_fraction,
+    If an instance has a groundtruth overlap greater than fg_thresh, it is considered a foreground sample.
+    If an instance has a groundtruth overlap greater than bg_thresh_lo and lower than bg_thresh_hi,
+    it is considered a background sample.
+    After all foreground and background boxes are chosen (so called Rois),
+    then we apply random sampling to make sure
+    the number of foreground boxes is no more than batch_size_per_im * fg_fraction.
+    For each box in Rois, we assign the classification (class label) and regression targets (box label) to it.
+    Finally BboxInsideWeights and BboxOutsideWeights are used to specify whether it would contribute to training loss.
+    Args:
+        rpn_rois(Variable): A 2-D LoDTensor with shape [N, 4]. N is the number of the GenerateProposalOp's output, each element is a bounding box with [xmin, ymin, xmax, ymax] format.
+        gt_classes(Variable): A 2-D LoDTensor with shape [M, 1]. M is the number of groundtruth, each element is a class label of groundtruth.
+        is_crowd(Variable): A 2-D LoDTensor with shape [M, 1]. M is the number of groundtruth, each element is a flag indicating whether a groundtruth is crowd.
+        gt_boxes(Variable): A 2-D LoDTensor with shape [M, 4]. M is the number of groundtruth, each element is a bounding box with [xmin, ymin, xmax, ymax] format.
+        im_info(Variable): A 2-D LoDTensor with shape [B, 3]. B is the number of input images, each element consists of im_height, im_width, im_scale.
+        batch_size_per_im(int): Batch size of rois per images.
+        fg_fraction(float): Foreground fraction in total batch_size_per_im.
+        fg_thresh(float): Overlap threshold which is used to choose foreground sample.
+        bg_thresh_hi(float): Overlap threshold upper bound which is used to choose background sample.
+        bg_thresh_lo(float): Overlap threshold lower bound which is used to choose background sample.
+        bbox_reg_weights(list|tuple): Box regression weights.
+        class_nums(int): Class number.
+        use_random(bool): Use random sampling to choose foreground and background boxes.
    """
    helper = LayerHelper('generate_proposal_labels', **locals())
@@ -1487,7 +1516,7 @@ def generate_proposals(scores,
                       eta=1.0,
                       name=None):
    """
-    ** Generate proposal labels Faster-RCNN **
+    ** Generate proposal Faster-RCNN **
 	This operation proposes RoIs according to each box with their probability to be a foreground object and 
 	the box can be calculated by anchors. bbox_deltas and scores to be an object are the outputs of RPN. Final proposals