Polish En doc for some APIs. (#20418)

* Polish En doc for some APIs * Update some comments and API.spec

Polish En doc for some APIs. (#20418)
* Polish En doc for some APIs * Update some comments and API.spec
df3025c3 · qingqing01 · GitHub · 6b612a28 · df3025c3 · df3025c3
Showing 199 additions and 142 deletions

paddle/fluid/API.spec paddle/fluid/API.spec +5 -5

python/paddle/fluid/layers/detection.py python/paddle/fluid/layers/detection.py +181 -134

python/paddle/fluid/layers/nn.py python/paddle/fluid/layers/nn.py +13 -3

未找到文件。
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -214,7 +214,7 @@ paddle.fluid.layers.scatter (ArgSpec(args=['input', 'index', 'updates', 'name',
 paddle.fluid.layers.scatter_nd_add (ArgSpec(args=['ref', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '2607b5c9369fbc52f208de066a80fc25'))
 paddle.fluid.layers.scatter_nd (ArgSpec(args=['index', 'updates', 'shape', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e43f1d3a938b35da246aea3e72a020ec'))
 paddle.fluid.layers.sequence_scatter (ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'abe3f714120117a5a3d3e639853932bf'))
-paddle.fluid.layers.random_crop (ArgSpec(args=['x', 'shape', 'seed'], varargs=None, keywords=None, defaults=(None,)), ('document', '042af0b8abea96b40c22f6e70d99e042'))
+paddle.fluid.layers.random_crop (ArgSpec(args=['x', 'shape', 'seed'], varargs=None, keywords=None, defaults=(None,)), ('document', '44f35002962cf24e14dd2958f6584e3d'))
 paddle.fluid.layers.mean_iou (ArgSpec(args=['input', 'label', 'num_classes'], varargs=None, keywords=None, defaults=None), ('document', 'dea29c0c3cdbd5b498afef60e58c9d7c'))
 paddle.fluid.layers.relu (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '0942c174f4f6fb274976d4357356f6a2'))
 paddle.fluid.layers.selu (ArgSpec(args=['x', 'scale', 'alpha', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '3ee40bc474b4bccdaf112d3f0d847318'))
@@ -408,10 +408,10 @@ paddle.fluid.layers.cumsum (ArgSpec(args=['x', 'axis', 'exclusive', 'reverse'],
 paddle.fluid.layers.thresholded_relu (ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,)), ('document', '94c71025bf11ab8172fd455350274138'))
 paddle.fluid.layers.prior_box (ArgSpec(args=['input', 'image', 'min_sizes', 'max_sizes', 'aspect_ratios', 'variance', 'flip', 'clip', 'steps', 'offset', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, [1.0], [0.1, 0.1, 0.2, 0.2], False, False, [0.0, 0.0], 0.5, None, False)), ('document', '0fdf82762fd0a5acb2578a72771b5b44'))
 paddle.fluid.layers.density_prior_box (ArgSpec(args=['input', 'image', 'densities', 'fixed_sizes', 'fixed_ratios', 'variance', 'clip', 'steps', 'offset', 'flatten_to_2d', 'name'], varargs=None, keywords=None, defaults=(None, None, None, [0.1, 0.1, 0.2, 0.2], False, [0.0, 0.0], 0.5, False, None)), ('document', '7a484a0da5e993a7734867a3dfa86571'))
-paddle.fluid.layers.multi_box_head (ArgSpec(args=['inputs', 'image', 'base_size', 'num_classes', 'aspect_ratios', 'min_ratio', 'max_ratio', 'min_sizes', 'max_sizes', 'steps', 'step_w', 'step_h', 'offset', 'variance', 'flip', 'clip', 'kernel_size', 'pad', 'stride', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None, 0.5, [0.1, 0.1, 0.2, 0.2], True, False, 1, 0, 1, None, False)), ('document', 'fd58078fdfffd899b91f992ba224628f'))
+paddle.fluid.layers.multi_box_head (ArgSpec(args=['inputs', 'image', 'base_size', 'num_classes', 'aspect_ratios', 'min_ratio', 'max_ratio', 'min_sizes', 'max_sizes', 'steps', 'step_w', 'step_h', 'offset', 'variance', 'flip', 'clip', 'kernel_size', 'pad', 'stride', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None, 0.5, [0.1, 0.1, 0.2, 0.2], True, False, 1, 0, 1, None, False)), ('document', '61360150b911fa4097f1a221f5d49877'))
 paddle.fluid.layers.bipartite_match (ArgSpec(args=['dist_matrix', 'match_type', 'dist_threshold', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '6f795f407a8e3a3ec3da52726c73405a'))
-paddle.fluid.layers.target_assign (ArgSpec(args=['input', 'matched_indices', 'negative_indices', 'mismatch_value', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', 'e9685f32d21bec8c013626c0254502c5'))
+paddle.fluid.layers.target_assign (ArgSpec(args=['input', 'matched_indices', 'negative_indices', 'mismatch_value', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '4670c1be208835fc8edd61025c21d0e4'))
-paddle.fluid.layers.detection_output (ArgSpec(args=['loc', 'scores', 'prior_box', 'prior_box_var', 'background_label', 'nms_threshold', 'nms_top_k', 'keep_top_k', 'score_threshold', 'nms_eta', 'return_index'], varargs=None, keywords=None, defaults=(0, 0.3, 400, 200, 0.01, 1.0, False)), ('document', '5485bcaceb0cde2695565a2ffd5bbd40'))
+paddle.fluid.layers.detection_output (ArgSpec(args=['loc', 'scores', 'prior_box', 'prior_box_var', 'background_label', 'nms_threshold', 'nms_top_k', 'keep_top_k', 'score_threshold', 'nms_eta', 'return_index'], varargs=None, keywords=None, defaults=(0, 0.3, 400, 200, 0.01, 1.0, False)), ('document', 'ed19f55b366e68ed686318ef7aff120d'))
 paddle.fluid.layers.ssd_loss (ArgSpec(args=['location', 'confidence', 'gt_box', 'gt_label', 'prior_box', 'prior_box_var', 'background_label', 'overlap_threshold', 'neg_pos_ratio', 'neg_overlap', 'loc_loss_weight', 'conf_loss_weight', 'match_type', 'mining_type', 'normalize', 'sample_size'], varargs=None, keywords=None, defaults=(None, 0, 0.5, 3.0, 0.5, 1.0, 1.0, 'per_prediction', 'max_negative', True, None)), ('document', '1f1ab4f908ceddef1d99a8363e6826af'))
 paddle.fluid.layers.rpn_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'is_crowd', 'im_info', 'rpn_batch_size_per_im', 'rpn_straddle_thresh', 'rpn_fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.0, 0.5, 0.7, 0.3, True)), ('document', 'd46629656b4ce9b07809e32c0482cbef'))
 paddle.fluid.layers.retinanet_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'gt_labels', 'is_crowd', 'im_info', 'num_classes', 'positive_overlap', 'negative_overlap'], varargs=None, keywords=None, defaults=(1, 0.5, 0.4)), ('document', '543b2a40641260e745a76b1f7a25fb2a'))
@@ -420,7 +420,7 @@ paddle.fluid.layers.anchor_generator (ArgSpec(args=['input', 'anchor_sizes', 'as
 paddle.fluid.layers.roi_perspective_transform (ArgSpec(args=['input', 'rois', 'transformed_height', 'transformed_width', 'spatial_scale', 'name'], varargs=None, keywords=None, defaults=(1.0, None)), ('document', 'b007f545ad41e66b814203bdb76516c6'))
 paddle.fluid.layers.generate_proposal_labels (ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random', 'is_cls_agnostic', 'is_cascade_rcnn'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True, False, False)), ('document', 'f2342042127b536a0a16390f149f1bba'))
 paddle.fluid.layers.generate_proposals (ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None)), ('document', '5cba014b41610431f8949e2d7336f1cc'))
-paddle.fluid.layers.generate_mask_labels (ArgSpec(args=['im_info', 'gt_classes', 'is_crowd', 'gt_segms', 'rois', 'labels_int32', 'num_classes', 'resolution'], varargs=None, keywords=None, defaults=None), ('document', 'b319b10ddaf17fb4ddf03518685a17ef'))
+paddle.fluid.layers.generate_mask_labels (ArgSpec(args=['im_info', 'gt_classes', 'is_crowd', 'gt_segms', 'rois', 'labels_int32', 'num_classes', 'resolution'], varargs=None, keywords=None, defaults=None), ('document', '2bacc35429f4fffe72a30c5a49a61eb7'))
 paddle.fluid.layers.iou_similarity (ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e24478fd1fcf1727d4947fe14356b3d4'))
 paddle.fluid.layers.box_coder (ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name', 'axis'], varargs=None, keywords=None, defaults=('encode_center_size', True, None, 0)), ('document', '511d7033c0cfce1a5b88c04ad6e7ed5b'))
 paddle.fluid.layers.polygon_box_transform (ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '2183f03c4f16712dcef6a474dbcefa24'))

--- a/python/paddle/fluid/layers/detection.py
+++ b/python/paddle/fluid/layers/detection.py
@@ -525,12 +525,11 @@ def detection_output(loc,
                     nms_eta=1.0,
                     return_index=False):
    """
-    **Detection Output Layer for Single Shot Multibox Detector (SSD).**
+    Given the regression locations, classification confidences and prior boxes,
+    calculate the detection outputs by performing following steps:
-    This operation is to get the detection results by performing following
+    1. Decode input bounding box predictions according to the prior boxes and
-    two steps:
+       regression locations.
-    1. Decode input bounding box predictions according to the prior boxes.
    2. Get the final detection results by applying multi-class non maximum
       suppression (NMS).
@@ -539,33 +538,33 @@ def detection_output(loc,
    Args:
        loc(Variable): A 3-D Tensor with shape [N, M, 4] represents the
-            predicted locations of M bounding bboxes. N is the batch size,
+            predicted locations of M bounding bboxes. Data type should be
+            float32 or float64. N is the batch size,
            and each bounding box has four coordinate values and the layout
            is [xmin, ymin, xmax, ymax].
        scores(Variable): A 3-D Tensor with shape [N, M, C] represents the
-            predicted confidence predictions. N is the batch size, C is the
+            predicted confidence predictions. Data type should be float32
-            class number, M is number of bounding boxes. For each category
+            or float64. N is the batch size, C is the
-            there are total M scores which corresponding M bounding boxes.
+            class number, M is number of bounding boxes.
        prior_box(Variable): A 2-D Tensor with shape [M, 4] holds M boxes,
-            each box is represented as [xmin, ymin, xmax, ymax],
+            each box is represented as [xmin, ymin, xmax, ymax]. Data type
-            [xmin, ymin] is the left top coordinate of the anchor box,
+            should be float32 or float64.
-            if the input is image feature map, they are close to the origin
-            of the coordinate system. [xmax, ymax] is the right bottom
-            coordinate of the anchor box.
        prior_box_var(Variable): A 2-D Tensor with shape [M, 4] holds M group
-            of variance.
+            of variance. Data type should be float32 or float64.
-        background_label(float): The index of background label,
+        background_label(int): The index of background label,
            the background label will be ignored. If set to -1, then all
-            categories will be considered.
+            categories will be considered. Default: 0.
-        nms_threshold(float): The threshold to be used in NMS.
+        nms_threshold(float): The threshold to be used in NMS. Default: 0.3.
        nms_top_k(int): Maximum number of detections to be kept according
-            to the confidences aftern the filtering detections based on
+            to the confidences after filtering detections based on
-            score_threshold.
+            score_threshold and before NMS. Default: 400.
        keep_top_k(int): Number of total bboxes to be kept per image after
-            NMS step. -1 means keeping all bboxes after NMS step.
+            NMS step. -1 means keeping all bboxes after NMS step. Default: 200.
        score_threshold(float): Threshold to filter out bounding boxes with
            low confidence score. If not provided, consider all boxes.
-        nms_eta(float): The parameter for adaptive NMS.
+            Default: 0.01.
+        nms_eta(float): The parameter for adaptive NMS. It works only when the
+            value is less than 1.0. Default: 1.0.
        return_index(bool): Whether return selected index. Default: False
    Returns:
@@ -573,20 +572,16 @@ def detection_output(loc,
        A tuple with two Variables: (Out, Index) if return_index is True,
        otherwise, a tuple with one Variable(Out) is returned. 
-        Out: The detection outputs is a LoDTensor with shape [No, 6]. Each row 
+        Out (Variable): The detection outputs is a LoDTensor with shape [No, 6].
-        has six values: [label, confidence, xmin, ymin, xmax, ymax]. `No` is 
+        Data type is the same as input (loc). Each row has six values:
+        [label, confidence, xmin, ymin, xmax, ymax]. `No` is
        the total number of detections in this mini-batch. For each instance,
        the offsets in first dimension are called LoD, the offset number is
        N + 1, N is the batch size. The i-th image has `LoD[i + 1] - LoD[i]`
        detected results, if it is 0, the i-th image has no detected results.
-        If all images have not detected results, LoD will be set to {1}, and 
+        Index (Variable): Only return when return_index is True. A 2-D LoDTensor
-        output tensor only contains one value, which is -1.
+        with shape [No, 1] represents the selected index which type is Integer.
-        (After version 1.3, when no boxes detected, the lod is changed
-        from {0} to {1}.)       
-        Index: Only return when return_index is True. A 2-D LoDTensor with 
-        shape [No, 1] represents the selected index which type is Integer. 
        The index is the absolute value cross batches. No is the same number
        as Out. If the index is used to gather other attribute such as age,
        one needs to reshape the input(N, M, 1) to (N * M, 1) as first, where
@@ -598,14 +593,10 @@ def detection_output(loc,
            import paddle.fluid as fluid
-            pb = fluid.layers.data(name='prior_box', shape=[10, 4],
+            pb = fluid.data(name='prior_box', shape=[10, 4], dtype='float32')
-                         append_batch_size=False, dtype='float32')
+            pbv = fluid.data(name='prior_box_var', shape=[10, 4], dtype='float32')
-            pbv = fluid.layers.data(name='prior_box_var', shape=[10, 4],
+            loc = fluid.data(name='target_box', shape=[2, 21, 4], dtype='float32')
-                          append_batch_size=False, dtype='float32')
+            scores = fluid.data(name='scores', shape=[2, 21, 10], dtype='float32')
-            loc = fluid.layers.data(name='target_box', shape=[2, 21, 4],
-                          append_batch_size=False, dtype='float32')
-            scores = fluid.layers.data(name='scores', shape=[2, 21, 10],
-                          append_batch_size=False, dtype='float32')
            nmsed_outs, index = fluid.layers.detection_output(scores=scores,
                                       loc=loc,
                                       prior_box=pb,
@@ -1318,51 +1309,57 @@ def target_assign(input,
            out[j][j][0 : K] = {mismatch_value, mismatch_value, ...}
            out_weight[i][j] = 0.
-    2. Assigning out_weight based on `neg_indices` if `neg_indices` is provided:
+    2. Assigning outputs based on `neg_indices` if `neg_indices` is provided:
-    Assumed that the row offset for each instance in `neg_indices` is called neg_lod,
+    Assumed that i-th instance in `neg_indices` is called `neg_indice`,
-    for i-th instance and each `id` of neg_indices in this instance:
+    for i-th instance:
    .. code-block:: text
+        for id in neg_indice:
            out[i][id][0 : K] = {mismatch_value, mismatch_value, ...}
            out_weight[i][id] = 1.0
    Args:
-       inputs (Variable): This input is a 3D LoDTensor with shape [M, P, K].
+       input (Variable): This input is a 3D LoDTensor with shape [M, P, K].
-       matched_indices (Variable): Tensor<int>), The input matched indices
+           Data type should be int32 or float32.
+       matched_indices (Variable): The input matched indices
           is 2D Tenosr<int32> with shape [N, P], If MatchIndices[i][j] is -1,
           the j-th entity of column is not matched to any entity of row in
           i-th instance.
-       negative_indices (Variable): The input negative example indices are
+       negative_indices (Variable, optional): The input negative example indices
-           an optional input with shape [Neg, 1] and int32 type, where Neg is
+           are an optional input with shape [Neg, 1] and int32 type, where Neg is
           the total number of negative example indices.
-       mismatch_value (float32): Fill this value to the mismatched location.
+       mismatch_value (float32, optional): Fill this value to the mismatched
+           location.
+       name (string): The default value is None.  Normally there is no need for
+           user to set this property.  For more information, please refer
+           to :ref:`api_guide_Name`.
    Returns:
-        tuple:
+        tuple: A tuple(out, out_weight) is returned.
-               A tuple(out, out_weight) is returned. out is a 3D Tensor with
-               shape [N, P, K], N and P is the same as they are in
+        out (Variable): a 3D Tensor with shape [N, P, K] and same data type
-               `neg_indices`, K is the same as it in input of X. If
+        with `input`, N and P are the same as they are in `matched_indices`,
-               `match_indices[i][j]`. out_weight is the weight for output with
+        K is the same as it is in `input`.
-               the shape of [N, P, 1].
+        out_weight (Variable): the weight for output with the shape of [N, P, 1].
+        Data type is float32.
    Examples:
        .. code-block:: python
            import paddle.fluid as fluid
-            x = fluid.layers.data(
+            x = fluid.data(
                name='x',
                shape=[4, 20, 4],
                dtype='float',
-                lod_level=1,
+                lod_level=1)
-                append_batch_size=False)
+            matched_id = fluid.data(
-            matched_id = fluid.layers.data(
                name='indices',
                shape=[8, 20],
-                dtype='int32',
+                dtype='int32')
-                append_batch_size=False)
            trg, trg_weight = fluid.layers.target_assign(
                x,
                matched_id,
@@ -1905,21 +1902,37 @@ def multi_box_head(inputs,
                   name=None,
                   min_max_aspect_ratios_order=False):
    """
-    Generate prior boxes for SSD(Single Shot MultiBox Detector)
+    Based on SSD (Single Shot MultiBox Detector) algorithm, generate prior boxes,
-    algorithm. The details of this algorithm, please refer the
+    regression location and classification confidence on multiple input feature
-    section 2.2 of SSD paper `SSD: Single Shot MultiBox Detector
+    maps, then output the concatenated results. For the details of this algorithm,
+    please refer to section 2.2 of the SSD paper `SSD: Single Shot MultiBox Detector
    <https://arxiv.org/abs/1512.02325>`_ .
    Args:
-       inputs(list|tuple): The list of input Variables, the format
+       inputs (list(Variable)|tuple(Variable)): The list of input variables,
-            of all Variables is NCHW.
+           all Variables are 4-D Tensors with NCHW layout.
-       image(Variable): The input image data of PriorBoxOp,
+           Data type should be float32 or float64.
-            the layout is NCHW.
+       image (Variable): The input image, layout is NCHW. Data type should be
-       base_size(int): the base_size is used to get min_size
+           the same as inputs.
-            and max_size according to min_ratio and max_ratio.
+       base_size(int): the base_size is input image size. When len(inputs) > 2
+           and `min_sizes` and `max_sizes` are None, the `min_sizes` and `max_sizes`
+           are calculated by `base_size`, `min_ratio` and `max_ratio`. The
+           formula is as follows:
+              ..  code-block:: text
+                  min_sizes = []
+                  max_sizes = []
+                  step = int(math.floor(((max_ratio - min_ratio)) / (num_layer - 2)))
+                  for ratio in six.moves.range(min_ratio, max_ratio + 1, step):
+                      min_sizes.append(base_size * ratio / 100.)
+                      max_sizes.append(base_size * (ratio + step) / 100.)
+                  min_sizes = [base_size * .10] + min_sizes
+                  max_sizes = [base_size * .20] + max_sizes
       num_classes(int): The number of classes.
-       aspect_ratios(list|tuple): the aspect ratios of generated prior
+       aspect_ratios(list(float) | tuple(float)): the aspect ratios of generated
-            boxes. The length of input and aspect_ratios must be equal.
+           prior boxes. The length of input and aspect_ratios must be equal.
       min_ratio(int): the min ratio of generated prior boxes.
       max_ratio(int): the max ratio of generated prior boxes.
       min_sizes(list|tuple|None): If `len(inputs) <=2`,
@@ -1945,7 +1958,9 @@ def multi_box_head(inputs,
       kernel_size(int): The kernel size of conv2d. Default: 1.
       pad(int|list|tuple): The padding of conv2d. Default:0.
       stride(int|list|tuple): The stride of conv2d. Default:1,
-       name(str): Name of the prior box layer. Default: None.
+       name(str): The default value is None.  Normally there is no need
+           for user to set this property.  For more information, please
+           refer to :ref:`api_guide_Name`.
       min_max_aspect_ratios_order(bool): If set True, the output prior box is
            in order of [min, max, aspect_ratios], which is consistent with
            Caffe. Please note, this order affects the weights order of
@@ -1955,33 +1970,34 @@ def multi_box_head(inputs,
    Returns:
        tuple: A tuple with four Variables. (mbox_loc, mbox_conf, boxes, variances)
-        mbox_loc: The predicted boxes' location of the inputs. The layout
+        mbox_loc (Variable): The predicted boxes' location of the inputs. The
-        is [N, H*W*Priors, 4]. where Priors is the number of predicted
+        layout is [N, num_priors, 4], where N is batch size, ``num_priors``
-        boxes each position of each input.
+        is the number of prior boxes. Data type is the same as input.
-        mbox_conf: The predicted boxes' confidence of the inputs. The layout
+        mbox_conf (Variable): The predicted boxes' confidence of the inputs.
-        is [N, H*W*Priors, C]. where Priors is the number of predicted boxes
+        The layout is [N, num_priors, C], where ``N`` and ``num_priors`` 
-        each position of each input and C is the number of Classes.
+        have the same meaning as above. C is the number of Classes.
+        Data type is the same as input.
-        boxes: the output prior boxes of PriorBox. The layout is [num_priors, 4].
+        boxes (Variable): the output prior boxes. The layout is [num_priors, 4].
-        num_priors is the total box count of each position of inputs.
+        The meaning of num_priors is the same as above.
+        Data type is the same as input.
-        variances: the expanded variances of PriorBox. The layout is
+        variances (Variable): the expanded variances for prior boxes.
-        [num_priors, 4]. num_priors is the total box count of each position of inputs
+        The layout is [num_priors, 4]. Data type is the same as input.
+    Examples 1: set min_ratio and max_ratio:
-    Examples:
        .. code-block:: python
          import paddle.fluid as fluid
-          images = fluid.layers.data(name='data', shape=[3, 300, 300], dtype='float32')
+          images = fluid.data(name='data', shape=[None, 3, 300, 300], dtype='float32')
-          conv1 = fluid.layers.data(name='conv1', shape=[512, 19, 19], dtype='float32')
+          conv1 = fluid.data(name='conv1', shape=[None, 512, 19, 19], dtype='float32')
-          conv2 = fluid.layers.data(name='conv2', shape=[1024, 10, 10], dtype='float32')
+          conv2 = fluid.data(name='conv2', shape=[None, 1024, 10, 10], dtype='float32')
-          conv3 = fluid.layers.data(name='conv3', shape=[512, 5, 5], dtype='float32')
+          conv3 = fluid.data(name='conv3', shape=[None, 512, 5, 5], dtype='float32')
-          conv4 = fluid.layers.data(name='conv4', shape=[256, 3, 3], dtype='float32')
+          conv4 = fluid.data(name='conv4', shape=[None, 256, 3, 3], dtype='float32')
-          conv5 = fluid.layers.data(name='conv5', shape=[256, 2, 2], dtype='float32')
+          conv5 = fluid.data(name='conv5', shape=[None, 256, 2, 2], dtype='float32')
-          conv6 = fluid.layers.data(name='conv6', shape=[128, 1, 1], dtype='float32')
+          conv6 = fluid.data(name='conv6', shape=[None, 128, 1, 1], dtype='float32')
          mbox_locs, mbox_confs, box, var = fluid.layers.multi_box_head(
            inputs=[conv1, conv2, conv3, conv4, conv5, conv6],
@@ -1994,6 +2010,32 @@ def multi_box_head(inputs,
            offset=0.5,
            flip=True,
            clip=True)
+    Examples 2: set min_sizes and max_sizes:
+        .. code-block:: python
+          import paddle.fluid as fluid
+          images = fluid.data(name='data', shape=[None, 3, 300, 300], dtype='float32')
+          conv1 = fluid.data(name='conv1', shape=[None, 512, 19, 19], dtype='float32')
+          conv2 = fluid.data(name='conv2', shape=[None, 1024, 10, 10], dtype='float32')
+          conv3 = fluid.data(name='conv3', shape=[None, 512, 5, 5], dtype='float32')
+          conv4 = fluid.data(name='conv4', shape=[None, 256, 3, 3], dtype='float32')
+          conv5 = fluid.data(name='conv5', shape=[None, 256, 2, 2], dtype='float32')
+          conv6 = fluid.data(name='conv6', shape=[None, 128, 1, 1], dtype='float32')
+          mbox_locs, mbox_confs, box, var = fluid.layers.multi_box_head(
+            inputs=[conv1, conv2, conv3, conv4, conv5, conv6],
+            image=images,
+            num_classes=21,
+            min_sizes=[60.0, 105.0, 150.0, 195.0, 240.0, 285.0],
+            max_sizes=[[], 150.0, 195.0, 240.0, 285.0, 300.0],
+            aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]],
+            base_size=300,
+            offset=0.5,
+            flip=True,
+            clip=True)
    """
    def _reshape_with_axis_(input, axis=1):
@@ -2439,7 +2481,7 @@ def generate_proposal_labels(rpn_rois,
 def generate_mask_labels(im_info, gt_classes, is_crowd, gt_segms, rois,
                         labels_int32, num_classes, resolution):
    """
-    ** Generate Mask Labels for Mask-RCNN **
+    **Generate Mask Labels for Mask-RCNN**
    This operator can be, for given the RoIs and corresponding labels,
    to sample foreground RoIs. This mask branch also has
@@ -2475,62 +2517,67 @@ def generate_mask_labels(im_info, gt_classes, is_crowd, gt_segms, rois,
            feeder.feed(batch_masks)
    Args:
-        im_info(Variable): A 2-D Tensor with shape [N, 3]. N is the batch size,
+        im_info (Variable): A 2-D Tensor with shape [N, 3] and float32
-            each element is [height, width, scale] of image. Image scale is
+            data type. N is the batch size, each element is
-            target_size) / original_size.
+            [height, width, scale] of image. Image scale is
-        gt_classes(Variable): A 2-D LoDTensor with shape [M, 1]. M is the total
+            target_size / original_size, target_size is the size after resize,
-            number of ground-truth, each element is a class label.
+            original_size is the original image size.
-        is_crowd(Variable): A 2-D LoDTensor with shape as gt_classes,
+        gt_classes (Variable): A 2-D LoDTensor with shape [M, 1]. Data type
-            each element is a flag indicating whether a groundtruth is crowd.
+            should be int. M is the total number of ground-truth, each
-        gt_segms(Variable): This input is a 2D LoDTensor with shape [S, 2],
+            element is a class label.
-            it's LoD level is 3. Usually users do not needs to understand LoD,
+        is_crowd (Variable): A 2-D LoDTensor with same shape and same data type
+            as gt_classes, each element is a flag indicating whether a
+            groundtruth is crowd.
+        gt_segms (Variable): This input is a 2D LoDTensor with shape [S, 2] and
+            float32 data type, its LoD level is 3.
+            Usually users do not need to understand LoD,
            The users should return correct data format in reader.
+            The LoD[0] represents the ground-truth objects number of
-            The LoD[0] represents the gt objects number of
            each instance. LoD[1] represents the segmentation counts of each
            objects. LoD[2] represents the polygons number of each segmentation.
            S the total number of polygons coordinate points. Each element is
            (x, y) coordinate points.
-        rois(Variable): A 2-D LoDTensor with shape [R, 4]. R is the total
+        rois (Variable): A 2-D LoDTensor with shape [R, 4] and data type
-            number of RoIs, each element is a bounding box with
+            float32. R is the total number of RoIs, each element is a bounding
-            (xmin, ymin, xmax, ymax) format in the range of original image.
+            box with (xmin, ymin, xmax, ymax) format in the range of original image.
-        labels_int32(Variable): A 2-D LoDTensor in shape of [R, 1] with type
+        labels_int32 (Variable): A 2-D LoDTensor in shape of [R, 1] with type
            of int32. R is the same as it in `rois`. Each element repersents
            a class label of a RoI.
-        num_classes(int): Class number.
+        num_classes (int): Class number.
-        resolution(int): Resolution of mask predictions.
+        resolution (int): Resolution of mask predictions.
    Returns:
-        mask_rois (Variable):  A 2D LoDTensor with shape [P, 4]. P is the total
+        mask_rois (Variable):  A 2D LoDTensor with shape [P, 4] and same data
-            number of sampled RoIs. Each element is a bounding box with
+        type as `rois`. P is the total number of sampled RoIs. Each element
-            [xmin, ymin, xmax, ymax] format in range of orignal image size.
+        is a bounding box with [xmin, ymin, xmax, ymax] format in range of
-        mask_rois_has_mask_int32 (Variable): A 2D LoDTensor with shape [P, 1],
+        original image size.
-            each element repersents the output mask RoI index with regard to
-            to input RoIs.
+        mask_rois_has_mask_int32 (Variable): A 2D LoDTensor with shape [P, 1]
-        mask_int32 (Variable): A 2D LoDTensor with shape [P, K * M * M],
+        and int data type, each element represents the output mask RoI
-            K is the classes number and M is the resolution of mask predictions.
+        index with regard to input RoIs.
-            Each element repersents the binary mask targets.
+        mask_int32 (Variable): A 2D LoDTensor with shape [P, K * M * M] and int
+        data type, K is the classes number and M is the resolution of mask
+        predictions. Each element represents the binary mask targets.
    Examples:
        .. code-block:: python
          import paddle.fluid as fluid
-          im_info = fluid.layers.data(name="im_info", shape=[3],
+          im_info = fluid.data(name="im_info", shape=[None, 3],
              dtype="float32")
-          gt_classes = fluid.layers.data(name="gt_classes", shape=[1],
+          gt_classes = fluid.data(name="gt_classes", shape=[None, 1],
              dtype="float32", lod_level=1)
-          is_crowd = fluid.layers.data(name="is_crowd", shape=[1],
+          is_crowd = fluid.data(name="is_crowd", shape=[None, 1],
              dtype="float32", lod_level=1)
-          gt_masks = fluid.layers.data(name="gt_masks", shape=[2],
+          gt_masks = fluid.data(name="gt_masks", shape=[None, 2],
              dtype="float32", lod_level=3)
          # rois, roi_labels can be the output of
          # fluid.layers.generate_proposal_labels.
-          rois = fluid.layers.data(name="rois", shape=[4],
+          rois = fluid.data(name="rois", shape=[None, 4],
              dtype="float32", lod_level=1)
-          roi_labels = fluid.layers.data(name="roi_labels", shape=[1],
+          roi_labels = fluid.data(name="roi_labels", shape=[None, 1],
              dtype="int32", lod_level=1)
          mask_rois, mask_index, mask_int32 = fluid.layers.generate_mask_labels(
              im_info=im_info,

--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -10510,9 +10510,19 @@ def random_crop(x, shape, seed=None):
        ${out_comment}
    Examples:
-        >>> import paddle.fluid as fluid
+        .. code-block:: python
-        >>> img = fluid.layers.data("img", [3, 256, 256])
-        >>> cropped_img = fluid.layers.random_crop(img, shape=[3, 224, 224])
+            import paddle.fluid as fluid
+            img = fluid.data("img", [None, 3, 256, 256])
+            # cropped_img shape: [-1, 3, 224, 224]
+            cropped_img = fluid.layers.random_crop(img, shape=[3, 224, 224])
+            # cropped_img2 shape: [-1, 2, 224, 224]
+            # cropped_img2 = fluid.layers.random_crop(img, shape=[2, 224, 224])
+            # cropped_img3 shape: [-1, 3, 128, 224]
+            # cropped_img3 = fluid.layers.random_crop(img, shape=[128, 224])
    """
    helper = LayerHelper("random_crop", **locals())
    dtype = x.dtype