未验证 提交 df3025c3 编写于 作者: Q qingqing01 提交者: GitHub

Polish En doc for some APIs. (#20418)

* Polish En doc for some APIs
* Update some comments and API.spec
上级 6b612a28
...@@ -214,7 +214,7 @@ paddle.fluid.layers.scatter (ArgSpec(args=['input', 'index', 'updates', 'name', ...@@ -214,7 +214,7 @@ paddle.fluid.layers.scatter (ArgSpec(args=['input', 'index', 'updates', 'name',
paddle.fluid.layers.scatter_nd_add (ArgSpec(args=['ref', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '2607b5c9369fbc52f208de066a80fc25')) paddle.fluid.layers.scatter_nd_add (ArgSpec(args=['ref', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '2607b5c9369fbc52f208de066a80fc25'))
paddle.fluid.layers.scatter_nd (ArgSpec(args=['index', 'updates', 'shape', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e43f1d3a938b35da246aea3e72a020ec')) paddle.fluid.layers.scatter_nd (ArgSpec(args=['index', 'updates', 'shape', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e43f1d3a938b35da246aea3e72a020ec'))
paddle.fluid.layers.sequence_scatter (ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'abe3f714120117a5a3d3e639853932bf')) paddle.fluid.layers.sequence_scatter (ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'abe3f714120117a5a3d3e639853932bf'))
paddle.fluid.layers.random_crop (ArgSpec(args=['x', 'shape', 'seed'], varargs=None, keywords=None, defaults=(None,)), ('document', '042af0b8abea96b40c22f6e70d99e042')) paddle.fluid.layers.random_crop (ArgSpec(args=['x', 'shape', 'seed'], varargs=None, keywords=None, defaults=(None,)), ('document', '44f35002962cf24e14dd2958f6584e3d'))
paddle.fluid.layers.mean_iou (ArgSpec(args=['input', 'label', 'num_classes'], varargs=None, keywords=None, defaults=None), ('document', 'dea29c0c3cdbd5b498afef60e58c9d7c')) paddle.fluid.layers.mean_iou (ArgSpec(args=['input', 'label', 'num_classes'], varargs=None, keywords=None, defaults=None), ('document', 'dea29c0c3cdbd5b498afef60e58c9d7c'))
paddle.fluid.layers.relu (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '0942c174f4f6fb274976d4357356f6a2')) paddle.fluid.layers.relu (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '0942c174f4f6fb274976d4357356f6a2'))
paddle.fluid.layers.selu (ArgSpec(args=['x', 'scale', 'alpha', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '3ee40bc474b4bccdaf112d3f0d847318')) paddle.fluid.layers.selu (ArgSpec(args=['x', 'scale', 'alpha', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '3ee40bc474b4bccdaf112d3f0d847318'))
...@@ -408,10 +408,10 @@ paddle.fluid.layers.cumsum (ArgSpec(args=['x', 'axis', 'exclusive', 'reverse'], ...@@ -408,10 +408,10 @@ paddle.fluid.layers.cumsum (ArgSpec(args=['x', 'axis', 'exclusive', 'reverse'],
paddle.fluid.layers.thresholded_relu (ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,)), ('document', '94c71025bf11ab8172fd455350274138')) paddle.fluid.layers.thresholded_relu (ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,)), ('document', '94c71025bf11ab8172fd455350274138'))
paddle.fluid.layers.prior_box (ArgSpec(args=['input', 'image', 'min_sizes', 'max_sizes', 'aspect_ratios', 'variance', 'flip', 'clip', 'steps', 'offset', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, [1.0], [0.1, 0.1, 0.2, 0.2], False, False, [0.0, 0.0], 0.5, None, False)), ('document', '0fdf82762fd0a5acb2578a72771b5b44')) paddle.fluid.layers.prior_box (ArgSpec(args=['input', 'image', 'min_sizes', 'max_sizes', 'aspect_ratios', 'variance', 'flip', 'clip', 'steps', 'offset', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, [1.0], [0.1, 0.1, 0.2, 0.2], False, False, [0.0, 0.0], 0.5, None, False)), ('document', '0fdf82762fd0a5acb2578a72771b5b44'))
paddle.fluid.layers.density_prior_box (ArgSpec(args=['input', 'image', 'densities', 'fixed_sizes', 'fixed_ratios', 'variance', 'clip', 'steps', 'offset', 'flatten_to_2d', 'name'], varargs=None, keywords=None, defaults=(None, None, None, [0.1, 0.1, 0.2, 0.2], False, [0.0, 0.0], 0.5, False, None)), ('document', '7a484a0da5e993a7734867a3dfa86571')) paddle.fluid.layers.density_prior_box (ArgSpec(args=['input', 'image', 'densities', 'fixed_sizes', 'fixed_ratios', 'variance', 'clip', 'steps', 'offset', 'flatten_to_2d', 'name'], varargs=None, keywords=None, defaults=(None, None, None, [0.1, 0.1, 0.2, 0.2], False, [0.0, 0.0], 0.5, False, None)), ('document', '7a484a0da5e993a7734867a3dfa86571'))
paddle.fluid.layers.multi_box_head (ArgSpec(args=['inputs', 'image', 'base_size', 'num_classes', 'aspect_ratios', 'min_ratio', 'max_ratio', 'min_sizes', 'max_sizes', 'steps', 'step_w', 'step_h', 'offset', 'variance', 'flip', 'clip', 'kernel_size', 'pad', 'stride', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None, 0.5, [0.1, 0.1, 0.2, 0.2], True, False, 1, 0, 1, None, False)), ('document', 'fd58078fdfffd899b91f992ba224628f')) paddle.fluid.layers.multi_box_head (ArgSpec(args=['inputs', 'image', 'base_size', 'num_classes', 'aspect_ratios', 'min_ratio', 'max_ratio', 'min_sizes', 'max_sizes', 'steps', 'step_w', 'step_h', 'offset', 'variance', 'flip', 'clip', 'kernel_size', 'pad', 'stride', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None, 0.5, [0.1, 0.1, 0.2, 0.2], True, False, 1, 0, 1, None, False)), ('document', '61360150b911fa4097f1a221f5d49877'))
paddle.fluid.layers.bipartite_match (ArgSpec(args=['dist_matrix', 'match_type', 'dist_threshold', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '6f795f407a8e3a3ec3da52726c73405a')) paddle.fluid.layers.bipartite_match (ArgSpec(args=['dist_matrix', 'match_type', 'dist_threshold', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '6f795f407a8e3a3ec3da52726c73405a'))
paddle.fluid.layers.target_assign (ArgSpec(args=['input', 'matched_indices', 'negative_indices', 'mismatch_value', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', 'e9685f32d21bec8c013626c0254502c5')) paddle.fluid.layers.target_assign (ArgSpec(args=['input', 'matched_indices', 'negative_indices', 'mismatch_value', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '4670c1be208835fc8edd61025c21d0e4'))
paddle.fluid.layers.detection_output (ArgSpec(args=['loc', 'scores', 'prior_box', 'prior_box_var', 'background_label', 'nms_threshold', 'nms_top_k', 'keep_top_k', 'score_threshold', 'nms_eta', 'return_index'], varargs=None, keywords=None, defaults=(0, 0.3, 400, 200, 0.01, 1.0, False)), ('document', '5485bcaceb0cde2695565a2ffd5bbd40')) paddle.fluid.layers.detection_output (ArgSpec(args=['loc', 'scores', 'prior_box', 'prior_box_var', 'background_label', 'nms_threshold', 'nms_top_k', 'keep_top_k', 'score_threshold', 'nms_eta', 'return_index'], varargs=None, keywords=None, defaults=(0, 0.3, 400, 200, 0.01, 1.0, False)), ('document', 'ed19f55b366e68ed686318ef7aff120d'))
paddle.fluid.layers.ssd_loss (ArgSpec(args=['location', 'confidence', 'gt_box', 'gt_label', 'prior_box', 'prior_box_var', 'background_label', 'overlap_threshold', 'neg_pos_ratio', 'neg_overlap', 'loc_loss_weight', 'conf_loss_weight', 'match_type', 'mining_type', 'normalize', 'sample_size'], varargs=None, keywords=None, defaults=(None, 0, 0.5, 3.0, 0.5, 1.0, 1.0, 'per_prediction', 'max_negative', True, None)), ('document', '1f1ab4f908ceddef1d99a8363e6826af')) paddle.fluid.layers.ssd_loss (ArgSpec(args=['location', 'confidence', 'gt_box', 'gt_label', 'prior_box', 'prior_box_var', 'background_label', 'overlap_threshold', 'neg_pos_ratio', 'neg_overlap', 'loc_loss_weight', 'conf_loss_weight', 'match_type', 'mining_type', 'normalize', 'sample_size'], varargs=None, keywords=None, defaults=(None, 0, 0.5, 3.0, 0.5, 1.0, 1.0, 'per_prediction', 'max_negative', True, None)), ('document', '1f1ab4f908ceddef1d99a8363e6826af'))
paddle.fluid.layers.rpn_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'is_crowd', 'im_info', 'rpn_batch_size_per_im', 'rpn_straddle_thresh', 'rpn_fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.0, 0.5, 0.7, 0.3, True)), ('document', 'd46629656b4ce9b07809e32c0482cbef')) paddle.fluid.layers.rpn_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'is_crowd', 'im_info', 'rpn_batch_size_per_im', 'rpn_straddle_thresh', 'rpn_fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.0, 0.5, 0.7, 0.3, True)), ('document', 'd46629656b4ce9b07809e32c0482cbef'))
paddle.fluid.layers.retinanet_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'gt_labels', 'is_crowd', 'im_info', 'num_classes', 'positive_overlap', 'negative_overlap'], varargs=None, keywords=None, defaults=(1, 0.5, 0.4)), ('document', '543b2a40641260e745a76b1f7a25fb2a')) paddle.fluid.layers.retinanet_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'gt_labels', 'is_crowd', 'im_info', 'num_classes', 'positive_overlap', 'negative_overlap'], varargs=None, keywords=None, defaults=(1, 0.5, 0.4)), ('document', '543b2a40641260e745a76b1f7a25fb2a'))
...@@ -420,7 +420,7 @@ paddle.fluid.layers.anchor_generator (ArgSpec(args=['input', 'anchor_sizes', 'as ...@@ -420,7 +420,7 @@ paddle.fluid.layers.anchor_generator (ArgSpec(args=['input', 'anchor_sizes', 'as
paddle.fluid.layers.roi_perspective_transform (ArgSpec(args=['input', 'rois', 'transformed_height', 'transformed_width', 'spatial_scale', 'name'], varargs=None, keywords=None, defaults=(1.0, None)), ('document', 'b007f545ad41e66b814203bdb76516c6')) paddle.fluid.layers.roi_perspective_transform (ArgSpec(args=['input', 'rois', 'transformed_height', 'transformed_width', 'spatial_scale', 'name'], varargs=None, keywords=None, defaults=(1.0, None)), ('document', 'b007f545ad41e66b814203bdb76516c6'))
paddle.fluid.layers.generate_proposal_labels (ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random', 'is_cls_agnostic', 'is_cascade_rcnn'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True, False, False)), ('document', 'f2342042127b536a0a16390f149f1bba')) paddle.fluid.layers.generate_proposal_labels (ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random', 'is_cls_agnostic', 'is_cascade_rcnn'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True, False, False)), ('document', 'f2342042127b536a0a16390f149f1bba'))
paddle.fluid.layers.generate_proposals (ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None)), ('document', '5cba014b41610431f8949e2d7336f1cc')) paddle.fluid.layers.generate_proposals (ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None)), ('document', '5cba014b41610431f8949e2d7336f1cc'))
paddle.fluid.layers.generate_mask_labels (ArgSpec(args=['im_info', 'gt_classes', 'is_crowd', 'gt_segms', 'rois', 'labels_int32', 'num_classes', 'resolution'], varargs=None, keywords=None, defaults=None), ('document', 'b319b10ddaf17fb4ddf03518685a17ef')) paddle.fluid.layers.generate_mask_labels (ArgSpec(args=['im_info', 'gt_classes', 'is_crowd', 'gt_segms', 'rois', 'labels_int32', 'num_classes', 'resolution'], varargs=None, keywords=None, defaults=None), ('document', '2bacc35429f4fffe72a30c5a49a61eb7'))
paddle.fluid.layers.iou_similarity (ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e24478fd1fcf1727d4947fe14356b3d4')) paddle.fluid.layers.iou_similarity (ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e24478fd1fcf1727d4947fe14356b3d4'))
paddle.fluid.layers.box_coder (ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name', 'axis'], varargs=None, keywords=None, defaults=('encode_center_size', True, None, 0)), ('document', '511d7033c0cfce1a5b88c04ad6e7ed5b')) paddle.fluid.layers.box_coder (ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name', 'axis'], varargs=None, keywords=None, defaults=('encode_center_size', True, None, 0)), ('document', '511d7033c0cfce1a5b88c04ad6e7ed5b'))
paddle.fluid.layers.polygon_box_transform (ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '2183f03c4f16712dcef6a474dbcefa24')) paddle.fluid.layers.polygon_box_transform (ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '2183f03c4f16712dcef6a474dbcefa24'))
......
...@@ -525,12 +525,11 @@ def detection_output(loc, ...@@ -525,12 +525,11 @@ def detection_output(loc,
nms_eta=1.0, nms_eta=1.0,
return_index=False): return_index=False):
""" """
**Detection Output Layer for Single Shot Multibox Detector (SSD).** Given the regression locations, classification confidences and prior boxes,
calculate the detection outputs by performing following steps:
This operation is to get the detection results by performing following 1. Decode input bounding box predictions according to the prior boxes and
two steps: regression locations.
1. Decode input bounding box predictions according to the prior boxes.
2. Get the final detection results by applying multi-class non maximum 2. Get the final detection results by applying multi-class non maximum
suppression (NMS). suppression (NMS).
...@@ -539,33 +538,33 @@ def detection_output(loc, ...@@ -539,33 +538,33 @@ def detection_output(loc,
Args: Args:
loc(Variable): A 3-D Tensor with shape [N, M, 4] represents the loc(Variable): A 3-D Tensor with shape [N, M, 4] represents the
predicted locations of M bounding bboxes. N is the batch size, predicted locations of M bounding bboxes. Data type should be
float32 or float64. N is the batch size,
and each bounding box has four coordinate values and the layout and each bounding box has four coordinate values and the layout
is [xmin, ymin, xmax, ymax]. is [xmin, ymin, xmax, ymax].
scores(Variable): A 3-D Tensor with shape [N, M, C] represents the scores(Variable): A 3-D Tensor with shape [N, M, C] represents the
predicted confidence predictions. N is the batch size, C is the predicted confidence predictions. Data type should be float32
class number, M is number of bounding boxes. For each category or float64. N is the batch size, C is the
there are total M scores which corresponding M bounding boxes. class number, M is number of bounding boxes.
prior_box(Variable): A 2-D Tensor with shape [M, 4] holds M boxes, prior_box(Variable): A 2-D Tensor with shape [M, 4] holds M boxes,
each box is represented as [xmin, ymin, xmax, ymax], each box is represented as [xmin, ymin, xmax, ymax]. Data type
[xmin, ymin] is the left top coordinate of the anchor box, should be float32 or float64.
if the input is image feature map, they are close to the origin
of the coordinate system. [xmax, ymax] is the right bottom
coordinate of the anchor box.
prior_box_var(Variable): A 2-D Tensor with shape [M, 4] holds M group prior_box_var(Variable): A 2-D Tensor with shape [M, 4] holds M group
of variance. of variance. Data type should be float32 or float64.
background_label(float): The index of background label, background_label(int): The index of background label,
the background label will be ignored. If set to -1, then all the background label will be ignored. If set to -1, then all
categories will be considered. categories will be considered. Default: 0.
nms_threshold(float): The threshold to be used in NMS. nms_threshold(float): The threshold to be used in NMS. Default: 0.3.
nms_top_k(int): Maximum number of detections to be kept according nms_top_k(int): Maximum number of detections to be kept according
to the confidences after the filtering detections based on to the confidences after filtering detections based on
score_threshold. score_threshold and before NMS. Default: 400.
keep_top_k(int): Number of total bboxes to be kept per image after keep_top_k(int): Number of total bboxes to be kept per image after
NMS step. -1 means keeping all bboxes after NMS step. NMS step. -1 means keeping all bboxes after NMS step. Default: 200.
score_threshold(float): Threshold to filter out bounding boxes with score_threshold(float): Threshold to filter out bounding boxes with
low confidence score. If not provided, consider all boxes. low confidence score. If not provided, consider all boxes.
nms_eta(float): The parameter for adaptive NMS. Default: 0.01.
nms_eta(float): The parameter for adaptive NMS. It works only when the
value is less than 1.0. Default: 1.0.
return_index(bool): Whether return selected index. Default: False return_index(bool): Whether return selected index. Default: False
Returns: Returns:
...@@ -573,20 +572,16 @@ def detection_output(loc, ...@@ -573,20 +572,16 @@ def detection_output(loc,
A tuple with two Variables: (Out, Index) if return_index is True, A tuple with two Variables: (Out, Index) if return_index is True,
otherwise, a tuple with one Variable(Out) is returned. otherwise, a tuple with one Variable(Out) is returned.
Out: The detection outputs is a LoDTensor with shape [No, 6]. Each row Out (Variable): The detection outputs is a LoDTensor with shape [No, 6].
has six values: [label, confidence, xmin, ymin, xmax, ymax]. `No` is Data type is the same as input (loc). Each row has six values:
[label, confidence, xmin, ymin, xmax, ymax]. `No` is
the total number of detections in this mini-batch. For each instance, the total number of detections in this mini-batch. For each instance,
the offsets in first dimension are called LoD, the offset number is the offsets in first dimension are called LoD, the offset number is
N + 1, N is the batch size. The i-th image has `LoD[i + 1] - LoD[i]` N + 1, N is the batch size. The i-th image has `LoD[i + 1] - LoD[i]`
detected results, if it is 0, the i-th image has no detected results. detected results, if it is 0, the i-th image has no detected results.
If all images have not detected results, LoD will be set to {1}, and Index (Variable): Only return when return_index is True. A 2-D LoDTensor
output tensor only contains one value, which is -1. with shape [No, 1] represents the selected index which type is Integer.
(After version 1.3, when no boxes detected, the lod is changed
from {0} to {1}.)
Index: Only return when return_index is True. A 2-D LoDTensor with
shape [No, 1] represents the selected index which type is Integer.
The index is the absolute value cross batches. No is the same number The index is the absolute value cross batches. No is the same number
as Out. If the index is used to gather other attribute such as age, as Out. If the index is used to gather other attribute such as age,
one needs to reshape the input(N, M, 1) to (N * M, 1) as first, where one needs to reshape the input(N, M, 1) to (N * M, 1) as first, where
...@@ -598,14 +593,10 @@ def detection_output(loc, ...@@ -598,14 +593,10 @@ def detection_output(loc,
import paddle.fluid as fluid import paddle.fluid as fluid
pb = fluid.layers.data(name='prior_box', shape=[10, 4], pb = fluid.data(name='prior_box', shape=[10, 4], dtype='float32')
append_batch_size=False, dtype='float32') pbv = fluid.data(name='prior_box_var', shape=[10, 4], dtype='float32')
pbv = fluid.layers.data(name='prior_box_var', shape=[10, 4], loc = fluid.data(name='target_box', shape=[2, 21, 4], dtype='float32')
append_batch_size=False, dtype='float32') scores = fluid.data(name='scores', shape=[2, 21, 10], dtype='float32')
loc = fluid.layers.data(name='target_box', shape=[2, 21, 4],
append_batch_size=False, dtype='float32')
scores = fluid.layers.data(name='scores', shape=[2, 21, 10],
append_batch_size=False, dtype='float32')
nmsed_outs, index = fluid.layers.detection_output(scores=scores, nmsed_outs, index = fluid.layers.detection_output(scores=scores,
loc=loc, loc=loc,
prior_box=pb, prior_box=pb,
...@@ -1318,51 +1309,57 @@ def target_assign(input, ...@@ -1318,51 +1309,57 @@ def target_assign(input,
out[j][j][0 : K] = {mismatch_value, mismatch_value, ...} out[j][j][0 : K] = {mismatch_value, mismatch_value, ...}
out_weight[i][j] = 0. out_weight[i][j] = 0.
2. Assigning out_weight based on `neg_indices` if `neg_indices` is provided: 2. Assigning outputs based on `neg_indices` if `neg_indices` is provided:
Assumed that the row offset for each instance in `neg_indices` is called neg_lod, Assumed that i-th instance in `neg_indices` is called `neg_indice`,
for i-th instance and each `id` of neg_indices in this instance: for i-th instance:
.. code-block:: text .. code-block:: text
for id in neg_indice:
out[i][id][0 : K] = {mismatch_value, mismatch_value, ...} out[i][id][0 : K] = {mismatch_value, mismatch_value, ...}
out_weight[i][id] = 1.0 out_weight[i][id] = 1.0
Args: Args:
inputs (Variable): This input is a 3D LoDTensor with shape [M, P, K]. input (Variable): This input is a 3D LoDTensor with shape [M, P, K].
matched_indices (Variable): Tensor<int>), The input matched indices Data type should be int32 or float32.
matched_indices (Variable): The input matched indices
is 2D Tensor<int32> with shape [N, P], If MatchIndices[i][j] is -1, is 2D Tensor<int32> with shape [N, P], If MatchIndices[i][j] is -1,
the j-th entity of column is not matched to any entity of row in the j-th entity of column is not matched to any entity of row in
i-th instance. i-th instance.
negative_indices (Variable): The input negative example indices are negative_indices (Variable, optional): The input negative example indices
an optional input with shape [Neg, 1] and int32 type, where Neg is are an optional input with shape [Neg, 1] and int32 type, where Neg is
the total number of negative example indices. the total number of negative example indices.
mismatch_value (float32): Fill this value to the mismatched location. mismatch_value (float32, optional): Fill this value to the mismatched
location.
name (string): The default value is None. Normally there is no need for
user to set this property. For more information, please refer
to :ref:`api_guide_Name`.
Returns: Returns:
tuple: tuple: A tuple(out, out_weight) is returned.
A tuple(out, out_weight) is returned. out is a 3D Tensor with
shape [N, P, K], N and P is the same as they are in out (Variable): a 3D Tensor with shape [N, P, K] and same data type
`neg_indices`, K is the same as it in input of X. If with `input`, N and P is the same as they are in `matched_indices`,
`match_indices[i][j]`. out_weight is the weight for output with K is the same as it in input of X.
the shape of [N, P, 1].
out_weight (Variable): the weight for output with the shape of [N, P, 1].
Data type is float32.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle.fluid as fluid
x = fluid.layers.data( x = fluid.data(
name='x', name='x',
shape=[4, 20, 4], shape=[4, 20, 4],
dtype='float', dtype='float',
lod_level=1, lod_level=1)
append_batch_size=False) matched_id = fluid.data(
matched_id = fluid.layers.data(
name='indices', name='indices',
shape=[8, 20], shape=[8, 20],
dtype='int32', dtype='int32')
append_batch_size=False)
trg, trg_weight = fluid.layers.target_assign( trg, trg_weight = fluid.layers.target_assign(
x, x,
matched_id, matched_id,
...@@ -1905,21 +1902,37 @@ def multi_box_head(inputs, ...@@ -1905,21 +1902,37 @@ def multi_box_head(inputs,
name=None, name=None,
min_max_aspect_ratios_order=False): min_max_aspect_ratios_order=False):
""" """
Generate prior boxes for SSD(Single Shot MultiBox Detector) Based on SSD (Single Shot MultiBox Detector) algorithm, generate prior boxes,
algorithm. The details of this algorithm, please refer the regression location and classification confidence on multiple input feature
section 2.2 of SSD paper `SSD: Single Shot MultiBox Detector maps, then output the concatenate results. The details of this algorithm,
please refer the section 2.2 of SSD paper `SSD: Single Shot MultiBox Detector
<https://arxiv.org/abs/1512.02325>`_ . <https://arxiv.org/abs/1512.02325>`_ .
Args: Args:
inputs(list|tuple): The list of input Variables, the format inputs (list(Variable)|tuple(Variable)): The list of input variables,
of all Variables is NCHW. the format of all Variables are 4-D Tensor, layout is NCHW.
image(Variable): The input image data of PriorBoxOp, Data type should be float32 or float64.
the layout is NCHW. image (Variable): The input image, layout is NCHW. Data type should be
base_size(int): the base_size is used to get min_size the same as inputs.
and max_size according to min_ratio and max_ratio. base_size(int): the base_size is input image size. When len(inputs) > 2
and `min_size` and `max_size` are None, the `min_size` and `max_size`
are calculated by `base_size`, `min_ratio` and `max_ratio`. The
formula is as follows:
.. code-block:: text
min_sizes = []
max_sizes = []
step = int(math.floor(((max_ratio - min_ratio)) / (num_layer - 2)))
for ratio in six.moves.range(min_ratio, max_ratio + 1, step):
min_sizes.append(base_size * ratio / 100.)
max_sizes.append(base_size * (ratio + step) / 100.)
min_sizes = [base_size * .10] + min_sizes
max_sizes = [base_size * .20] + max_sizes
num_classes(int): The number of classes. num_classes(int): The number of classes.
aspect_ratios(list|tuple): the aspect ratios of generated prior aspect_ratios(list(float) | tuple(float)): the aspect ratios of generated
boxes. The length of input and aspect_ratios must be equal. prior boxes. The length of input and aspect_ratios must be equal.
min_ratio(int): the min ratio of generated prior boxes. min_ratio(int): the min ratio of generated prior boxes.
max_ratio(int): the max ratio of generated prior boxes. max_ratio(int): the max ratio of generated prior boxes.
min_sizes(list|tuple|None): If `len(inputs) <=2`, min_sizes(list|tuple|None): If `len(inputs) <=2`,
...@@ -1945,7 +1958,9 @@ def multi_box_head(inputs, ...@@ -1945,7 +1958,9 @@ def multi_box_head(inputs,
kernel_size(int): The kernel size of conv2d. Default: 1. kernel_size(int): The kernel size of conv2d. Default: 1.
pad(int|list|tuple): The padding of conv2d. Default:0. pad(int|list|tuple): The padding of conv2d. Default:0.
stride(int|list|tuple): The stride of conv2d. Default:1, stride(int|list|tuple): The stride of conv2d. Default:1,
name(str): Name of the prior box layer. Default: None. name(str): The default value is None. Normally there is no need
for user to set this property. For more information, please
refer to :ref:`api_guide_Name`.
min_max_aspect_ratios_order(bool): If set True, the output prior box is min_max_aspect_ratios_order(bool): If set True, the output prior box is
in order of [min, max, aspect_ratios], which is consistent with in order of [min, max, aspect_ratios], which is consistent with
Caffe. Please note, this order affects the weights order of Caffe. Please note, this order affects the weights order of
...@@ -1955,33 +1970,34 @@ def multi_box_head(inputs, ...@@ -1955,33 +1970,34 @@ def multi_box_head(inputs,
Returns: Returns:
tuple: A tuple with four Variables. (mbox_loc, mbox_conf, boxes, variances) tuple: A tuple with four Variables. (mbox_loc, mbox_conf, boxes, variances)
mbox_loc: The predicted boxes' location of the inputs. The layout mbox_loc (Variable): The predicted boxes' location of the inputs. The
is [N, H*W*Priors, 4]. where Priors is the number of predicted layout is [N, num_priors, 4], where N is batch size, ``num_priors``
boxes each position of each input. is the number of prior boxes. Data type is the same as input.
mbox_conf: The predicted boxes' confidence of the inputs. The layout mbox_conf (Variable): The predicted boxes' confidence of the inputs.
is [N, H*W*Priors, C]. where Priors is the number of predicted boxes The layout is [N, num_priors, C], where ``N`` and ``num_priors``
each position of each input and C is the number of Classes. has the same meaning as above. C is the number of Classes.
Data type is the same as input.
boxes: the output prior boxes of PriorBox. The layout is [num_priors, 4]. boxes (Variable): the output prior boxes. The layout is [num_priors, 4].
num_priors is the total box count of each position of inputs. The meaning of num_priors is the same as above.
Data type is the same as input.
variances: the expanded variances of PriorBox. The layout is variances (Variable): the expanded variances for prior boxes.
[num_priors, 4]. num_priors is the total box count of each position of inputs The layout is [num_priors, 4]. Data type is the same as input.
Examples 1: set min_ratio and max_ratio:
Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle.fluid as fluid
images = fluid.layers.data(name='data', shape=[3, 300, 300], dtype='float32') images = fluid.data(name='data', shape=[None, 3, 300, 300], dtype='float32')
conv1 = fluid.layers.data(name='conv1', shape=[512, 19, 19], dtype='float32') conv1 = fluid.data(name='conv1', shape=[None, 512, 19, 19], dtype='float32')
conv2 = fluid.layers.data(name='conv2', shape=[1024, 10, 10], dtype='float32') conv2 = fluid.data(name='conv2', shape=[None, 1024, 10, 10], dtype='float32')
conv3 = fluid.layers.data(name='conv3', shape=[512, 5, 5], dtype='float32') conv3 = fluid.data(name='conv3', shape=[None, 512, 5, 5], dtype='float32')
conv4 = fluid.layers.data(name='conv4', shape=[256, 3, 3], dtype='float32') conv4 = fluid.data(name='conv4', shape=[None, 256, 3, 3], dtype='float32')
conv5 = fluid.layers.data(name='conv5', shape=[256, 2, 2], dtype='float32') conv5 = fluid.data(name='conv5', shape=[None, 256, 2, 2], dtype='float32')
conv6 = fluid.layers.data(name='conv6', shape=[128, 1, 1], dtype='float32') conv6 = fluid.data(name='conv6', shape=[None, 128, 1, 1], dtype='float32')
mbox_locs, mbox_confs, box, var = fluid.layers.multi_box_head( mbox_locs, mbox_confs, box, var = fluid.layers.multi_box_head(
inputs=[conv1, conv2, conv3, conv4, conv5, conv6], inputs=[conv1, conv2, conv3, conv4, conv5, conv6],
...@@ -1994,6 +2010,32 @@ def multi_box_head(inputs, ...@@ -1994,6 +2010,32 @@ def multi_box_head(inputs,
offset=0.5, offset=0.5,
flip=True, flip=True,
clip=True) clip=True)
Examples 2: set min_sizes and max_sizes:
.. code-block:: python
import paddle.fluid as fluid
images = fluid.data(name='data', shape=[None, 3, 300, 300], dtype='float32')
conv1 = fluid.data(name='conv1', shape=[None, 512, 19, 19], dtype='float32')
conv2 = fluid.data(name='conv2', shape=[None, 1024, 10, 10], dtype='float32')
conv3 = fluid.data(name='conv3', shape=[None, 512, 5, 5], dtype='float32')
conv4 = fluid.data(name='conv4', shape=[None, 256, 3, 3], dtype='float32')
conv5 = fluid.data(name='conv5', shape=[None, 256, 2, 2], dtype='float32')
conv6 = fluid.data(name='conv6', shape=[None, 128, 1, 1], dtype='float32')
mbox_locs, mbox_confs, box, var = fluid.layers.multi_box_head(
inputs=[conv1, conv2, conv3, conv4, conv5, conv6],
image=images,
num_classes=21,
min_sizes=[60.0, 105.0, 150.0, 195.0, 240.0, 285.0],
max_sizes=[[], 150.0, 195.0, 240.0, 285.0, 300.0],
aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]],
base_size=300,
offset=0.5,
flip=True,
clip=True)
""" """
def _reshape_with_axis_(input, axis=1): def _reshape_with_axis_(input, axis=1):
...@@ -2439,7 +2481,7 @@ def generate_proposal_labels(rpn_rois, ...@@ -2439,7 +2481,7 @@ def generate_proposal_labels(rpn_rois,
def generate_mask_labels(im_info, gt_classes, is_crowd, gt_segms, rois, def generate_mask_labels(im_info, gt_classes, is_crowd, gt_segms, rois,
labels_int32, num_classes, resolution): labels_int32, num_classes, resolution):
""" """
** Generate Mask Labels for Mask-RCNN ** **Generate Mask Labels for Mask-RCNN**
For the given RoIs and corresponding labels, this operator samples For the given RoIs and corresponding labels, this operator samples
foreground RoIs. This mask branch also has foreground RoIs. This mask branch also has
...@@ -2475,62 +2517,67 @@ def generate_mask_labels(im_info, gt_classes, is_crowd, gt_segms, rois, ...@@ -2475,62 +2517,67 @@ def generate_mask_labels(im_info, gt_classes, is_crowd, gt_segms, rois,
feeder.feed(batch_masks) feeder.feed(batch_masks)
Args: Args:
im_info(Variable): A 2-D Tensor with shape [N, 3]. N is the batch size, im_info (Variable): A 2-D Tensor with shape [N, 3] and float32
each element is [height, width, scale] of image. Image scale is data type. N is the batch size, each element is
target_size) / original_size. [height, width, scale] of image. Image scale is
gt_classes(Variable): A 2-D LoDTensor with shape [M, 1]. M is the total target_size / original_size, target_size is the size after resize,
number of ground-truth, each element is a class label. original_size is the original image size.
is_crowd(Variable): A 2-D LoDTensor with shape as gt_classes, gt_classes (Variable): A 2-D LoDTensor with shape [M, 1]. Data type
each element is a flag indicating whether a groundtruth is crowd. should be int. M is the total number of ground-truth, each
gt_segms(Variable): This input is a 2D LoDTensor with shape [S, 2], element is a class label.
its LoD level is 3. Usually users do not need to understand LoD, is_crowd (Variable): A 2-D LoDTensor with same shape and same data type
as gt_classes, each element is a flag indicating whether a
groundtruth is crowd.
gt_segms (Variable): This input is a 2D LoDTensor with shape [S, 2] and
float32 data type, its LoD level is 3.
Usually users do not need to understand LoD,
The users should return correct data format in reader. The users should return correct data format in reader.
The LoD[0] represents the ground-truth objects number of
The LoD[0] represents the gt objects number of
each instance. LoD[1] represents the segmentation counts of each each instance. LoD[1] represents the segmentation counts of each
objects. LoD[2] represents the polygons number of each segmentation. objects. LoD[2] represents the polygons number of each segmentation.
S the total number of polygons coordinate points. Each element is S the total number of polygons coordinate points. Each element is
(x, y) coordinate points. (x, y) coordinate points.
rois(Variable): A 2-D LoDTensor with shape [R, 4]. R is the total rois (Variable): A 2-D LoDTensor with shape [R, 4] and float32 data type.
number of RoIs, each element is a bounding box with R is the total number of RoIs, each element is a bounding
(xmin, ymin, xmax, ymax) format in the range of original image. box with (xmin, ymin, xmax, ymax) format in the range of original image.
labels_int32(Variable): A 2-D LoDTensor in shape of [R, 1] with type labels_int32 (Variable): A 2-D LoDTensor in shape of [R, 1] with type
of int32. R is the same as it in `rois`. Each element represents of int32. R is the same as it in `rois`. Each element represents
a class label of a RoI. a class label of a RoI.
num_classes(int): Class number. num_classes (int): Class number.
resolution(int): Resolution of mask predictions. resolution (int): Resolution of mask predictions.
Returns: Returns:
mask_rois (Variable): A 2D LoDTensor with shape [P, 4]. P is the total mask_rois (Variable): A 2D LoDTensor with shape [P, 4] and same data
number of sampled RoIs. Each element is a bounding box with type as `rois`. P is the total number of sampled RoIs. Each element
[xmin, ymin, xmax, ymax] format in range of original image size. is a bounding box with [xmin, ymin, xmax, ymax] format in range of
mask_rois_has_mask_int32 (Variable): A 2D LoDTensor with shape [P, 1], original image size.
each element represents the output mask RoI index with regard to
to input RoIs. mask_rois_has_mask_int32 (Variable): A 2D LoDTensor with shape [P, 1]
mask_int32 (Variable): A 2D LoDTensor with shape [P, K * M * M], and int data type, each element represents the output mask RoI
K is the classes number and M is the resolution of mask predictions. index with regard to input RoIs.
Each element represents the binary mask targets.
mask_int32 (Variable): A 2D LoDTensor with shape [P, K * M * M] and int
data type, K is the classes number and M is the resolution of mask
predictions. Each element represents the binary mask targets.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle.fluid as fluid
im_info = fluid.layers.data(name="im_info", shape=[3], im_info = fluid.data(name="im_info", shape=[None, 3],
dtype="float32") dtype="float32")
gt_classes = fluid.layers.data(name="gt_classes", shape=[1], gt_classes = fluid.data(name="gt_classes", shape=[None, 1],
dtype="float32", lod_level=1) dtype="float32", lod_level=1)
is_crowd = fluid.layers.data(name="is_crowd", shape=[1], is_crowd = fluid.data(name="is_crowd", shape=[None, 1],
dtype="float32", lod_level=1) dtype="float32", lod_level=1)
gt_masks = fluid.layers.data(name="gt_masks", shape=[2], gt_masks = fluid.data(name="gt_masks", shape=[None, 2],
dtype="float32", lod_level=3) dtype="float32", lod_level=3)
# rois, roi_labels can be the output of # rois, roi_labels can be the output of
# fluid.layers.generate_proposal_labels. # fluid.layers.generate_proposal_labels.
rois = fluid.layers.data(name="rois", shape=[4], rois = fluid.data(name="rois", shape=[None, 4],
dtype="float32", lod_level=1) dtype="float32", lod_level=1)
roi_labels = fluid.layers.data(name="roi_labels", shape=[1], roi_labels = fluid.data(name="roi_labels", shape=[None, 1],
dtype="int32", lod_level=1) dtype="int32", lod_level=1)
mask_rois, mask_index, mask_int32 = fluid.layers.generate_mask_labels( mask_rois, mask_index, mask_int32 = fluid.layers.generate_mask_labels(
im_info=im_info, im_info=im_info,
......
...@@ -10510,9 +10510,19 @@ def random_crop(x, shape, seed=None): ...@@ -10510,9 +10510,19 @@ def random_crop(x, shape, seed=None):
${out_comment} ${out_comment}
Examples: Examples:
>>> import paddle.fluid as fluid .. code-block:: python
>>> img = fluid.layers.data("img", [3, 256, 256])
>>> cropped_img = fluid.layers.random_crop(img, shape=[3, 224, 224]) import paddle.fluid as fluid
img = fluid.data("img", [None, 3, 256, 256])
# cropped_img shape: [-1, 3, 224, 224]
cropped_img = fluid.layers.random_crop(img, shape=[3, 224, 224])
# cropped_img2 shape: [-1, 2, 224, 224]
# cropped_img2 = fluid.layers.random_crop(img, shape=[2, 224, 224])
# cropped_img3 shape: [-1, 3, 128, 224]
# cropped_img3 = fluid.layers.random_crop(img, shape=[128, 224])
""" """
helper = LayerHelper("random_crop", **locals()) helper = LayerHelper("random_crop", **locals())
dtype = x.dtype dtype = x.dtype
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册