未验证 提交 54df1ad4 编写于 作者: W whs 提交者: GitHub

Fix en doc of warpctc, affine_grid, generate_proposals (#20262)

上级 89c4b3dd
...@@ -177,7 +177,7 @@ paddle.fluid.layers.edit_distance (ArgSpec(args=['input', 'label', 'normalized', ...@@ -177,7 +177,7 @@ paddle.fluid.layers.edit_distance (ArgSpec(args=['input', 'label', 'normalized',
paddle.fluid.layers.l2_normalize (ArgSpec(args=['x', 'axis', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(1e-12, None)), ('document', 'c1df110ea65998984f564c5c10abc54a')) paddle.fluid.layers.l2_normalize (ArgSpec(args=['x', 'axis', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(1e-12, None)), ('document', 'c1df110ea65998984f564c5c10abc54a'))
paddle.fluid.layers.matmul (ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'alpha', 'name'], varargs=None, keywords=None, defaults=(False, False, 1.0, None)), ('document', '3720b4a386585094435993deb028b592')) paddle.fluid.layers.matmul (ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'alpha', 'name'], varargs=None, keywords=None, defaults=(False, False, 1.0, None)), ('document', '3720b4a386585094435993deb028b592'))
paddle.fluid.layers.topk (ArgSpec(args=['input', 'k', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e50940f3ce5a08cc477b72f517491bf3')) paddle.fluid.layers.topk (ArgSpec(args=['input', 'k', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e50940f3ce5a08cc477b72f517491bf3'))
paddle.fluid.layers.warpctc (ArgSpec(args=['input', 'label', 'blank', 'norm_by_times', 'input_length', 'label_length'], varargs=None, keywords=None, defaults=(0, False, None, None)), ('document', 'a5be881ada816e47ea7a6ee4396da357')) paddle.fluid.layers.warpctc (ArgSpec(args=['input', 'label', 'blank', 'norm_by_times', 'input_length', 'label_length'], varargs=None, keywords=None, defaults=(0, False, None, None)), ('document', '79aaea078ddea57a82ed7906d71dedc7'))
paddle.fluid.layers.sequence_reshape (ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None), ('document', 'eeb1591cfc854c6ffdac77b376313c44')) paddle.fluid.layers.sequence_reshape (ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None), ('document', 'eeb1591cfc854c6ffdac77b376313c44'))
paddle.fluid.layers.transpose (ArgSpec(args=['x', 'perm', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '8e72db173d4c082e27cb11f31d8c9bfa')) paddle.fluid.layers.transpose (ArgSpec(args=['x', 'perm', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '8e72db173d4c082e27cb11f31d8c9bfa'))
paddle.fluid.layers.im2sequence (ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None)), ('document', 'fe352915a543cec434f74e9b32ac49da')) paddle.fluid.layers.im2sequence (ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None)), ('document', 'fe352915a543cec434f74e9b32ac49da'))
...@@ -277,7 +277,7 @@ paddle.fluid.layers.mul (ArgSpec(args=['x', 'y', 'x_num_col_dims', 'y_num_col_di ...@@ -277,7 +277,7 @@ paddle.fluid.layers.mul (ArgSpec(args=['x', 'y', 'x_num_col_dims', 'y_num_col_di
paddle.fluid.layers.sigmoid_cross_entropy_with_logits (ArgSpec(args=['x', 'label', 'ignore_index', 'name', 'normalize'], varargs=None, keywords=None, defaults=(-100, None, False)), ('document', '7637c974f2d749d359acae9062c4d96f')) paddle.fluid.layers.sigmoid_cross_entropy_with_logits (ArgSpec(args=['x', 'label', 'ignore_index', 'name', 'normalize'], varargs=None, keywords=None, defaults=(-100, None, False)), ('document', '7637c974f2d749d359acae9062c4d96f'))
paddle.fluid.layers.maxout (ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '169882eb87fb693198e0153629134c22')) paddle.fluid.layers.maxout (ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '169882eb87fb693198e0153629134c22'))
paddle.fluid.layers.space_to_depth (ArgSpec(args=['x', 'blocksize', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '26decdea9376b6b9a0d3432d82ca207b')) paddle.fluid.layers.space_to_depth (ArgSpec(args=['x', 'blocksize', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '26decdea9376b6b9a0d3432d82ca207b'))
paddle.fluid.layers.affine_grid (ArgSpec(args=['theta', 'out_shape', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'f85b263b7b6698d000977529a28f202b')) paddle.fluid.layers.affine_grid (ArgSpec(args=['theta', 'out_shape', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '315b50c1cbd9569375b098c56f1e91c9'))
paddle.fluid.layers.sequence_reverse (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '5b32ed21ab89140a8e758002923a0da3')) paddle.fluid.layers.sequence_reverse (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '5b32ed21ab89140a8e758002923a0da3'))
paddle.fluid.layers.affine_channel (ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name', 'act'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None, None)), ('document', '9f303c67538e468a36c5904a0a3aa110')) paddle.fluid.layers.affine_channel (ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name', 'act'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None, None)), ('document', '9f303c67538e468a36c5904a0a3aa110'))
paddle.fluid.layers.similarity_focus (ArgSpec(args=['input', 'axis', 'indexes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '18ec2e3afeb90e70c8b73d2b71c40fdb')) paddle.fluid.layers.similarity_focus (ArgSpec(args=['input', 'axis', 'indexes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '18ec2e3afeb90e70c8b73d2b71c40fdb'))
...@@ -416,13 +416,13 @@ paddle.fluid.layers.bipartite_match (ArgSpec(args=['dist_matrix', 'match_type', ...@@ -416,13 +416,13 @@ paddle.fluid.layers.bipartite_match (ArgSpec(args=['dist_matrix', 'match_type',
paddle.fluid.layers.target_assign (ArgSpec(args=['input', 'matched_indices', 'negative_indices', 'mismatch_value', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', 'e9685f32d21bec8c013626c0254502c5')) paddle.fluid.layers.target_assign (ArgSpec(args=['input', 'matched_indices', 'negative_indices', 'mismatch_value', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', 'e9685f32d21bec8c013626c0254502c5'))
paddle.fluid.layers.detection_output (ArgSpec(args=['loc', 'scores', 'prior_box', 'prior_box_var', 'background_label', 'nms_threshold', 'nms_top_k', 'keep_top_k', 'score_threshold', 'nms_eta', 'return_index'], varargs=None, keywords=None, defaults=(0, 0.3, 400, 200, 0.01, 1.0, False)), ('document', '5485bcaceb0cde2695565a2ffd5bbd40')) paddle.fluid.layers.detection_output (ArgSpec(args=['loc', 'scores', 'prior_box', 'prior_box_var', 'background_label', 'nms_threshold', 'nms_top_k', 'keep_top_k', 'score_threshold', 'nms_eta', 'return_index'], varargs=None, keywords=None, defaults=(0, 0.3, 400, 200, 0.01, 1.0, False)), ('document', '5485bcaceb0cde2695565a2ffd5bbd40'))
paddle.fluid.layers.ssd_loss (ArgSpec(args=['location', 'confidence', 'gt_box', 'gt_label', 'prior_box', 'prior_box_var', 'background_label', 'overlap_threshold', 'neg_pos_ratio', 'neg_overlap', 'loc_loss_weight', 'conf_loss_weight', 'match_type', 'mining_type', 'normalize', 'sample_size'], varargs=None, keywords=None, defaults=(None, 0, 0.5, 3.0, 0.5, 1.0, 1.0, 'per_prediction', 'max_negative', True, None)), ('document', '14d1eeae0f41b6792be43c1c0be0589b')) paddle.fluid.layers.ssd_loss (ArgSpec(args=['location', 'confidence', 'gt_box', 'gt_label', 'prior_box', 'prior_box_var', 'background_label', 'overlap_threshold', 'neg_pos_ratio', 'neg_overlap', 'loc_loss_weight', 'conf_loss_weight', 'match_type', 'mining_type', 'normalize', 'sample_size'], varargs=None, keywords=None, defaults=(None, 0, 0.5, 3.0, 0.5, 1.0, 1.0, 'per_prediction', 'max_negative', True, None)), ('document', '14d1eeae0f41b6792be43c1c0be0589b'))
paddle.fluid.layers.rpn_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'is_crowd', 'im_info', 'rpn_batch_size_per_im', 'rpn_straddle_thresh', 'rpn_fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.0, 0.5, 0.7, 0.3, True)), ('document', '651d98d51879dfa1bc1cd40391786a41')) paddle.fluid.layers.rpn_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'is_crowd', 'im_info', 'rpn_batch_size_per_im', 'rpn_straddle_thresh', 'rpn_fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.0, 0.5, 0.7, 0.3, True)), ('document', 'd46629656b4ce9b07809e32c0482cbef'))
paddle.fluid.layers.retinanet_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'gt_labels', 'is_crowd', 'im_info', 'num_classes', 'positive_overlap', 'negative_overlap'], varargs=None, keywords=None, defaults=(1, 0.5, 0.4)), ('document', 'fa1d1c9d5e0111684c0db705f86a2595')) paddle.fluid.layers.retinanet_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'gt_labels', 'is_crowd', 'im_info', 'num_classes', 'positive_overlap', 'negative_overlap'], varargs=None, keywords=None, defaults=(1, 0.5, 0.4)), ('document', 'fa1d1c9d5e0111684c0db705f86a2595'))
paddle.fluid.layers.sigmoid_focal_loss (ArgSpec(args=['x', 'label', 'fg_num', 'gamma', 'alpha'], varargs=None, keywords=None, defaults=(2, 0.25)), ('document', 'aeac6aae100173b3fc7f102cf3023a3d')) paddle.fluid.layers.sigmoid_focal_loss (ArgSpec(args=['x', 'label', 'fg_num', 'gamma', 'alpha'], varargs=None, keywords=None, defaults=(2, 0.25)), ('document', 'aeac6aae100173b3fc7f102cf3023a3d'))
paddle.fluid.layers.anchor_generator (ArgSpec(args=['input', 'anchor_sizes', 'aspect_ratios', 'variance', 'stride', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, [0.1, 0.1, 0.2, 0.2], None, 0.5, None)), ('document', 'd25e5e90f9a342764f32b5cd48657148')) paddle.fluid.layers.anchor_generator (ArgSpec(args=['input', 'anchor_sizes', 'aspect_ratios', 'variance', 'stride', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, [0.1, 0.1, 0.2, 0.2], None, 0.5, None)), ('document', 'd25e5e90f9a342764f32b5cd48657148'))
paddle.fluid.layers.roi_perspective_transform (ArgSpec(args=['input', 'rois', 'transformed_height', 'transformed_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1.0,)), ('document', 'a82016342789ba9d85737e405f824ff1')) paddle.fluid.layers.roi_perspective_transform (ArgSpec(args=['input', 'rois', 'transformed_height', 'transformed_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1.0,)), ('document', 'a82016342789ba9d85737e405f824ff1'))
paddle.fluid.layers.generate_proposal_labels (ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random', 'is_cls_agnostic', 'is_cascade_rcnn'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True, False, False)), ('document', '69def376b42ef0681d0cc7f53a2dac4b')) paddle.fluid.layers.generate_proposal_labels (ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random', 'is_cls_agnostic', 'is_cascade_rcnn'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True, False, False)), ('document', 'f2342042127b536a0a16390f149f1bba'))
paddle.fluid.layers.generate_proposals (ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None)), ('document', 'b7d707822b6af2a586bce608040235b1')) paddle.fluid.layers.generate_proposals (ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None)), ('document', '5cba014b41610431f8949e2d7336f1cc'))
paddle.fluid.layers.generate_mask_labels (ArgSpec(args=['im_info', 'gt_classes', 'is_crowd', 'gt_segms', 'rois', 'labels_int32', 'num_classes', 'resolution'], varargs=None, keywords=None, defaults=None), ('document', 'b319b10ddaf17fb4ddf03518685a17ef')) paddle.fluid.layers.generate_mask_labels (ArgSpec(args=['im_info', 'gt_classes', 'is_crowd', 'gt_segms', 'rois', 'labels_int32', 'num_classes', 'resolution'], varargs=None, keywords=None, defaults=None), ('document', 'b319b10ddaf17fb4ddf03518685a17ef'))
paddle.fluid.layers.iou_similarity (ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '72fca4a39ccf82d5c746ae62d1868a99')) paddle.fluid.layers.iou_similarity (ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '72fca4a39ccf82d5c746ae62d1868a99'))
paddle.fluid.layers.box_coder (ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name', 'axis'], varargs=None, keywords=None, defaults=('encode_center_size', True, None, 0)), ('document', '1d5144c3856673d05c29c752c7c8f821')) paddle.fluid.layers.box_coder (ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name', 'axis'], varargs=None, keywords=None, defaults=('encode_center_size', True, None, 0)), ('document', '1d5144c3856673d05c29c752c7c8f821'))
......
...@@ -254,70 +254,66 @@ def rpn_target_assign(bbox_pred, ...@@ -254,70 +254,66 @@ def rpn_target_assign(bbox_pred,
bbox_pred(Variable): A 3-D Tensor with shape [N, M, 4] represents the bbox_pred(Variable): A 3-D Tensor with shape [N, M, 4] represents the
predicted locations of M bounding bboxes. N is the batch size, predicted locations of M bounding bboxes. N is the batch size,
and each bounding box has four coordinate values and the layout and each bounding box has four coordinate values and the layout
is [xmin, ymin, xmax, ymax]. is [xmin, ymin, xmax, ymax]. The data type can be float32 or float64.
cls_logits(Variable): A 3-D Tensor with shape [N, M, 1] represents the cls_logits(Variable): A 3-D Tensor with shape [N, M, 1] represents the
predicted confidence predictions. N is the batch size, 1 is the predicted confidence predictions. N is the batch size, 1 is the
frontground and background sigmoid, M is number of bounding boxes. frontground and background sigmoid, M is number of bounding boxes.
The data type can be float32 or float64.
anchor_box(Variable): A 2-D Tensor with shape [M, 4] holds M boxes, anchor_box(Variable): A 2-D Tensor with shape [M, 4] holds M boxes,
each box is represented as [xmin, ymin, xmax, ymax], each box is represented as [xmin, ymin, xmax, ymax],
[xmin, ymin] is the left top coordinate of the anchor box, [xmin, ymin] is the left top coordinate of the anchor box,
if the input is image feature map, they are close to the origin if the input is image feature map, they are close to the origin
of the coordinate system. [xmax, ymax] is the right bottom of the coordinate system. [xmax, ymax] is the right bottom
coordinate of the anchor box. coordinate of the anchor box. The data type can be float32 or float64.
anchor_var(Variable): A 2-D Tensor with shape [M,4] holds expanded anchor_var(Variable): A 2-D Tensor with shape [M,4] holds expanded
variances of anchors. variances of anchors. The data type can be float32 or float64.
gt_boxes (Variable): The ground-truth bounding boxes (bboxes) are a 2D gt_boxes (Variable): The ground-truth bounding boxes (bboxes) are a 2D
LoDTensor with shape [Ng, 4], Ng is the total number of ground-truth LoDTensor with shape [Ng, 4], Ng is the total number of ground-truth
bboxes of mini-batch input. bboxes of mini-batch input. The data type can be float32 or float64.
is_crowd (Variable): A 1-D LoDTensor which indicates groud-truth is crowd. is_crowd (Variable): A 1-D LoDTensor which indicates groud-truth is crowd.
The data type must be int32.
im_info (Variable): A 2-D LoDTensor with shape [N, 3]. N is the batch size, im_info (Variable): A 2-D LoDTensor with shape [N, 3]. N is the batch size,
3 is the height, width and scale. 3 is the height, width and scale.
rpn_batch_size_per_im(int): Total number of RPN examples per image. rpn_batch_size_per_im(int): Total number of RPN examples per image.
The data type must be int32.
rpn_straddle_thresh(float): Remove RPN anchors that go outside the image rpn_straddle_thresh(float): Remove RPN anchors that go outside the image
by straddle_thresh pixels. by straddle_thresh pixels. The data type must be float32.
rpn_fg_fraction(float): Target fraction of RoI minibatch that is labeled rpn_fg_fraction(float): Target fraction of RoI minibatch that is labeled
foreground (i.e. class > 0), 0-th class is background. foreground (i.e. class > 0), 0-th class is background. The data type must be float32.
rpn_positive_overlap(float): Minimum overlap required between an anchor rpn_positive_overlap(float): Minimum overlap required between an anchor
and ground-truth box for the (anchor, gt box) pair to be a positive and ground-truth box for the (anchor, gt box) pair to be a positive
example. example. The data type must be float32.
rpn_negative_overlap(float): Maximum overlap allowed between an anchor rpn_negative_overlap(float): Maximum overlap allowed between an anchor
and ground-truth box for the (anchor, gt box) pair to be a negative and ground-truth box for the (anchor, gt box) pair to be a negative
examples. examples. The data type must be float32.
Returns: Returns:
tuple: tuple:
A tuple(predicted_scores, predicted_location, target_label, A tuple(predicted_scores, predicted_location, target_label,
target_bbox, bbox_inside_weight) is returned. The predicted_scores target_bbox, bbox_inside_weight) is returned. The predicted_scores
and predicted_location is the predicted result of the RPN. and predicted_location is the predicted result of the RPN.
The target_label and target_bbox is the ground truth, The target_label and target_bbox is the ground truth,
respectively. The predicted_location is a 2D Tensor with shape respectively. The predicted_location is a 2D Tensor with shape
[F, 4], and the shape of target_bbox is same as the shape of [F, 4], and the shape of target_bbox is same as the shape of
the predicted_location, F is the number of the foreground the predicted_location, F is the number of the foreground
anchors. The predicted_scores is a 2D Tensor with shape anchors. The predicted_scores is a 2D Tensor with shape
[F + B, 1], and the shape of target_label is same as the shape [F + B, 1], and the shape of target_label is same as the shape
of the predicted_scores, B is the number of the background of the predicted_scores, B is the number of the background
anchors, the F and B is depends on the input of this operator. anchors, the F and B is depends on the input of this operator.
Bbox_inside_weight represents whether the predicted loc is fake_fg Bbox_inside_weight represents whether the predicted loc is fake_fg
or not and the shape is [F, 4]. or not and the shape is [F, 4].
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle.fluid as fluid
bbox_pred = fluid.layers.data(name='bbox_pred', shape=[100, 4], bbox_pred = fluid.data(name='bbox_pred', shape=[None, 4], dtype='float32')
append_batch_size=False, dtype='float32') cls_logits = fluid.data(name='cls_logits', shape=[None, 1], dtype='float32')
cls_logits = fluid.layers.data(name='cls_logits', shape=[100, 1], anchor_box = fluid.data(name='anchor_box', shape=[None, 4], dtype='float32')
append_batch_size=False, dtype='float32') anchor_var = fluid.data(name='anchor_var', shape=[None, 4], dtype='float32')
anchor_box = fluid.layers.data(name='anchor_box', shape=[20, 4], gt_boxes = fluid.data(name='gt_boxes', shape=[None, 4], dtype='float32')
append_batch_size=False, dtype='float32') is_crowd = fluid.data(name='is_crowd', shape=[None], dtype='float32')
anchor_var = fluid.layers.data(name='anchor_var', shape=[20, 4], im_info = fluid.data(name='im_infoss', shape=[None, 3], dtype='float32')
append_batch_size=False, dtype='float32')
gt_boxes = fluid.layers.data(name='gt_boxes', shape=[10, 4],
append_batch_size=False, dtype='float32')
is_crowd = fluid.layers.data(name='is_crowd', shape=[1],
append_batch_size=False, dtype='float32')
im_info = fluid.layers.data(name='im_infoss', shape=[1, 3],
append_batch_size=False, dtype='float32')
loc, score, loc_target, score_target, inside_weight = fluid.layers.rpn_target_assign( loc, score, loc_target, score_target, inside_weight = fluid.layers.rpn_target_assign(
bbox_pred, cls_logits, anchor_box, anchor_var, gt_boxes, is_crowd, im_info) bbox_pred, cls_logits, anchor_box, anchor_var, gt_boxes, is_crowd, im_info)
...@@ -2217,8 +2213,7 @@ def generate_proposal_labels(rpn_rois, ...@@ -2217,8 +2213,7 @@ def generate_proposal_labels(rpn_rois,
is_cls_agnostic=False, is_cls_agnostic=False,
is_cascade_rcnn=False): is_cascade_rcnn=False):
""" """
**Generate Proposal Labels of Faster-RCNN**
** Generate Proposal Labels of Faster-RCNN **
This operator can be, for given the GenerateProposalOp output bounding boxes and groundtruth, This operator can be, for given the GenerateProposalOp output bounding boxes and groundtruth,
to sample foreground boxes and background boxes, and compute loss target. to sample foreground boxes and background boxes, and compute loss target.
...@@ -2236,37 +2231,43 @@ def generate_proposal_labels(rpn_rois, ...@@ -2236,37 +2231,43 @@ def generate_proposal_labels(rpn_rois,
Finally BboxInsideWeights and BboxOutsideWeights are used to specify whether it would contribute to training loss. Finally BboxInsideWeights and BboxOutsideWeights are used to specify whether it would contribute to training loss.
Args: Args:
rpn_rois(Variable): A 2-D LoDTensor with shape [N, 4]. N is the number of the GenerateProposalOp's output, each element is a bounding box with [xmin, ymin, xmax, ymax] format. rpn_rois(Variable): A 2-D LoDTensor with shape [N, 4]. N is the number of the GenerateProposalOp's output, each element is a bounding box with [xmin, ymin, xmax, ymax] format. The data type can be float32 or float64.
gt_classes(Variable): A 2-D LoDTensor with shape [M, 1]. M is the number of groundtruth, each element is a class label of groundtruth. gt_classes(Variable): A 2-D LoDTensor with shape [M, 1]. M is the number of groundtruth, each element is a class label of groundtruth. The data type must be int32.
is_crowd(Variable): A 2-D LoDTensor with shape [M, 1]. M is the number of groundtruth, each element is a flag indicates whether a groundtruth is crowd. is_crowd(Variable): A 2-D LoDTensor with shape [M, 1]. M is the number of groundtruth, each element is a flag indicates whether a groundtruth is crowd. The data type must be int32.
gt_boxes(Variable): A 2-D LoDTensor with shape [M, 4]. M is the number of groundtruth, each element is a bounding box with [xmin, ymin, xmax, ymax] format. gt_boxes(Variable): A 2-D LoDTensor with shape [M, 4]. M is the number of groundtruth, each element is a bounding box with [xmin, ymin, xmax, ymax] format.
im_info(Variable): A 2-D LoDTensor with shape [B, 3]. B is the number of input images, each element consists of im_height, im_width, im_scale. im_info(Variable): A 2-D LoDTensor with shape [B, 3]. B is the number of input images, each element consists of im_height, im_width, im_scale.
batch_size_per_im(int): Batch size of rois per images. batch_size_per_im(int): Batch size of rois per images. The data type must be int32.
fg_fraction(float): Foreground fraction in total batch_size_per_im. fg_fraction(float): Foreground fraction in total batch_size_per_im. The data type must be float32.
fg_thresh(float): Overlap threshold which is used to chose foreground sample. fg_thresh(float): Overlap threshold which is used to chose foreground sample. The data type must be float32.
bg_thresh_hi(float): Overlap threshold upper bound which is used to chose background sample. bg_thresh_hi(float): Overlap threshold upper bound which is used to chose background sample. The data type must be float32.
bg_thresh_lo(float): Overlap threshold lower bound which is used to chose background sample. bg_thresh_lo(float): Overlap threshold lower bound which is used to chose background sample. The data type must be float32.
bbox_reg_weights(list|tuple): Box regression weights. bbox_reg_weights(list|tuple): Box regression weights. The data type must be float32.
class_nums(int): Class number. class_nums(int): Class number. The data type must be int32.
use_random(bool): Use random sampling to choose foreground and background boxes. use_random(bool): Use random sampling to choose foreground and background boxes.
is_cls_agnostic(bool): bbox regression use class agnostic simply which only represent fg and bg boxes. is_cls_agnostic(bool): bbox regression use class agnostic simply which only represent fg and bg boxes.
is_cascade_rcnn(bool): it will filter some bbox crossing the image's boundary when setting True. is_cascade_rcnn(bool): it will filter some bbox crossing the image's boundary when setting True.
Returns:
tuple:
A tuple with format``(rois, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights)``.
- **rois**: 2-D LoDTensor with shape ``[batch_size_per_im * batch_size, 4]``. The data type is the same as ``rpn_rois``.
- **labels_int32**: 2-D LoDTensor with shape ``[batch_size_per_im * batch_size, 1]``. The data type must be int32.
- **bbox_targets**: 2-D LoDTensor with shape ``[batch_size_per_im * batch_size, 4 * class_num]``. The regression targets of all RoIs. The data type is the same as ``rpn_rois``.
- **bbox_inside_weights**: 2-D LoDTensor with shape ``[batch_size_per_im * batch_size, 4 * class_num]``. The weights of foreground boxes' regression loss. The data type is the same as ``rpn_rois``.
- **bbox_outside_weights**: 2-D LoDTensor with shape ``[batch_size_per_im * batch_size, 4 * class_num]``. The weights of regression loss. The data type is the same as ``rpn_rois``.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle.fluid as fluid
rpn_rois = fluid.layers.data(name='rpn_rois', shape=[2, 4], rpn_rois = fluid.data(name='rpn_rois', shape=[None, 4], dtype='float32')
append_batch_size=False, dtype='float32') gt_classes = fluid.data(name='gt_classes', shape=[None, 1], dtype='float32')
gt_classes = fluid.layers.data(name='gt_classes', shape=[8, 1], is_crowd = fluid.data(name='is_crowd', shape=[None, 1], dtype='float32')
append_batch_size=False, dtype='float32') gt_boxes = fluid.data(name='gt_boxes', shape=[None, 4], dtype='float32')
is_crowd = fluid.layers.data(name='is_crowd', shape=[8, 1], im_info = fluid.data(name='im_info', shape=[None, 3], dtype='float32')
append_batch_size=False, dtype='float32')
gt_boxes = fluid.layers.data(name='gt_boxes', shape=[8, 4],
append_batch_size=False, dtype='float32')
im_info = fluid.layers.data(name='im_info', shape=[10, 3],
append_batch_size=False, dtype='float32')
rois, labels, bbox, inside_weights, outside_weights = fluid.layers.generate_proposal_labels( rois, labels, bbox, inside_weights, outside_weights = fluid.layers.generate_proposal_labels(
rpn_rois, gt_classes, is_crowd, gt_boxes, im_info, rpn_rois, gt_classes, is_crowd, gt_boxes, im_info,
class_nums=10) class_nums=10)
...@@ -2496,44 +2497,47 @@ def generate_proposals(scores, ...@@ -2496,44 +2497,47 @@ def generate_proposals(scores,
scores(Variable): A 4-D Tensor with shape [N, A, H, W] represents scores(Variable): A 4-D Tensor with shape [N, A, H, W] represents
the probability for each box to be an object. the probability for each box to be an object.
N is batch size, A is number of anchors, H and W are height and N is batch size, A is number of anchors, H and W are height and
width of the feature map. width of the feature map. The data type must be float32.
bbox_deltas(Variable): A 4-D Tensor with shape [N, 4*A, H, W] bbox_deltas(Variable): A 4-D Tensor with shape [N, 4*A, H, W]
represents the differece between predicted box locatoin and represents the differece between predicted box locatoin and
anchor location. anchor location. The data type must be float32.
im_info(Variable): A 2-D Tensor with shape [N, 3] represents origin im_info(Variable): A 2-D Tensor with shape [N, 3] represents origin
image information for N batch. Info contains height, width and scale image information for N batch. Info contains height, width and scale
between origin image size and the size of feature map. between origin image size and the size of feature map.
The data type must be int32.
anchors(Variable): A 4-D Tensor represents the anchors with a layout anchors(Variable): A 4-D Tensor represents the anchors with a layout
of [H, W, A, 4]. H and W are height and width of the feature map, of [H, W, A, 4]. H and W are height and width of the feature map,
num_anchors is the box count of each position. Each anchor is num_anchors is the box count of each position. Each anchor is
in (xmin, ymin, xmax, ymax) format an unnormalized. in (xmin, ymin, xmax, ymax) format an unnormalized. The data type must be float32.
variances(Variable): The expanded variances of anchors with a layout of variances(Variable): A 4-D Tensor. The expanded variances of anchors with a layout of
[H, W, num_priors, 4]. Each variance is in [H, W, num_priors, 4]. Each variance is in
(xcenter, ycenter, w, h) format. (xcenter, ycenter, w, h) format. The data type must be float32.
pre_nms_top_n(float): Number of total bboxes to be kept per pre_nms_top_n(float): Number of total bboxes to be kept per
image before NMS. 6000 by default. image before NMS. The data type must be float32. `6000` by default.
post_nms_top_n(float): Number of total bboxes to be kept per post_nms_top_n(float): Number of total bboxes to be kept per
image after NMS. 1000 by default. image after NMS. The data type must be float32. `1000` by default.
nms_thresh(float): Threshold in NMS, 0.5 by default. nms_thresh(float): Threshold in NMS. The data type must be float32. `0.5` by default.
min_size(float): Remove predicted boxes with either height or min_size(float): Remove predicted boxes with either height or
width < min_size. 0.1 by default. width < min_size. The data type must be float32. `0.1` by default.
eta(float): Apply in adaptive NMS, if adaptive threshold > 0.5, eta(float): Apply in adaptive NMS, if adaptive `threshold > 0.5`,
adaptive_threshold = adaptive_threshold * eta in each iteration. `adaptive_threshold = adaptive_threshold * eta` in each iteration.
Returns:
tuple:
A tuple with format ``(rpn_rois, rpn_roi_probs)``.
- **rpn_rois**: The generated RoIs. 2-D Tensor with shape ``[N, 4]`` while ``N`` is the number of RoIs. The data type is the same as ``scores``.
- **rpn_roi_probs**: The scores of generated RoIs. 2-D Tensor with shape ``[N, 1]`` while ``N`` is the number of RoIs. The data type is the same as ``scores``.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle.fluid as fluid
scores = fluid.layers.data(name='scores', shape=[2, 4, 5, 5], scores = fluid.data(name='scores', shape=[None, 4, 5, 5], dtype='float32')
append_batch_size=False, dtype='float32') bbox_deltas = fluid.data(name='bbox_deltas', shape=[None, 16, 5, 5], dtype='float32')
bbox_deltas = fluid.layers.data(name='bbox_deltas', shape=[2, 16, 5, 5], im_info = fluid.data(name='im_info', shape=[None, 3], dtype='float32')
append_batch_size=False, dtype='float32') anchors = fluid.data(name='anchors', shape=[None, 5, 4, 4], dtype='float32')
im_info = fluid.layers.data(name='im_info', shape=[2, 3], variances = fluid.data(name='variances', shape=[None, 5, 10, 4], dtype='float32')
append_batch_size=False, dtype='float32')
anchors = fluid.layers.data(name='anchors', shape=[5, 5, 4, 4],
append_batch_size=False, dtype='float32')
variances = fluid.layers.data(name='variances', shape=[5, 5, 10, 4],
append_batch_size=False, dtype='float32')
rois, roi_probs = fluid.layers.generate_proposals(scores, bbox_deltas, rois, roi_probs = fluid.layers.generate_proposals(scores, bbox_deltas,
im_info, anchors, variances) im_info, anchors, variances)
......
...@@ -6761,7 +6761,7 @@ def warpctc(input, ...@@ -6761,7 +6761,7 @@ def warpctc(input,
(https://github.com/baidu-research/warp-ctc) (https://github.com/baidu-research/warp-ctc)
to compute Connectionist Temporal Classification (CTC) loss. to compute Connectionist Temporal Classification (CTC) loss.
It can be aliased as softmax with CTC, since a native softmax activation is It can be aliased as softmax with CTC, since a native softmax activation is
interated to the Warp-CTC library, to to normlize values for each row of the interated to the Warp-CTC library to normlize values for each row of the
input tensor. input tensor.
Args: Args:
...@@ -6773,14 +6773,15 @@ def warpctc(input, ...@@ -6773,14 +6773,15 @@ def warpctc(input,
(not including the blank label). When it is a 3-D Tensor, it's shape (not including the blank label). When it is a 3-D Tensor, it's shape
is [max_logit_length, batch_size, num_classes + 1], is [max_logit_length, batch_size, num_classes + 1],
where max_logit_length is the length of the longest where max_logit_length is the length of the longest
input logit sequence. input logit sequence. The data type must be float32.
label (Variable): The ground truth of variable-length sequence, label (Variable): The ground truth of variable-length sequence,
which is a 2-D Tensor with LoD information or a 2-D Tensor without which is a 2-D Tensor with LoD information or a 2-D Tensor without
LoD information. When it is a 2-D LoDTensor or 2-D Tensor, LoD information. When it is a 2-D LoDTensor or 2-D Tensor,
it is of the shape [Lg, 1], where Lg is th sum of all labels' length. it is of the shape [Lg, 1], where Lg is th sum of all labels' length.
The data type must be int32.
blank (int, default 0): The blank label index of Connectionist blank (int, default 0): The blank label index of Connectionist
Temporal Classification (CTC) loss, which is in the Temporal Classification (CTC) loss, which is in the
half-opened interval [0, num_classes + 1). half-opened interval [0, num_classes + 1). The data type must be int32.
norm_by_times(bool, default false): Whether to normalize the gradients norm_by_times(bool, default false): Whether to normalize the gradients
by the number of time-step, which is also the sequence's length. by the number of time-step, which is also the sequence's length.
There is no need to normalize the gradients if warpctc layer was There is no need to normalize the gradients if warpctc layer was
...@@ -6792,40 +6793,72 @@ def warpctc(input, ...@@ -6792,40 +6793,72 @@ def warpctc(input,
Returns: Returns:
Variable: The Connectionist Temporal Classification (CTC) loss, Variable: The Connectionist Temporal Classification (CTC) loss,
which is a 2-D Tensor of the shape [batch_size, 1]. which is a 2-D Tensor with the shape [batch_size, 1].
The date type is the same as input.
Examples: Examples:
.. code-block:: python .. code-block:: python
# using LoDTensor # using LoDTensor
import paddle.fluid as fluid import paddle.fluid as fluid
import numpy as np import numpy as np
label = fluid.layers.data(name='label', shape=[12, 1], predict = fluid.data(name='predict',
dtype='float32', lod_level=1) shape=[None, 5],
predict = fluid.layers.data(name='predict',
shape=[11, 8],
dtype='float32',lod_level=1) dtype='float32',lod_level=1)
label = fluid.data(name='label', shape=[None, 1],
dtype='int32', lod_level=1)
cost = fluid.layers.warpctc(input=predict, label=label) cost = fluid.layers.warpctc(input=predict, label=label)
place = fluid.CPUPlace()
x=fluid.LoDTensor()
data = np.random.rand(8, 5).astype("float32")
x.set(data, place)
x.set_lod([[0,4,8]])
y=fluid.LoDTensor()
data = np.random.randint(0, 5, [4, 1]).astype("int32")
y.set(data, place)
y.set_lod([[0,2,4]])
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
output= exe.run(feed={"predict": x,"label": y},
fetch_list=[cost.name])
print output
.. code-block:: python
# using Tensor # using Tensor
input_length = fluid.layers.data(name='logits_length', shape=[11], import paddle.fluid as fluid
dtype='int64') import numpy as np
label_length = fluid.layers.data(name='labels_length', shape=[12],
dtype='int64')
target = fluid.layers.data(name='target', shape=[12, 1],
dtype='int32')
# length of the longest logit sequence # length of the longest logit sequence
max_seq_length = 4 max_seq_length = 5
# number of logit sequences # number of logit sequences
batch_size = 4 batch_size = None
output = fluid.layers.data(name='output', logits = fluid.data(name='logits',
shape=[max_seq_length, batch_size, 8], shape=[max_seq_length, batch_size, 5],
dtype='float32') dtype='float32')
loss = fluid.layers.warpctc(input=output,label=target, logits_length = fluid.data(name='logits_length', shape=[None],
input_length=input_length, dtype='int64')
label = fluid.layers.data(name='label', shape=[None, 1],
dtype='int32')
label_length = fluid.layers.data(name='labels_length', shape=[None],
dtype='int64')
cost = fluid.layers.warpctc(input=logits, label=label,
input_length=logits_length,
label_length=label_length) label_length=label_length)
place = fluid.CPUPlace()
batch_size = 2
x = np.random.rand(max_seq_length, batch_size, 5).astype("float32")
y = np.random.randint(0, 5, [max_seq_length * batch_size, 1]).astype("int32")
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
output= exe.run(feed={"logits": x,
"label": y,
"logits_length": np.array([5, 4]).astype("int64"),
"labels_length": np.array([3, 2]).astype("int64")},
fetch_list=[cost.name])
print(output)
""" """
helper = LayerHelper('warpctc', **locals()) helper = LayerHelper('warpctc', **locals())
this_inputs = {'Logits': [input], 'Label': [label]} this_inputs = {'Logits': [input], 'Label': [label]}
...@@ -10643,77 +10676,16 @@ def affine_grid(theta, out_shape, name=None): ...@@ -10643,77 +10676,16 @@ def affine_grid(theta, out_shape, name=None):
the input feature map should be sampled to produce the transformed the input feature map should be sampled to produce the transformed
output feature map. output feature map.
.. code-block:: text Args:
theta (Variable) - A Tensor with shape [N, 2, 3]. It contains a batch of affine transform parameters.
* Case 1: The data type can be float32 or float64.
out_shape (Variable | list | tuple): The shape of target output with format [batch_size, channel, height, width].
Given: ``out_shape`` can be a Tensor or a list or tuple. The data
type must be int32.
theta = [[[x_11, x_12, x_13] name(str|None): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`.
[x_14, x_15, x_16]]
[[x_21, x_22, x_23]
[x_24, x_25, x_26]]]
out_shape = [2, 3, 5, 5]
Step 1:
Generate normalized coordinates according to out_shape.
The values of the normalized coordinates are in the interval between -1 and 1.
The shape of the normalized coordinates is [2, H, W] as below:
C = [[[-1. -1. -1. -1. -1. ]
[-0.5 -0.5 -0.5 -0.5 -0.5]
[ 0. 0. 0. 0. 0. ]
[ 0.5 0.5 0.5 0.5 0.5]
[ 1. 1. 1. 1. 1. ]]
[[-1. -0.5 0. 0.5 1. ]
[-1. -0.5 0. 0.5 1. ]
[-1. -0.5 0. 0.5 1. ]
[-1. -0.5 0. 0.5 1. ]
[-1. -0.5 0. 0.5 1. ]]]
C[0] is the coordinates in height axis and C[1] is the coordinates in width axis.
Step2:
Tanspose and reshape C to shape [H * W, 2] and append ones to last dimension. The we get:
C_ = [[-1. -1. 1. ]
[-0.5 -1. 1. ]
[ 0. -1. 1. ]
[ 0.5 -1. 1. ]
[ 1. -1. 1. ]
[-1. -0.5 1. ]
[-0.5 -0.5 1. ]
[ 0. -0.5 1. ]
[ 0.5 -0.5 1. ]
[ 1. -0.5 1. ]
[-1. 0. 1. ]
[-0.5 0. 1. ]
[ 0. 0. 1. ]
[ 0.5 0. 1. ]
[ 1. 0. 1. ]
[-1. 0.5 1. ]
[-0.5 0.5 1. ]
[ 0. 0.5 1. ]
[ 0.5 0.5 1. ]
[ 1. 0.5 1. ]
[-1. 1. 1. ]
[-0.5 1. 1. ]
[ 0. 1. 1. ]
[ 0.5 1. 1. ]
[ 1. 1. 1. ]]
Step3:
Compute output by equation $$Output[i] = C_ * Theta[i]^T$$
Args:
theta (Variable): A batch of affine transform parameters with shape [N, 2, 3].
out_shape (Variable | list | tuple): The shape of target output with format [N, C, H, W].
``out_shape`` can be a Variable or a list or tuple.
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns: Returns:
Variable: The output with shape [N, H, W, 2]. Variable: A Tensor with shape [batch_size, H, W, 2] while 'H' and 'W' are the height and width of feature map in affine transformation. The data type is the same as `theta`.
Raises: Raises:
ValueError: If the type of arguments is not supported. ValueError: If the type of arguments is not supported.
...@@ -10723,13 +10695,20 @@ def affine_grid(theta, out_shape, name=None): ...@@ -10723,13 +10695,20 @@ def affine_grid(theta, out_shape, name=None):
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle.fluid as fluid
theta = fluid.layers.data(name="x", shape=[2, 3], dtype="float32") import numpy as np
out_shape = fluid.layers.data(name="y", shape=[-1], dtype="float32") place = fluid.CPUPlace()
data = fluid.layers.affine_grid(theta, out_shape) theta = fluid.data(name="x", shape=[None, 2, 3], dtype="float32")
out_shape = fluid.data(name="y", shape=[4], dtype="int32")
# or grid_0 = fluid.layers.affine_grid(theta, out_shape)
data = fluid.layers.affine_grid(theta, [5, 3, 28, 28]) grid_1 = fluid.layers.affine_grid(theta, [5, 3, 28, 28])
batch_size=2
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
output= exe.run(feed={"x": np.random.rand(batch_size,2,3).astype("float32"),
"y": np.array([5, 3, 28, 28]).astype("int32")},
fetch_list=[grid_0.name, grid_1.name])
print(output[0])
print(output[1])
""" """
helper = LayerHelper('affine_grid') helper = LayerHelper('affine_grid')
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册