From da892cafd5cc0fd711b744a9f01d4b03d0d7b410 Mon Sep 17 00:00:00 2001 From: wangguanzhong Date: Sun, 29 Sep 2019 10:33:25 +0800 Subject: [PATCH] Refine api doc (#20037) * refine doc, test=document_fix * add API.spec,test=develop,test=document_fix --- paddle/fluid/API.spec | 22 +- paddle/fluid/operators/clip_by_norm_op.h | 5 +- paddle/fluid/operators/maxout_op.cc | 8 +- paddle/fluid/operators/roi_align_op.cc | 5 +- python/paddle/fluid/layers/detection.py | 306 +++++++++++++---------- python/paddle/fluid/layers/nn.py | 56 +++-- 6 files changed, 239 insertions(+), 163 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 6166a3e5a08..45cf21c1394 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -204,7 +204,7 @@ paddle.fluid.layers.pad (ArgSpec(args=['x', 'paddings', 'pad_value', 'name'], va paddle.fluid.layers.pad_constant_like (ArgSpec(args=['x', 'y', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0.0, None)), ('document', '95aa1972983f30fe9b5a3713e523e20f')) paddle.fluid.layers.label_smooth (ArgSpec(args=['label', 'prior_dist', 'epsilon', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 0.1, 'float32', None)), ('document', '214f1dfbe95a628600bbe99e836319cf')) paddle.fluid.layers.roi_pool (ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1, 1, 1.0)), ('document', '49368d724023a66b41b0071be41c0ba5')) -paddle.fluid.layers.roi_align (ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale', 'sampling_ratio', 'name'], varargs=None, keywords=None, defaults=(1, 1, 1.0, -1, None)), ('document', '9a7a3b88a4fae41d58d3ca9b10ba0591')) +paddle.fluid.layers.roi_align (ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale', 'sampling_ratio', 'name'], varargs=None, keywords=None, defaults=(1, 1, 1.0, -1, None)), ('document', 'dc2e2fa3d6e3d30de0a81e8ee70de733')) paddle.fluid.layers.dice_loss 
(ArgSpec(args=['input', 'label', 'epsilon'], varargs=None, keywords=None, defaults=(1e-05,)), ('document', '7e8e4bf1f0f8612961ed113e8af8f0c5')) paddle.fluid.layers.image_resize (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'resample', 'actual_shape', 'align_corners', 'align_mode', 'data_format'], varargs=None, keywords=None, defaults=(None, None, None, 'BILINEAR', None, True, 1, 'NCHW')), ('document', 'd29d829607b5ff12924197a3ba296c89')) paddle.fluid.layers.image_resize_short (ArgSpec(args=['input', 'out_short_len', 'resample'], varargs=None, keywords=None, defaults=('BILINEAR',)), ('document', 'bd97ebfe4bdf5110a5fcb8ecb626a447')) @@ -232,7 +232,7 @@ paddle.fluid.layers.pow (ArgSpec(args=['x', 'factor', 'name'], varargs=None, key paddle.fluid.layers.stanh (ArgSpec(args=['x', 'scale_a', 'scale_b', 'name'], varargs=None, keywords=None, defaults=(0.6666666666666666, 1.7159, None)), ('document', '1e1efad868714425da15c785dfb533a1')) paddle.fluid.layers.hard_sigmoid (ArgSpec(args=['x', 'slope', 'offset', 'name'], varargs=None, keywords=None, defaults=(0.2, 0.5, None)), ('document', '607d79ca873bee40eed1c79a96611591')) paddle.fluid.layers.swish (ArgSpec(args=['x', 'beta', 'name'], varargs=None, keywords=None, defaults=(1.0, None)), ('document', 'e0dc7bc66cba939033bc028d7a62c5f4')) -paddle.fluid.layers.prelu (ArgSpec(args=['x', 'mode', 'param_attr', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '2da40e447716338affebfe058d05d9a9')) +paddle.fluid.layers.prelu (ArgSpec(args=['x', 'mode', 'param_attr', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '1fadca6622c70bd33cc260817f4ff191')) paddle.fluid.layers.brelu (ArgSpec(args=['x', 't_min', 't_max', 'name'], varargs=None, keywords=None, defaults=(0.0, 24.0, None)), ('document', '49580538249a52c857fce75c94ad8af7')) paddle.fluid.layers.leaky_relu (ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(0.02, None)), ('document', 
'1eb3009c69060299ec87949ee0d4b9ae')) paddle.fluid.layers.soft_relu (ArgSpec(args=['x', 'threshold', 'name'], varargs=None, keywords=None, defaults=(40.0, None)), ('document', '6455afd2498b00198f53f83d63d6c6a4')) @@ -271,11 +271,11 @@ paddle.fluid.layers.logical_or (ArgSpec(args=['x', 'y', 'out', 'name'], varargs= paddle.fluid.layers.logical_xor (ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '77ccf37b710c507dd97e03f08ce8bb29')) paddle.fluid.layers.logical_not (ArgSpec(args=['x', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6e2fe8a322ec69811f6507d22acf8f9f')) paddle.fluid.layers.clip (ArgSpec(args=['x', 'min', 'max', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '0ce33756573c572da67302499455dbcd')) -paddle.fluid.layers.clip_by_norm (ArgSpec(args=['x', 'max_norm', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '99a1b9012d9c4495efc89d69958c3be7')) +paddle.fluid.layers.clip_by_norm (ArgSpec(args=['x', 'max_norm', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '1fc6e217c7a6128df31b806c1a8067ff')) paddle.fluid.layers.mean (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '597257fb94d0597c404a6a5c91ab5258')) paddle.fluid.layers.mul (ArgSpec(args=['x', 'y', 'x_num_col_dims', 'y_num_col_dims', 'name'], varargs=None, keywords=None, defaults=(1, 1, None)), ('document', '784b7e36cea88493f9e37a41b10fbf4d')) paddle.fluid.layers.sigmoid_cross_entropy_with_logits (ArgSpec(args=['x', 'label', 'ignore_index', 'name', 'normalize'], varargs=None, keywords=None, defaults=(-100, None, False)), ('document', '7637c974f2d749d359acae9062c4d96f')) -paddle.fluid.layers.maxout (ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '22df6542f3f9aa3f34c0c2dab5dc1d80')) +paddle.fluid.layers.maxout (ArgSpec(args=['x', 'groups', 'name'], varargs=None, 
keywords=None, defaults=(None,)), ('document', '169882eb87fb693198e0153629134c22')) paddle.fluid.layers.space_to_depth (ArgSpec(args=['x', 'blocksize', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '26decdea9376b6b9a0d3432d82ca207b')) paddle.fluid.layers.affine_grid (ArgSpec(args=['theta', 'out_shape', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'f85b263b7b6698d000977529a28f202b')) paddle.fluid.layers.sequence_reverse (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '65c8362e48810b8226e311c5d046db51')) @@ -412,30 +412,30 @@ paddle.fluid.layers.thresholded_relu (ArgSpec(args=['x', 'threshold'], varargs=N paddle.fluid.layers.prior_box (ArgSpec(args=['input', 'image', 'min_sizes', 'max_sizes', 'aspect_ratios', 'variance', 'flip', 'clip', 'steps', 'offset', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, [1.0], [0.1, 0.1, 0.2, 0.2], False, False, [0.0, 0.0], 0.5, None, False)), ('document', '0fdf82762fd0a5acb2578a72771b5b44')) paddle.fluid.layers.density_prior_box (ArgSpec(args=['input', 'image', 'densities', 'fixed_sizes', 'fixed_ratios', 'variance', 'clip', 'steps', 'offset', 'flatten_to_2d', 'name'], varargs=None, keywords=None, defaults=(None, None, None, [0.1, 0.1, 0.2, 0.2], False, [0.0, 0.0], 0.5, False, None)), ('document', '7a484a0da5e993a7734867a3dfa86571')) paddle.fluid.layers.multi_box_head (ArgSpec(args=['inputs', 'image', 'base_size', 'num_classes', 'aspect_ratios', 'min_ratio', 'max_ratio', 'min_sizes', 'max_sizes', 'steps', 'step_w', 'step_h', 'offset', 'variance', 'flip', 'clip', 'kernel_size', 'pad', 'stride', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None, 0.5, [0.1, 0.1, 0.2, 0.2], True, False, 1, 0, 1, None, False)), ('document', 'fd58078fdfffd899b91f992ba224628f')) -paddle.fluid.layers.bipartite_match (ArgSpec(args=['dist_matrix', 'match_type', 
'dist_threshold', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '080ce0d54d3f1950ad5a3a8e5ae529e9')) +paddle.fluid.layers.bipartite_match (ArgSpec(args=['dist_matrix', 'match_type', 'dist_threshold', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '409c351dee8a4a4ea02771dc691b49cb')) paddle.fluid.layers.target_assign (ArgSpec(args=['input', 'matched_indices', 'negative_indices', 'mismatch_value', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', 'e9685f32d21bec8c013626c0254502c5')) paddle.fluid.layers.detection_output (ArgSpec(args=['loc', 'scores', 'prior_box', 'prior_box_var', 'background_label', 'nms_threshold', 'nms_top_k', 'keep_top_k', 'score_threshold', 'nms_eta', 'return_index'], varargs=None, keywords=None, defaults=(0, 0.3, 400, 200, 0.01, 1.0, False)), ('document', '5485bcaceb0cde2695565a2ffd5bbd40')) paddle.fluid.layers.ssd_loss (ArgSpec(args=['location', 'confidence', 'gt_box', 'gt_label', 'prior_box', 'prior_box_var', 'background_label', 'overlap_threshold', 'neg_pos_ratio', 'neg_overlap', 'loc_loss_weight', 'conf_loss_weight', 'match_type', 'mining_type', 'normalize', 'sample_size'], varargs=None, keywords=None, defaults=(None, 0, 0.5, 3.0, 0.5, 1.0, 1.0, 'per_prediction', 'max_negative', True, None)), ('document', '14d1eeae0f41b6792be43c1c0be0589b')) paddle.fluid.layers.rpn_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'is_crowd', 'im_info', 'rpn_batch_size_per_im', 'rpn_straddle_thresh', 'rpn_fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.0, 0.5, 0.7, 0.3, True)), ('document', '651d98d51879dfa1bc1cd40391786a41')) paddle.fluid.layers.retinanet_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'gt_labels', 'is_crowd', 'im_info', 'num_classes', 'positive_overlap', 'negative_overlap'], 
varargs=None, keywords=None, defaults=(1, 0.5, 0.4)), ('document', 'fa1d1c9d5e0111684c0db705f86a2595')) paddle.fluid.layers.sigmoid_focal_loss (ArgSpec(args=['x', 'label', 'fg_num', 'gamma', 'alpha'], varargs=None, keywords=None, defaults=(2, 0.25)), ('document', 'aeac6aae100173b3fc7f102cf3023a3d')) -paddle.fluid.layers.anchor_generator (ArgSpec(args=['input', 'anchor_sizes', 'aspect_ratios', 'variance', 'stride', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, [0.1, 0.1, 0.2, 0.2], None, 0.5, None)), ('document', '0aaacaf9858b8270a8ab5b0aacdd94b7')) +paddle.fluid.layers.anchor_generator (ArgSpec(args=['input', 'anchor_sizes', 'aspect_ratios', 'variance', 'stride', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, [0.1, 0.1, 0.2, 0.2], None, 0.5, None)), ('document', 'd25e5e90f9a342764f32b5cd48657148')) paddle.fluid.layers.roi_perspective_transform (ArgSpec(args=['input', 'rois', 'transformed_height', 'transformed_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1.0,)), ('document', 'a82016342789ba9d85737e405f824ff1')) paddle.fluid.layers.generate_proposal_labels (ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random', 'is_cls_agnostic', 'is_cascade_rcnn'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True, False, False)), ('document', '69def376b42ef0681d0cc7f53a2dac4b')) paddle.fluid.layers.generate_proposals (ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None)), ('document', 'b7d707822b6af2a586bce608040235b1')) paddle.fluid.layers.generate_mask_labels (ArgSpec(args=['im_info', 'gt_classes', 'is_crowd', 'gt_segms', 'rois', 'labels_int32', 
'num_classes', 'resolution'], varargs=None, keywords=None, defaults=None), ('document', 'b319b10ddaf17fb4ddf03518685a17ef')) paddle.fluid.layers.iou_similarity (ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '72fca4a39ccf82d5c746ae62d1868a99')) -paddle.fluid.layers.box_coder (ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name', 'axis'], varargs=None, keywords=None, defaults=('encode_center_size', True, None, 0)), ('document', '4c6225fc1a1c0b84955a8f0013008243')) +paddle.fluid.layers.box_coder (ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name', 'axis'], varargs=None, keywords=None, defaults=('encode_center_size', True, None, 0)), ('document', '1d5144c3856673d05c29c752c7c8f821')) paddle.fluid.layers.polygon_box_transform (ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e308ce1661cb722b220a6f482f85b9e4')) paddle.fluid.layers.yolov3_loss (ArgSpec(args=['x', 'gt_box', 'gt_label', 'anchors', 'anchor_mask', 'class_num', 'ignore_thresh', 'downsample_ratio', 'gt_score', 'use_label_smooth', 'name'], varargs=None, keywords=None, defaults=(None, True, None)), ('document', '400403175718d5a632402cdae88b01b8')) paddle.fluid.layers.yolo_box (ArgSpec(args=['x', 'img_size', 'anchors', 'class_num', 'conf_thresh', 'downsample_ratio', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ed56ff21536ca5c8ad418d0cfaf6a7b9')) -paddle.fluid.layers.box_clip (ArgSpec(args=['input', 'im_info', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '9ddee76cb808db83768bf68010e39b2b')) +paddle.fluid.layers.box_clip (ArgSpec(args=['input', 'im_info', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '882c99ed2adad54f612a40275b881850')) paddle.fluid.layers.multiclass_nms (ArgSpec(args=['bboxes', 'scores', 'score_threshold', 'nms_top_k', 'keep_top_k', 
'nms_threshold', 'normalized', 'nms_eta', 'background_label', 'name'], varargs=None, keywords=None, defaults=(0.3, True, 1.0, 0, None)), ('document', 'f6e333d76922c6e564413b4d216c245c')) paddle.fluid.layers.multiclass_nms2 (ArgSpec(args=['bboxes', 'scores', 'score_threshold', 'nms_top_k', 'keep_top_k', 'nms_threshold', 'normalized', 'nms_eta', 'background_label', 'return_index', 'name'], varargs=None, keywords=None, defaults=(0.3, True, 1.0, 0, False, None)), ('document', 'be156186ee7a2ee56ab30b964acb15e5')) paddle.fluid.layers.retinanet_detection_output (ArgSpec(args=['bboxes', 'scores', 'anchors', 'im_info', 'score_threshold', 'nms_top_k', 'keep_top_k', 'nms_threshold', 'nms_eta'], varargs=None, keywords=None, defaults=(0.05, 1000, 100, 0.3, 1.0)), ('document', '078d28607ce261a0cba2b965a79f6bb8')) -paddle.fluid.layers.distribute_fpn_proposals (ArgSpec(args=['fpn_rois', 'min_level', 'max_level', 'refer_level', 'refer_scale', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6c023b9401214ae387a8b2d92638e5e4')) -paddle.fluid.layers.box_decoder_and_assign (ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'box_score', 'box_clip', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '3619a7847709f5868f5e929065947b38')) -paddle.fluid.layers.collect_fpn_proposals (ArgSpec(args=['multi_rois', 'multi_scores', 'min_level', 'max_level', 'post_nms_top_n', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '80a75103e001ca1ba056fbbe0c6a19f3')) +paddle.fluid.layers.distribute_fpn_proposals (ArgSpec(args=['fpn_rois', 'min_level', 'max_level', 'refer_level', 'refer_scale', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'be432c9b5f19ccba7aca38789ead29e4')) +paddle.fluid.layers.box_decoder_and_assign (ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'box_score', 'box_clip', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '5203935538d06a6d47b8630ad80cb2b0')) 
+paddle.fluid.layers.collect_fpn_proposals (ArgSpec(args=['multi_rois', 'multi_scores', 'min_level', 'max_level', 'post_nms_top_n', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '808fcca082e0040e2b77dbc53a0cf9d5')) paddle.fluid.layers.accuracy (ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None)), ('document', 'ef799022a6040597462ae2b3d2f1c407')) paddle.fluid.layers.auc (ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk', 'slide_steps'], varargs=None, keywords=None, defaults=('ROC', 4095, 1, 1)), ('document', '34b4575807f955f7e8698b8dead23858')) paddle.fluid.layers.exponential_decay (ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,)), ('document', 'eaf430c5a0380fb11bfe9a8922cd6295')) diff --git a/paddle/fluid/operators/clip_by_norm_op.h b/paddle/fluid/operators/clip_by_norm_op.h index b35e9c72c33..90265259c95 100644 --- a/paddle/fluid/operators/clip_by_norm_op.h +++ b/paddle/fluid/operators/clip_by_norm_op.h @@ -105,10 +105,11 @@ class ClipByNormOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("X", - "(Tensor) The input of clip_by_norm op." + "(Tensor) The input of clip_by_norm op and data type is float32." "The number of dimensions must be between [1, 9]."); AddOutput("Out", - "(Tensor) The output of clip_by_norm op with shape as input(X)"); + "(Tensor) The output of clip_by_norm op with shape as input(X)" + "The data type is float32."); AddAttr("max_norm", "(float) The maximum norm value."); AddComment(R"DOC( ClipByNorm Operator. 
diff --git a/paddle/fluid/operators/maxout_op.cc b/paddle/fluid/operators/maxout_op.cc index 078d7bade7e..c05c1a282c2 100644 --- a/paddle/fluid/operators/maxout_op.cc +++ b/paddle/fluid/operators/maxout_op.cc @@ -25,11 +25,13 @@ class MaxOutOpMaker : public framework::OpProtoAndCheckerMaker { void Make() override { AddInput( "X", - "(Tensor) The input tensor of maxout operator. " - "The format of input tensor is NCHW. Where N is batch size, C is the " - "number of channels, H and W is the height and width of feature."); + "(Tensor) The input tensor of maxout operator with data type of " + "float32. The format of input tensor is NCHW. Where N is batch size," + " C is the number of channels, H and W is the height and width of " + "feature."); AddOutput("Out", "(Tensor) The output tensor of maxout operator." + "The data type is float32." "The format of output tensor is also NCHW." "Where N is batch size, C is " "the number of channels, H and W is the height and " diff --git a/paddle/fluid/operators/roi_align_op.cc b/paddle/fluid/operators/roi_align_op.cc index 21c3dd27f02..0914ad81c77 100644 --- a/paddle/fluid/operators/roi_align_op.cc +++ b/paddle/fluid/operators/roi_align_op.cc @@ -95,7 +95,7 @@ class ROIAlignOpMaker : public framework::OpProtoAndCheckerMaker { void Make() override { AddInput("X", "(Tensor), " - "The input of ROIAlignOp. " + "The input of ROIAlignOp. The data type is float32 or float64." "The format of input tensor is NCHW. Where N is batch size, " "C is the number of input channels, " "H is the height of the feature, and " @@ -110,7 +110,8 @@ class ROIAlignOpMaker : public framework::OpProtoAndCheckerMaker { AddOutput("Out", "(Tensor), " "The output of ROIAlignOp is a 4-D tensor with shape " - "(num_rois, channels, pooled_h, pooled_w)."); + "(num_rois, channels, pooled_h, pooled_w). 
The data type is " + "float32 or float64."); AddAttr("spatial_scale", "(float, default 1.0), " "Multiplicative spatial scale factor " diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index aa6c7ed0b8c..1b606da0067 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -672,64 +672,78 @@ def box_coder(prior_box, Args: prior_box(Variable): Box list prior_box is a 2-D Tensor with shape - [M, 4] holds M boxes, each box is represented as - [xmin, ymin, xmax, ymax], [xmin, ymin] is the - left top coordinate of the anchor box, if the - input is image feature map, they are close to - the origin of the coordinate system. [xmax, ymax] - is the right bottom coordinate of the anchor box. - prior_box_var(Variable|list|None): prior_box_var supports two types - of input. One is variable with shape [M, 4] - holds M group. The other one is list consist of - 4 elements shared by all boxes. + [M, 4] holds M boxes and data type is float32 or float64. Each box + is represented as [xmin, ymin, xmax, ymax], [xmin, ymin] is the + left top coordinate of the anchor box, if the input is image feature + map, they are close to the origin of the coordinate system. + [xmax, ymax] is the right bottom coordinate of the anchor box. + prior_box_var(List|Variable|None): prior_box_var supports three types + of input. One is variable with shape [M, 4] which holds M group and + data type is float32 or float64. The second is list consist of + 4 elements shared by all boxes and data type is float32 or float64. + Other is None and not involved in calculation. target_box(Variable): This input can be a 2-D LoDTensor with shape - [N, 4] when code_type is 'encode_center_size'. - This input also can be a 3-D Tensor with shape - [N, M, 4] when code_type is 'decode_center_size'. - Each box is represented as - [xmin, ymin, xmax, ymax]. This tensor can - contain LoD information to represent a batch - of inputs. 
- code_type(string): The code type used with the target box. It can be - encode_center_size or decode_center_size - box_normalized(int): Whether treat the priorbox as a noramlized box. - Set true by default. - name(string): The name of box coder. + [N, 4] when code_type is 'encode_center_size'. This input also can + be a 3-D Tensor with shape [N, M, 4] when code_type is + 'decode_center_size'. Each box is represented as + [xmin, ymin, xmax, ymax]. The data type is float32 or float64. + This tensor can contain LoD information to represent a batch of inputs. + code_type(str): The code type used with the target box. It can be + `encode_center_size` or `decode_center_size`. `encode_center_size` + by default. + box_normalized(bool): Whether to treat the priorbox as a normalized box. + Set true by default. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. axis(int): Which axis in PriorBox to broadcast for box decode, - for example, if axis is 0 and TargetBox has shape - [N, M, 4] and PriorBox has shape [M, 4], then PriorBox - will broadcast to [N, M, 4] for decoding. It is only valid - when code type is decode_center_size. Set 0 by default. + for example, if axis is 0 and TargetBox has shape [N, M, 4] and + PriorBox has shape [M, 4], then PriorBox will broadcast to [N, M, 4] + for decoding. It is only valid when code type is + `decode_center_size`. Set 0 by default. Returns: + Variable: + output_box(Variable): When code_type is 'encode_center_size', the - output tensor of box_coder_op with shape - [N, M, 4] representing the result of N target - boxes encoded with M Prior boxes and variances. - When code_type is 'decode_center_size', - N represents the batch size and M represents - the number of deocded boxes. + output tensor of box_coder_op with shape [N, M, 4] representing the + result of N target boxes encoded with M Prior boxes and variances. 
+ When code_type is 'decode_center_size', N represents the batch size + and M represents the number of decoded boxes. Examples: .. code-block:: python import paddle.fluid as fluid - prior_box = fluid.layers.data(name='prior_box', - shape=[512, 4], - dtype='float32', - append_batch_size=False) - target_box = fluid.layers.data(name='target_box', - shape=[512,81,4], - dtype='float32', - append_batch_size=False) - output = fluid.layers.box_coder(prior_box=prior_box, - prior_box_var=[0.1,0.1,0.2,0.2], - target_box=target_box, - code_type="decode_center_size", - box_normalized=False, - axis=1) - + # For encode + prior_box_encode = fluid.layers.data(name='prior_box_encode', + shape=[512, 4], + dtype='float32', + append_batch_size=False) + target_box_encode = fluid.layers.data(name='target_box_encode', + shape=[81,4], + dtype='float32', + append_batch_size=False) + output_encode = fluid.layers.box_coder(prior_box=prior_box_encode, + prior_box_var=[0.1,0.1,0.2,0.2], + target_box=target_box_encode, + code_type="encode_center_size") + # For decode + prior_box_decode = fluid.layers.data(name='prior_box_decode', + shape=[512, 4], + dtype='float32', + append_batch_size=False) + target_box_decode = fluid.layers.data(name='target_box_decode', + shape=[512,81,4], + dtype='float32', + append_batch_size=False) + output_decode = fluid.layers.box_coder(prior_box=prior_box_decode, + prior_box_var=[0.1,0.1,0.2,0.2], + target_box=target_box_decode, + code_type="decode_center_size", + box_normalized=False, + axis=1) """ helper = LayerHelper("box_coder", **locals()) @@ -1105,7 +1119,7 @@ def bipartite_match(dist_matrix, also can find the matched row for each column. And this operator only calculate matched indices from column to row. For each instance, the number of matched indices is the column number of the input distance - matrix. + matrix. **The OP only supports CPU**. There are two outputs, matched indices and distance. 
A simple description, this algorithm matched the best (maximum distance) @@ -1122,33 +1136,35 @@ def bipartite_match(dist_matrix, Args: dist_matrix(Variable): This input is a 2-D LoDTensor with shape - [K, M]. It is pair-wise distance matrix between the entities - represented by each row and each column. For example, assumed one - entity is A with shape [K], another entity is B with shape [M]. The - dist_matrix[i][j] is the distance between A[i] and B[j]. The bigger - the distance is, the better matching the pairs are. - - NOTE: This tensor can contain LoD information to represent a batch - of inputs. One instance of this batch can contain different numbers - of entities. - match_type(string|None): The type of matching method, should be - 'bipartite' or 'per_prediction'. [default 'bipartite']. - dist_threshold(float|None): If `match_type` is 'per_prediction', + [K, M]. The data type is float32 or float64. It is pair-wise + distance matrix between the entities represented by each row and + each column. For example, assumed one entity is A with shape [K], + another entity is B with shape [M]. The dist_matrix[i][j] is the + distance between A[i] and B[j]. The bigger the distance is, the + better matching the pairs are. NOTE: This tensor can contain LoD + information to represent a batch of inputs. One instance of this + batch can contain different numbers of entities. + match_type(str, optional): The type of matching method, should be + 'bipartite' or 'per_prediction'. None ('bipartite') by default. + dist_threshold(float32, optional): If `match_type` is 'per_prediction', this threshold is to determine the extra matching bboxes based on the maximum distance, 0.5 by default. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + Returns: - tuple: a tuple with two elements is returned. The first is - matched_indices, the second is matched_distance. 
+ Tuple: - The matched_indices is a 2-D Tensor with shape [N, M] in int type. - N is the batch size. If match_indices[i][j] is -1, it + matched_indices(Variable): A 2-D Tensor with shape [N, M]. The data + type is int32. N is the batch size. If match_indices[i][j] is -1, it means B[j] does not match any entity in i-th instance. Otherwise, it means B[j] is matched to row match_indices[i][j] in i-th instance. The row number of i-th instance is saved in match_indices[i][j]. - The matched_distance is a 2-D Tensor with shape [N, M] in float type - . N is batch size. If match_indices[i][j] is -1, + matched_distance(Variable): A 2-D Tensor with shape [N, M]. The data + type is float32. N is batch size. If match_indices[i][j] is -1, match_distance[i][j] is also -1.0. Otherwise, assumed match_distance[i][j] = d, and the row offsets of each instance are called LoD. Then match_distance[i][j] = @@ -2028,32 +2044,36 @@ def anchor_generator(input, is firstly aspect_ratios loop then anchor_sizes loop. Args: - input(Variable): The input feature map, the format is NCHW. - anchor_sizes(list|tuple|float): The anchor sizes of generated anchors, - given in absolute pixels e.g. [64., 128., 256., 512.]. - For instance, the anchor size of 64 means the area of this anchor equals to 64**2. - aspect_ratios(list|tuple|float): The height / width ratios of generated - anchors, e.g. [0.5, 1.0, 2.0]. - variance(list|tuple): The variances to be used in box regression deltas. - Default:[0.1, 0.1, 0.2, 0.2]. - stride(list|tuple): The anchors stride across width and height,e.g. [16.0, 16.0] - offset(float): Prior boxes center offset. Default: 0.5 - name(str): Name of the prior box op. Default: None. + input(Variable): 4-D Tensor with shape [N,C,H,W]. The input feature map. + anchor_sizes(float32|list|tuple, optional): The anchor sizes of generated + anchors, given in absolute pixels e.g. [64., 128., 256., 512.]. + For instance, the anchor size of 64 means the area of this anchor + equals to 64**2. 
None by default. + aspect_ratios(float32|list|tuple, optional): The height / width ratios + of generated anchors, e.g. [0.5, 1.0, 2.0]. None by default. + variance(list|tuple, optional): The variances to be used in box + regression deltas. The data type is float32, [0.1, 0.1, 0.2, 0.2] by + default. + stride(list|tuple, optional): The anchors stride across width and height. + The data type is float32. e.g. [16.0, 16.0]. None by default. + offset(float32, optional): Prior boxes center offset. 0.5 by default. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and None + by default. Returns: - Anchors(Variable),Variances(Variable): - - two variables: - - - Anchors(Variable): The output anchors with a layout of [H, W, num_anchors, 4]. \ - H is the height of input, W is the width of input, \ - num_anchors is the box count of each position. \ - Each anchor is in (xmin, ymin, xmax, ymax) format an unnormalized. - - Variances(Variable): The expanded variances of anchors \ - with a layout of [H, W, num_priors, 4]. \ - H is the height of input, W is the width of input \ - num_anchors is the box count of each position. \ - Each variance is in (xcenter, ycenter, w, h) format. + Tuple: + + Anchors(Variable): The output anchors with a layout of [H, W, num_anchors, 4]. + H is the height of input, W is the width of input, + num_anchors is the box count of each position. + Each anchor is in (xmin, ymin, xmax, ymax) format an unnormalized. + + Variances(Variable): The expanded variances of anchors + with a layout of [H, W, num_priors, 4]. + H is the height of input, W is the width of input + num_anchors is the box count of each position. + Each variance is in (xcenter, ycenter, w, h) format. Examples: @@ -2566,15 +2586,22 @@ def box_clip(input, im_info, name=None): im_w = round(weight / scale) Args: - input(variable): The input box, the last dimension is 4. 
- im_info(variable): The information of image with shape [N, 3] with - layout (height, width, scale). height and width - is the input size and scale is the ratio of input - size and original size. - name (str): The name of this layer. It is optional. + input(Variable): The input Tensor with shape :math:`[N_1, N_2, ..., N_k, 4]`, + the last dimension is 4 and data type is float32 or float64. + im_info(Variable): The 2-D Tensor with shape [N, 3] with layout + (height, width, scale) representing the information of image. + height and width is the input size and scale is the ratio of input + size and original size. The data type is float32 or float64. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. Returns: - Variable: The cliped tensor variable. + Variable: + + output(Variable): The clipped tensor with data type float32 or float64. + The shape is same as input. + Examples: .. code-block:: python @@ -2984,12 +3011,12 @@ def distribute_fpn_proposals(fpn_rois, refer_scale, name=None): """ - In Feature Pyramid Networks (FPN) models, it is needed to distribute all - proposals into different FPN level, with respect to scale of the proposals, - the referring scale and the referring level. Besides, to restore the order - of proposals, we return an array which indicates the original index of rois - in current proposals. To compute FPN level for each roi, the formula is - given as follows: + **This op only takes LoDTensor as input.** In Feature Pyramid Networks + (FPN) models, it is needed to distribute all proposals into different FPN + level, with respect to scale of the proposals, the referring scale and the + referring level. Besides, to restore the order of proposals, we return an + array which indicates the original index of rois in current proposals. + To compute FPN level for each roi, the formula is given as follows: .. 
math:: @@ -3000,21 +3027,30 @@ def distribute_fpn_proposals(fpn_rois, where BBoxArea is a function to compute the area of each roi. Args: - fpn_rois(variable): The input fpn_rois, the second dimension is 4. - min_level(int): The lowest level of FPN layer where the proposals come - from. - max_level(int): The highest level of FPN layer where the proposals - come from. - refer_level(int): The referring level of FPN layer with specified scale. - refer_scale(int): The referring scale of FPN layer with specified level. - name(str|None): The name of this operator. + + fpn_rois(Variable): 2-D Tensor with shape [N, 4] and data type is + float32 or float64. The input fpn_rois. + min_level(int32): The lowest level of FPN layer where the proposals come + from. + max_level(int32): The highest level of FPN layer where the proposals + come from. + refer_level(int32): The referring level of FPN layer with specified scale. + refer_scale(int32): The referring scale of FPN layer with specified level. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. Returns: - tuple: - A tuple(multi_rois, restore_ind) is returned. The multi_rois is - a list of segmented tensor variables. The restore_ind is a 2D - Tensor with shape [N, 1], N is the number of total rois. It is - used to restore the order of fpn_rois. + Tuple: + + multi_rois(List) : A list of 2-D LoDTensor with shape [M, 4] + and data type of float32 or float64. The length is + max_level-min_level+1. The proposals in each FPN level. + + restore_ind(Variable): A 2-D Tensor with shape [N, 1], N is + the number of total rois. The data type is int32. It is + used to restore the order of fpn_rois. + Examples: ..
code-block:: python @@ -3066,14 +3102,17 @@ def box_decoder_and_assign(prior_box, target_box(${target_box_type}): ${target_box_comment} box_score(${box_score_type}): ${box_score_comment} box_clip(${box_clip_type}): ${box_clip_comment} - name(str|None): The name of this operator + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + Returns: - decode_box(Variable), output_assign_box(Variable): + Tuple: - two variables: + decode_box(${decode_box_type}): ${decode_box_comment} + + output_assign_box(${output_assign_box_type}): ${output_assign_box_comment} - - decode_box(${decode_box_type}): ${decode_box_comment} - - output_assign_box(${output_assign_box_type}): ${output_assign_box_comment} Examples: .. code-block:: python @@ -3122,8 +3161,9 @@ def collect_fpn_proposals(multi_rois, post_nms_top_n, name=None): """ - Concat multi-level RoIs (Region of Interest) and select N RoIs - with respect to multi_scores. This operation performs the following steps: + **This OP only supports LoDTensor as input**. Concat multi-level RoIs + (Region of Interest) and select N RoIs with respect to multi_scores. + This operation performs the following steps: 1. Choose num_level RoIs and scores as input: num_level = max_level - min_level 2. Concat multi-level RoIs and scores @@ -3132,15 +3172,25 @@ def collect_fpn_proposals(multi_rois, 5. Re-sort RoIs by corresponding batch_id Args: - multi_ros(list): List of RoIs to collect - multi_scores(list): List of scores + multi_rois(list): List of RoIs to collect. Element in list is 2-D + LoDTensor with shape [N, 4] and data type is float32 or float64, + N is the number of RoIs. + multi_scores(list): List of scores of RoIs to collect. Element in list + is 2-D LoDTensor with shape [N, 1] and data type is float32 or + float64, N is the number of RoIs. 
min_level(int): The lowest level of FPN layer to collect max_level(int): The highest level of FPN layer to collect post_nms_top_n(int): The number of selected RoIs - name(str|None): A name for this layer(optional) - + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + Returns: - Variable: Output variable of selected RoIs. + Variable: + + fpn_rois(Variable): 2-D LoDTensor with shape [N, 4] and data type is + float32 or float64. Selected RoIs. + Examples: .. code-block:: python diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 858bfa79d47..194e007fcb4 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -8154,17 +8154,24 @@ def roi_align(input, Args: input (Variable): ${x_comment} rois (Variable): ROIs (Regions of Interest) to pool over.It should be - a 2-D LoDTensor of shape (num_rois, 4), the lod level - is 1. Given as [[x1, y1, x2, y2], ...], (x1, y1) is - the top left coordinates, and (x2, y2) is the bottom - right coordinates. - pooled_height (integer): ${pooled_height_comment} Default: 1 - pooled_width (integer): ${pooled_width_comment} Default: 1 - spatial_scale (float): ${spatial_scale_comment} Default: 1.0 - sampling_ratio(intger): ${sampling_ratio_comment} Default: -1 + a 2-D LoDTensor of shape (num_rois, 4), the lod level is 1. The + data type is float32 or float64. Given as [[x1, y1, x2, y2], ...], + (x1, y1) is the top left coordinates, and (x2, y2) is the bottom + right coordinates. + pooled_height (int32, optional): ${pooled_height_comment} Default: 1 + pooled_width (int32, optional): ${pooled_width_comment} Default: 1 + spatial_scale (float32, optional): ${spatial_scale_comment} Default: 1.0 + sampling_ratio(int32, optional): ${sampling_ratio_comment} Default: -1 + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. 
Usually name is no need to set and + None by default. Returns: - Variable: ${out_comment}. + Variable: + + Output: ${out_comment}. + + Examples: .. code-block:: python @@ -10498,15 +10505,20 @@ def prelu(x, mode, param_attr=None, name=None): element: All elements do not share alpha. Each element has its own alpha. Args: - x (Variable): The input tensor. - mode (string): The mode for weight sharing. + x (Variable): The input Tensor or LoDTensor with data type float32. + mode (str): The mode for weight sharing. param_attr(ParamAttr|None): The parameter attribute for the learnable - weight (alpha), it can be create by ParamAttr. - name(str|None): A name for this layer(optional). If set None, the layer - will be named automatically. + weight (alpha), it can be created by ParamAttr. None by default. + For detailed information, please refer to :ref:`api_fluid_ParamAttr`. + name(str|None): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. Returns: - Variable: The output tensor with the same shape as input. + Variable: + + output(Variable): The tensor or LoDTensor with the same shape as input. + The data type is float32. Examples: @@ -12561,11 +12573,16 @@ def clip_by_norm(x, max_norm, name=None): Args: x(${x_type}): ${x_comment} max_norm(${max_norm_type}): ${max_norm_comment} - name(basestring|None): Name of the output. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. Returns: + Variable: + out(${out_type}): ${out_comment} + Examples: .. code-block:: python @@ -12771,11 +12788,16 @@ def maxout(x, groups, name=None): Args: x(${x_type}): ${x_comment} groups(${groups_type}): ${groups_comment} - name(basestring|None): Name of the output. + name(str, optional): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default.
Returns: + Variable: + out(${out_type}): ${out_comment} + Examples: .. code-block:: python -- GitLab