Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
df3025c3
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
df3025c3
编写于
10月 11, 2019
作者:
Q
qingqing01
提交者:
GitHub
10月 11, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Polish En doc for some APIs. (#20418)
* Polish En doc for some APIs * Update some comments and API.spec
上级
6b612a28
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
199 addition
and
142 deletion
+199
-142
paddle/fluid/API.spec
paddle/fluid/API.spec
+5
-5
python/paddle/fluid/layers/detection.py
python/paddle/fluid/layers/detection.py
+181
-134
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+13
-3
未找到文件。
paddle/fluid/API.spec
浏览文件 @
df3025c3
...
@@ -214,7 +214,7 @@ paddle.fluid.layers.scatter (ArgSpec(args=['input', 'index', 'updates', 'name',
...
@@ -214,7 +214,7 @@ paddle.fluid.layers.scatter (ArgSpec(args=['input', 'index', 'updates', 'name',
paddle.fluid.layers.scatter_nd_add (ArgSpec(args=['ref', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '2607b5c9369fbc52f208de066a80fc25'))
paddle.fluid.layers.scatter_nd_add (ArgSpec(args=['ref', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '2607b5c9369fbc52f208de066a80fc25'))
paddle.fluid.layers.scatter_nd (ArgSpec(args=['index', 'updates', 'shape', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e43f1d3a938b35da246aea3e72a020ec'))
paddle.fluid.layers.scatter_nd (ArgSpec(args=['index', 'updates', 'shape', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e43f1d3a938b35da246aea3e72a020ec'))
paddle.fluid.layers.sequence_scatter (ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'abe3f714120117a5a3d3e639853932bf'))
paddle.fluid.layers.sequence_scatter (ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'abe3f714120117a5a3d3e639853932bf'))
paddle.fluid.layers.random_crop (ArgSpec(args=['x', 'shape', 'seed'], varargs=None, keywords=None, defaults=(None,)), ('document', '
042af0b8abea96b40c22f6e70d99e042
'))
paddle.fluid.layers.random_crop (ArgSpec(args=['x', 'shape', 'seed'], varargs=None, keywords=None, defaults=(None,)), ('document', '
44f35002962cf24e14dd2958f6584e3d
'))
paddle.fluid.layers.mean_iou (ArgSpec(args=['input', 'label', 'num_classes'], varargs=None, keywords=None, defaults=None), ('document', 'dea29c0c3cdbd5b498afef60e58c9d7c'))
paddle.fluid.layers.mean_iou (ArgSpec(args=['input', 'label', 'num_classes'], varargs=None, keywords=None, defaults=None), ('document', 'dea29c0c3cdbd5b498afef60e58c9d7c'))
paddle.fluid.layers.relu (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '0942c174f4f6fb274976d4357356f6a2'))
paddle.fluid.layers.relu (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '0942c174f4f6fb274976d4357356f6a2'))
paddle.fluid.layers.selu (ArgSpec(args=['x', 'scale', 'alpha', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '3ee40bc474b4bccdaf112d3f0d847318'))
paddle.fluid.layers.selu (ArgSpec(args=['x', 'scale', 'alpha', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '3ee40bc474b4bccdaf112d3f0d847318'))
...
@@ -408,10 +408,10 @@ paddle.fluid.layers.cumsum (ArgSpec(args=['x', 'axis', 'exclusive', 'reverse'],
...
@@ -408,10 +408,10 @@ paddle.fluid.layers.cumsum (ArgSpec(args=['x', 'axis', 'exclusive', 'reverse'],
paddle.fluid.layers.thresholded_relu (ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,)), ('document', '94c71025bf11ab8172fd455350274138'))
paddle.fluid.layers.thresholded_relu (ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,)), ('document', '94c71025bf11ab8172fd455350274138'))
paddle.fluid.layers.prior_box (ArgSpec(args=['input', 'image', 'min_sizes', 'max_sizes', 'aspect_ratios', 'variance', 'flip', 'clip', 'steps', 'offset', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, [1.0], [0.1, 0.1, 0.2, 0.2], False, False, [0.0, 0.0], 0.5, None, False)), ('document', '0fdf82762fd0a5acb2578a72771b5b44'))
paddle.fluid.layers.prior_box (ArgSpec(args=['input', 'image', 'min_sizes', 'max_sizes', 'aspect_ratios', 'variance', 'flip', 'clip', 'steps', 'offset', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, [1.0], [0.1, 0.1, 0.2, 0.2], False, False, [0.0, 0.0], 0.5, None, False)), ('document', '0fdf82762fd0a5acb2578a72771b5b44'))
paddle.fluid.layers.density_prior_box (ArgSpec(args=['input', 'image', 'densities', 'fixed_sizes', 'fixed_ratios', 'variance', 'clip', 'steps', 'offset', 'flatten_to_2d', 'name'], varargs=None, keywords=None, defaults=(None, None, None, [0.1, 0.1, 0.2, 0.2], False, [0.0, 0.0], 0.5, False, None)), ('document', '7a484a0da5e993a7734867a3dfa86571'))
paddle.fluid.layers.density_prior_box (ArgSpec(args=['input', 'image', 'densities', 'fixed_sizes', 'fixed_ratios', 'variance', 'clip', 'steps', 'offset', 'flatten_to_2d', 'name'], varargs=None, keywords=None, defaults=(None, None, None, [0.1, 0.1, 0.2, 0.2], False, [0.0, 0.0], 0.5, False, None)), ('document', '7a484a0da5e993a7734867a3dfa86571'))
paddle.fluid.layers.multi_box_head (ArgSpec(args=['inputs', 'image', 'base_size', 'num_classes', 'aspect_ratios', 'min_ratio', 'max_ratio', 'min_sizes', 'max_sizes', 'steps', 'step_w', 'step_h', 'offset', 'variance', 'flip', 'clip', 'kernel_size', 'pad', 'stride', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None, 0.5, [0.1, 0.1, 0.2, 0.2], True, False, 1, 0, 1, None, False)), ('document', '
fd58078fdfffd899b91f992ba224628f
'))
paddle.fluid.layers.multi_box_head (ArgSpec(args=['inputs', 'image', 'base_size', 'num_classes', 'aspect_ratios', 'min_ratio', 'max_ratio', 'min_sizes', 'max_sizes', 'steps', 'step_w', 'step_h', 'offset', 'variance', 'flip', 'clip', 'kernel_size', 'pad', 'stride', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None, 0.5, [0.1, 0.1, 0.2, 0.2], True, False, 1, 0, 1, None, False)), ('document', '
61360150b911fa4097f1a221f5d49877
'))
paddle.fluid.layers.bipartite_match (ArgSpec(args=['dist_matrix', 'match_type', 'dist_threshold', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '6f795f407a8e3a3ec3da52726c73405a'))
paddle.fluid.layers.bipartite_match (ArgSpec(args=['dist_matrix', 'match_type', 'dist_threshold', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '6f795f407a8e3a3ec3da52726c73405a'))
paddle.fluid.layers.target_assign (ArgSpec(args=['input', 'matched_indices', 'negative_indices', 'mismatch_value', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '
e9685f32d21bec8c013626c0254502c5
'))
paddle.fluid.layers.target_assign (ArgSpec(args=['input', 'matched_indices', 'negative_indices', 'mismatch_value', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '
4670c1be208835fc8edd61025c21d0e4
'))
paddle.fluid.layers.detection_output (ArgSpec(args=['loc', 'scores', 'prior_box', 'prior_box_var', 'background_label', 'nms_threshold', 'nms_top_k', 'keep_top_k', 'score_threshold', 'nms_eta', 'return_index'], varargs=None, keywords=None, defaults=(0, 0.3, 400, 200, 0.01, 1.0, False)), ('document', '
5485bcaceb0cde2695565a2ffd5bbd40
'))
paddle.fluid.layers.detection_output (ArgSpec(args=['loc', 'scores', 'prior_box', 'prior_box_var', 'background_label', 'nms_threshold', 'nms_top_k', 'keep_top_k', 'score_threshold', 'nms_eta', 'return_index'], varargs=None, keywords=None, defaults=(0, 0.3, 400, 200, 0.01, 1.0, False)), ('document', '
ed19f55b366e68ed686318ef7aff120d
'))
paddle.fluid.layers.ssd_loss (ArgSpec(args=['location', 'confidence', 'gt_box', 'gt_label', 'prior_box', 'prior_box_var', 'background_label', 'overlap_threshold', 'neg_pos_ratio', 'neg_overlap', 'loc_loss_weight', 'conf_loss_weight', 'match_type', 'mining_type', 'normalize', 'sample_size'], varargs=None, keywords=None, defaults=(None, 0, 0.5, 3.0, 0.5, 1.0, 1.0, 'per_prediction', 'max_negative', True, None)), ('document', '1f1ab4f908ceddef1d99a8363e6826af'))
paddle.fluid.layers.ssd_loss (ArgSpec(args=['location', 'confidence', 'gt_box', 'gt_label', 'prior_box', 'prior_box_var', 'background_label', 'overlap_threshold', 'neg_pos_ratio', 'neg_overlap', 'loc_loss_weight', 'conf_loss_weight', 'match_type', 'mining_type', 'normalize', 'sample_size'], varargs=None, keywords=None, defaults=(None, 0, 0.5, 3.0, 0.5, 1.0, 1.0, 'per_prediction', 'max_negative', True, None)), ('document', '1f1ab4f908ceddef1d99a8363e6826af'))
paddle.fluid.layers.rpn_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'is_crowd', 'im_info', 'rpn_batch_size_per_im', 'rpn_straddle_thresh', 'rpn_fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.0, 0.5, 0.7, 0.3, True)), ('document', 'd46629656b4ce9b07809e32c0482cbef'))
paddle.fluid.layers.rpn_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'is_crowd', 'im_info', 'rpn_batch_size_per_im', 'rpn_straddle_thresh', 'rpn_fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.0, 0.5, 0.7, 0.3, True)), ('document', 'd46629656b4ce9b07809e32c0482cbef'))
paddle.fluid.layers.retinanet_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'gt_labels', 'is_crowd', 'im_info', 'num_classes', 'positive_overlap', 'negative_overlap'], varargs=None, keywords=None, defaults=(1, 0.5, 0.4)), ('document', '543b2a40641260e745a76b1f7a25fb2a'))
paddle.fluid.layers.retinanet_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'gt_labels', 'is_crowd', 'im_info', 'num_classes', 'positive_overlap', 'negative_overlap'], varargs=None, keywords=None, defaults=(1, 0.5, 0.4)), ('document', '543b2a40641260e745a76b1f7a25fb2a'))
...
@@ -420,7 +420,7 @@ paddle.fluid.layers.anchor_generator (ArgSpec(args=['input', 'anchor_sizes', 'as
...
@@ -420,7 +420,7 @@ paddle.fluid.layers.anchor_generator (ArgSpec(args=['input', 'anchor_sizes', 'as
paddle.fluid.layers.roi_perspective_transform (ArgSpec(args=['input', 'rois', 'transformed_height', 'transformed_width', 'spatial_scale', 'name'], varargs=None, keywords=None, defaults=(1.0, None)), ('document', 'b007f545ad41e66b814203bdb76516c6'))
paddle.fluid.layers.roi_perspective_transform (ArgSpec(args=['input', 'rois', 'transformed_height', 'transformed_width', 'spatial_scale', 'name'], varargs=None, keywords=None, defaults=(1.0, None)), ('document', 'b007f545ad41e66b814203bdb76516c6'))
paddle.fluid.layers.generate_proposal_labels (ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random', 'is_cls_agnostic', 'is_cascade_rcnn'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True, False, False)), ('document', 'f2342042127b536a0a16390f149f1bba'))
paddle.fluid.layers.generate_proposal_labels (ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random', 'is_cls_agnostic', 'is_cascade_rcnn'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True, False, False)), ('document', 'f2342042127b536a0a16390f149f1bba'))
paddle.fluid.layers.generate_proposals (ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None)), ('document', '5cba014b41610431f8949e2d7336f1cc'))
paddle.fluid.layers.generate_proposals (ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None)), ('document', '5cba014b41610431f8949e2d7336f1cc'))
paddle.fluid.layers.generate_mask_labels (ArgSpec(args=['im_info', 'gt_classes', 'is_crowd', 'gt_segms', 'rois', 'labels_int32', 'num_classes', 'resolution'], varargs=None, keywords=None, defaults=None), ('document', '
b319b10ddaf17fb4ddf03518685a17ef
'))
paddle.fluid.layers.generate_mask_labels (ArgSpec(args=['im_info', 'gt_classes', 'is_crowd', 'gt_segms', 'rois', 'labels_int32', 'num_classes', 'resolution'], varargs=None, keywords=None, defaults=None), ('document', '
2bacc35429f4fffe72a30c5a49a61eb7
'))
paddle.fluid.layers.iou_similarity (ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e24478fd1fcf1727d4947fe14356b3d4'))
paddle.fluid.layers.iou_similarity (ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e24478fd1fcf1727d4947fe14356b3d4'))
paddle.fluid.layers.box_coder (ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name', 'axis'], varargs=None, keywords=None, defaults=('encode_center_size', True, None, 0)), ('document', '511d7033c0cfce1a5b88c04ad6e7ed5b'))
paddle.fluid.layers.box_coder (ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name', 'axis'], varargs=None, keywords=None, defaults=('encode_center_size', True, None, 0)), ('document', '511d7033c0cfce1a5b88c04ad6e7ed5b'))
paddle.fluid.layers.polygon_box_transform (ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '2183f03c4f16712dcef6a474dbcefa24'))
paddle.fluid.layers.polygon_box_transform (ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '2183f03c4f16712dcef6a474dbcefa24'))
...
...
python/paddle/fluid/layers/detection.py
浏览文件 @
df3025c3
...
@@ -525,12 +525,11 @@ def detection_output(loc,
...
@@ -525,12 +525,11 @@ def detection_output(loc,
nms_eta
=
1.0
,
nms_eta
=
1.0
,
return_index
=
False
):
return_index
=
False
):
"""
"""
**Detection Output Layer for Single Shot Multibox Detector (SSD).**
Given the regression locations, classification confidences and prior boxes,
calculate the detection outputs by performing following steps:
This operation is to get the detection results by performing following
1. Decode input bounding box predictions according to the prior boxes and
two steps:
regression locations.
1. Decode input bounding box predictions according to the prior boxes.
2. Get the final detection results by applying multi-class non maximum
2. Get the final detection results by applying multi-class non maximum
suppression (NMS).
suppression (NMS).
...
@@ -539,33 +538,33 @@ def detection_output(loc,
...
@@ -539,33 +538,33 @@ def detection_output(loc,
Args:
Args:
loc(Variable): A 3-D Tensor with shape [N, M, 4] represents the
loc(Variable): A 3-D Tensor with shape [N, M, 4] represents the
predicted locations of M bounding bboxes. N is the batch size,
predicted locations of M bounding bboxes. Data type should be
float32 or float64. N is the batch size,
and each bounding box has four coordinate values and the layout
and each bounding box has four coordinate values and the layout
is [xmin, ymin, xmax, ymax].
is [xmin, ymin, xmax, ymax].
scores(Variable): A 3-D Tensor with shape [N, M, C] represents the
scores(Variable): A 3-D Tensor with shape [N, M, C] represents the
predicted confidence predictions.
N is the batch size, C is the
predicted confidence predictions.
Data type should be float32
class number, M is number of bounding boxes. For each category
or float64. N is the batch size, C is the
there are total M scores which corresponding M
bounding boxes.
class number, M is number of
bounding boxes.
prior_box(Variable): A 2-D Tensor with shape [M, 4] holds M boxes,
prior_box(Variable): A 2-D Tensor with shape [M, 4] holds M boxes,
each box is represented as [xmin, ymin, xmax, ymax],
each box is represented as [xmin, ymin, xmax, ymax]. Data type
[xmin, ymin] is the left top coordinate of the anchor box,
should be float32 or float64.
if the input is image feature map, they are close to the origin
of the coordinate system. [xmax, ymax] is the right bottom
coordinate of the anchor box.
prior_box_var(Variable): A 2-D Tensor with shape [M, 4] holds M group
prior_box_var(Variable): A 2-D Tensor with shape [M, 4] holds M group
of variance.
of variance.
Data type should be float32 or float64.
background_label(
floa
t): The index of background label,
background_label(
in
t): The index of background label,
the background label will be ignored. If set to -1, then all
the background label will be ignored. If set to -1, then all
categories will be considered.
categories will be considered.
Default: 0.
nms_threshold(float): The threshold to be used in NMS.
nms_threshold(float): The threshold to be used in NMS.
Default: 0.3.
nms_top_k(int): Maximum number of detections to be kept according
nms_top_k(int): Maximum number of detections to be kept according
to the confidences after
the
filtering detections based on
to the confidences after filtering detections based on
score_threshold.
score_threshold
and before NMS. Default: 400
.
keep_top_k(int): Number of total bboxes to be kept per image after
keep_top_k(int): Number of total bboxes to be kept per image after
NMS step. -1 means keeping all bboxes after NMS step.
NMS step. -1 means keeping all bboxes after NMS step.
Default: 200.
score_threshold(float): Threshold to filter out bounding boxes with
score_threshold(float): Threshold to filter out bounding boxes with
low confidence score. If not provided, consider all boxes.
low confidence score. If not provided, consider all boxes.
nms_eta(float): The parameter for adaptive NMS.
Default: 0.01.
nms_eta(float): The parameter for adaptive NMS. It works only when the
value is less than 1.0. Default: 1.0.
return_index(bool): Whether to return the selected index. Default: False
return_index(bool): Whether to return the selected index. Default: False
Returns:
Returns:
...
@@ -573,22 +572,18 @@ def detection_output(loc,
...
@@ -573,22 +572,18 @@ def detection_output(loc,
A tuple with two Variables: (Out, Index) if return_index is True,
A tuple with two Variables: (Out, Index) if return_index is True,
otherwise, a tuple with one Variable(Out) is returned.
otherwise, a tuple with one Variable(Out) is returned.
Out: The detection outputs is a LoDTensor with shape [No, 6]. Each row
Out (Variable): The detection outputs is a LoDTensor with shape [No, 6].
has six values: [label, confidence, xmin, ymin, xmax, ymax]. `No` is
Data type is the same as input (loc). Each row has six values:
the total number of detections in this mini-batch. For each instance,
[label, confidence, xmin, ymin, xmax, ymax]. `No` is
the offsets in first dimension are called LoD, the offset number is
the total number of detections in this mini-batch. For each instance,
N + 1, N is the batch size. The i-th image has `LoD[i + 1] - LoD[i]`
the offsets in first dimension are called LoD, the offset number is
detected results, if it is 0, the i-th image has no detected results.
N + 1, N is the batch size. The i-th image has `LoD[i + 1] - LoD[i]`
detected results, if it is 0, the i-th image has no detected results.
If all images have not detected results, LoD will be set to {1}, and
output tensor only contains one value, which is -1.
Index (Variable): Only return when return_index is True. A 2-D LoDTensor
(After version 1.3, when no boxes detected, the lod is changed
with shape [No, 1] represents the selected index which type is Integer.
from {0} to {1}.)
The index is the absolute value cross batches. No is the same number
as Out. If the index is used to gather other attribute such as age,
Index: Only return when return_index is True. A 2-D LoDTensor with
shape [No, 1] represents the selected index which type is Integer.
The index is the absolute value cross batches. No is the same number
as Out. If the index is used to gather other attribute such as age,
one needs to reshape the input(N, M, 1) to (N * M, 1) as first, where
one needs to reshape the input(N, M, 1) to (N * M, 1) as first, where
N is the batch size and M is the number of boxes.
N is the batch size and M is the number of boxes.
...
@@ -598,14 +593,10 @@ def detection_output(loc,
...
@@ -598,14 +593,10 @@ def detection_output(loc,
import paddle.fluid as fluid
import paddle.fluid as fluid
pb = fluid.layers.data(name='prior_box', shape=[10, 4],
pb = fluid.data(name='prior_box', shape=[10, 4], dtype='float32')
append_batch_size=False, dtype='float32')
pbv = fluid.data(name='prior_box_var', shape=[10, 4], dtype='float32')
pbv = fluid.layers.data(name='prior_box_var', shape=[10, 4],
loc = fluid.data(name='target_box', shape=[2, 21, 4], dtype='float32')
append_batch_size=False, dtype='float32')
scores = fluid.data(name='scores', shape=[2, 21, 10], dtype='float32')
loc = fluid.layers.data(name='target_box', shape=[2, 21, 4],
append_batch_size=False, dtype='float32')
scores = fluid.layers.data(name='scores', shape=[2, 21, 10],
append_batch_size=False, dtype='float32')
nmsed_outs, index = fluid.layers.detection_output(scores=scores,
nmsed_outs, index = fluid.layers.detection_output(scores=scores,
loc=loc,
loc=loc,
prior_box=pb,
prior_box=pb,
...
@@ -1318,51 +1309,57 @@ def target_assign(input,
...
@@ -1318,51 +1309,57 @@ def target_assign(input,
out[j][j][0 : K] = {mismatch_value, mismatch_value, ...}
out[j][j][0 : K] = {mismatch_value, mismatch_value, ...}
out_weight[i][j] = 0.
out_weight[i][j] = 0.
2. Assigning out
_weight
based on `neg_indices` if `neg_indices` is provided:
2. Assigning out
puts
based on `neg_indices` if `neg_indices` is provided:
Assumed that
the row offset for each instance in `neg_indices` is called neg_lod
,
Assumed that
i-th instance in `neg_indices` is called `neg_indice`
,
for i-th instance
and each `id` of neg_indices in this instance
:
for i-th instance:
.. code-block:: text
.. code-block:: text
out[i][id][0 : K] = {mismatch_value, mismatch_value, ...}
for id in neg_indice:
out_weight[i][id] = 1.0
out[i][id][0 : K] = {mismatch_value, mismatch_value, ...}
out_weight[i][id] = 1.0
Args:
Args:
inputs (Variable): This input is a 3D LoDTensor with shape [M, P, K].
input (Variable): This input is a 3D LoDTensor with shape [M, P, K].
matched_indices (Variable): Tensor<int>), The input matched indices
Data type should be int32 or float32.
matched_indices (Variable): The input matched indices
is 2D Tensor<int32> with shape [N, P]. If MatchIndices[i][j] is -1,
is 2D Tensor<int32> with shape [N, P]. If MatchIndices[i][j] is -1,
the j-th entity of column is not matched to any entity of row in
the j-th entity of column is not matched to any entity of row in
i-th instance.
i-th instance.
negative_indices (Variable
): The input negative example indices are
negative_indices (Variable
, optional): The input negative example indices
an optional input with shape [Neg, 1] and int32 type, where Neg is
a
re a
n optional input with shape [Neg, 1] and int32 type, where Neg is
the total number of negative example indices.
the total number of negative example indices.
mismatch_value (float32): Fill this value to the mismatched location.
mismatch_value (float32, optional): Fill this value to the mismatched
location.
name (string): The default value is None. Normally there is no need for
user to set this property. For more information, please refer
to :ref:`api_guide_Name`.
Returns:
Returns:
tuple:
tuple: A tuple(out, out_weight) is returned.
A tuple(out, out_weight) is returned. out is a 3D Tensor with
shape [N, P, K], N and P is the same as they are in
out (Variable): a 3D Tensor with shape [N, P, K] and same data type
`neg_indices`, K is the same as it in input of X. If
with `input`, N and P are the same as they are in `matched_indices`,
`match_indices[i][j]`. out_weight is the weight for output with
K is the same as it in input of X.
the shape of [N, P, 1].
out_weight (Variable): the weight for output with the shape of [N, P, 1].
Data type is float32.
Examples:
Examples:
.. code-block:: python
.. code-block:: python
import paddle.fluid as fluid
import paddle.fluid as fluid
x = fluid.
layers.
data(
x = fluid.data(
name='x',
name='x',
shape=[4, 20, 4],
shape=[4, 20, 4],
dtype='float',
dtype='float',
lod_level=1,
lod_level=1)
append_batch_size=False)
matched_id = fluid.data(
matched_id = fluid.layers.data(
name='indices',
name='indices',
shape=[8, 20],
shape=[8, 20],
dtype='int32',
dtype='int32')
append_batch_size=False)
trg, trg_weight = fluid.layers.target_assign(
trg, trg_weight = fluid.layers.target_assign(
x,
x,
matched_id,
matched_id,
...
@@ -1905,21 +1902,37 @@ def multi_box_head(inputs,
...
@@ -1905,21 +1902,37 @@ def multi_box_head(inputs,
name
=
None
,
name
=
None
,
min_max_aspect_ratios_order
=
False
):
min_max_aspect_ratios_order
=
False
):
"""
"""
Generate prior boxes for SSD(Single Shot MultiBox Detector)
Based on the SSD (Single Shot MultiBox Detector) algorithm, generate prior boxes,
algorithm. The details of this algorithm, please refer the
regression location and classification confidence on multiple input feature
section 2.2 of SSD paper `SSD: Single Shot MultiBox Detector
maps, then output the concatenate results. The details of this algorithm,
please refer the section 2.2 of SSD paper `SSD: Single Shot MultiBox Detector
<https://arxiv.org/abs/1512.02325>`_ .
<https://arxiv.org/abs/1512.02325>`_ .
Args:
Args:
inputs(list|tuple): The list of input Variables, the format
inputs (list(Variable)|tuple(Variable)): The list of input variables,
of all Variables is NCHW.
all Variables are 4-D Tensors, layout is NCHW.
image(Variable): The input image data of PriorBoxOp,
Data type should be float32 or float64.
the layout is NCHW.
image (Variable): The input image, layout is NCHW. Data type should be
base_size(int): the base_size is used to get min_size
the same as inputs.
and max_size according to min_ratio and max_ratio.
base_size(int): the base_size is input image size. When len(inputs) > 2
and `min_size` and `max_size` are None, the `min_size` and `max_size`
are calculated by `base_size`, `min_ratio` and `max_ratio`. The
formula is as follows:
.. code-block:: text
min_sizes = []
max_sizes = []
step = int(math.floor(((max_ratio - min_ratio)) / (num_layer - 2)))
for ratio in six.moves.range(min_ratio, max_ratio + 1, step):
min_sizes.append(base_size * ratio / 100.)
max_sizes.append(base_size * (ratio + step) / 100.)
min_sizes = [base_size * .10] + min_sizes
max_sizes = [base_size * .20] + max_sizes
num_classes(int): The number of classes.
num_classes(int): The number of classes.
aspect_ratios(list
|tuple): the aspect ratios of generated prior
aspect_ratios(list
(float) | tuple(float)): the aspect ratios of generated
boxes. The length of input and aspect_ratios must be equal.
prior
boxes. The length of input and aspect_ratios must be equal.
min_ratio(int): the min ratio of generated prior boxes.
min_ratio(int): the min ratio of generated prior boxes.
max_ratio(int): the max ratio of generated prior boxes.
max_ratio(int): the max ratio of generated prior boxes.
min_sizes(list|tuple|None): If `len(inputs) <=2`,
min_sizes(list|tuple|None): If `len(inputs) <=2`,
...
@@ -1945,7 +1958,9 @@ def multi_box_head(inputs,
...
@@ -1945,7 +1958,9 @@ def multi_box_head(inputs,
kernel_size(int): The kernel size of conv2d. Default: 1.
kernel_size(int): The kernel size of conv2d. Default: 1.
pad(int|list|tuple): The padding of conv2d. Default:0.
pad(int|list|tuple): The padding of conv2d. Default:0.
stride(int|list|tuple): The stride of conv2d. Default:1,
stride(int|list|tuple): The stride of conv2d. Default:1,
name(str): Name of the prior box layer. Default: None.
name(str): The default value is None. Normally there is no need
for user to set this property. For more information, please
refer to :ref:`api_guide_Name`.
min_max_aspect_ratios_order(bool): If set True, the output prior box is
min_max_aspect_ratios_order(bool): If set True, the output prior box is
in order of [min, max, aspect_ratios], which is consistent with
in order of [min, max, aspect_ratios], which is consistent with
Caffe. Please note, this order affects the weights order of
Caffe. Please note, this order affects the weights order of
...
@@ -1955,33 +1970,34 @@ def multi_box_head(inputs,
...
@@ -1955,33 +1970,34 @@ def multi_box_head(inputs,
Returns:
Returns:
tuple: A tuple with four Variables. (mbox_loc, mbox_conf, boxes, variances)
tuple: A tuple with four Variables. (mbox_loc, mbox_conf, boxes, variances)
mbox_loc: The predicted boxes' location of the inputs. The layout
mbox_loc (Variable): The predicted boxes' location of the inputs. The
is [N, H*W*Priors, 4]. where Priors is the number of predicted
layout is [N, num_priors, 4], where N is batch size, ``num_priors``
boxes each position of each input.
is the number of prior boxes. Data type is the same as input.
mbox_conf: The predicted boxes' confidence of the inputs. The layout
is [N, H*W*Priors, C]. where Priors is the number of predicted boxes
each position of each input and C is the number of Classes.
boxes: the output prior boxes of PriorBox. The layout is [num_priors, 4].
mbox_conf (Variable): The predicted boxes' confidence of the inputs.
num_priors is the total box count of each position of inputs.
The layout is [N, num_priors, C], where ``N`` and ``num_priors``
have the same meaning as above. C is the number of classes.
Data type is the same as input.
variances: the expanded variances of PriorBox. The layout is
boxes (Variable): the output prior boxes. The layout is [num_priors, 4].
[num_priors, 4]. num_priors is the total box count of each position of inputs
The meaning of num_priors is the same as above.
Data type is the same as input.
variances (Variable): the expanded variances for prior boxes.
The layout is [num_priors, 4]. Data type is the same as input.
Examples:
Examples
1: set min_ratio and max_ratio
:
.. code-block:: python
.. code-block:: python
import paddle.fluid as fluid
import paddle.fluid as fluid
images = fluid.
layers.data(name='data', shape=[
3, 300, 300], dtype='float32')
images = fluid.
data(name='data', shape=[None,
3, 300, 300], dtype='float32')
conv1 = fluid.
layers.data(name='conv1', shape=[
512, 19, 19], dtype='float32')
conv1 = fluid.
data(name='conv1', shape=[None,
512, 19, 19], dtype='float32')
conv2 = fluid.
layers.data(name='conv2', shape=[
1024, 10, 10], dtype='float32')
conv2 = fluid.
data(name='conv2', shape=[None,
1024, 10, 10], dtype='float32')
conv3 = fluid.
layers.data(name='conv3', shape=[
512, 5, 5], dtype='float32')
conv3 = fluid.
data(name='conv3', shape=[None,
512, 5, 5], dtype='float32')
conv4 = fluid.
layers.data(name='conv4', shape=[
256, 3, 3], dtype='float32')
conv4 = fluid.
data(name='conv4', shape=[None,
256, 3, 3], dtype='float32')
conv5 = fluid.
layers.data(name='conv5', shape=[
256, 2, 2], dtype='float32')
conv5 = fluid.
data(name='conv5', shape=[None,
256, 2, 2], dtype='float32')
conv6 = fluid.
layers.data(name='conv6', shape=[
128, 1, 1], dtype='float32')
conv6 = fluid.
data(name='conv6', shape=[None,
128, 1, 1], dtype='float32')
mbox_locs, mbox_confs, box, var = fluid.layers.multi_box_head(
mbox_locs, mbox_confs, box, var = fluid.layers.multi_box_head(
inputs=[conv1, conv2, conv3, conv4, conv5, conv6],
inputs=[conv1, conv2, conv3, conv4, conv5, conv6],
...
@@ -1994,6 +2010,32 @@ def multi_box_head(inputs,
...
@@ -1994,6 +2010,32 @@ def multi_box_head(inputs,
offset=0.5,
offset=0.5,
flip=True,
flip=True,
clip=True)
clip=True)
Examples 2: set min_sizes and max_sizes:
.. code-block:: python
import paddle.fluid as fluid
images = fluid.data(name='data', shape=[None, 3, 300, 300], dtype='float32')
conv1 = fluid.data(name='conv1', shape=[None, 512, 19, 19], dtype='float32')
conv2 = fluid.data(name='conv2', shape=[None, 1024, 10, 10], dtype='float32')
conv3 = fluid.data(name='conv3', shape=[None, 512, 5, 5], dtype='float32')
conv4 = fluid.data(name='conv4', shape=[None, 256, 3, 3], dtype='float32')
conv5 = fluid.data(name='conv5', shape=[None, 256, 2, 2], dtype='float32')
conv6 = fluid.data(name='conv6', shape=[None, 128, 1, 1], dtype='float32')
mbox_locs, mbox_confs, box, var = fluid.layers.multi_box_head(
inputs=[conv1, conv2, conv3, conv4, conv5, conv6],
image=images,
num_classes=21,
min_sizes=[60.0, 105.0, 150.0, 195.0, 240.0, 285.0],
max_sizes=[[], 150.0, 195.0, 240.0, 285.0, 300.0],
aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]],
base_size=300,
offset=0.5,
flip=True,
clip=True)
"""
"""
def
_reshape_with_axis_
(
input
,
axis
=
1
):
def
_reshape_with_axis_
(
input
,
axis
=
1
):
...
@@ -2439,7 +2481,7 @@ def generate_proposal_labels(rpn_rois,
...
@@ -2439,7 +2481,7 @@ def generate_proposal_labels(rpn_rois,
def
generate_mask_labels
(
im_info
,
gt_classes
,
is_crowd
,
gt_segms
,
rois
,
def
generate_mask_labels
(
im_info
,
gt_classes
,
is_crowd
,
gt_segms
,
rois
,
labels_int32
,
num_classes
,
resolution
):
labels_int32
,
num_classes
,
resolution
):
"""
"""
**
Generate Mask Labels for Mask-RCNN
**
**
Generate Mask Labels for Mask-RCNN
**
Given the RoIs and corresponding labels, this operator is used
Given the RoIs and corresponding labels, this operator is used
to sample foreground RoIs. This mask branch also has
to sample foreground RoIs. This mask branch also has
...
@@ -2475,62 +2517,67 @@ def generate_mask_labels(im_info, gt_classes, is_crowd, gt_segms, rois,
...
@@ -2475,62 +2517,67 @@ def generate_mask_labels(im_info, gt_classes, is_crowd, gt_segms, rois,
feeder.feed(batch_masks)
feeder.feed(batch_masks)
Args:
Args:
im_info(Variable): A 2-D Tensor with shape [N, 3]. N is the batch size,
im_info (Variable): A 2-D Tensor with shape [N, 3] and float32
each element is [height, width, scale] of image. Image scale is
data type. N is the batch size, each element is
target_size) / original_size.
[height, width, scale] of image. Image scale is
gt_classes(Variable): A 2-D LoDTensor with shape [M, 1]. M is the total
target_size / original_size, target_size is the size after resize,
number of ground-truth, each element is a class label.
original_size is the original image size.
is_crowd(Variable): A 2-D LoDTensor with shape as gt_classes,
gt_classes (Variable): A 2-D LoDTensor with shape [M, 1]. Data type
each element is a flag indicating whether a groundtruth is crowd.
should be int. M is the total number of ground-truth, each
gt_segms(Variable): This input is a 2D LoDTensor with shape [S, 2],
element is a class label.
it's LoD level is 3. Usually users do not needs to understand LoD,
is_crowd (Variable): A 2-D LoDTensor with same shape and same data type
as gt_classes, each element is a flag indicating whether a
groundtruth is crowd.
gt_segms (Variable): This input is a 2D LoDTensor with shape [S, 2] and
float32 data type, its LoD level is 3.
Usually users do not need to understand LoD,
The users should return correct data format in reader.
The users should return correct data format in reader.
The LoD[0] represents the ground-truth objects number of
The LoD[0] represents the gt objects number of
each instance. LoD[1] represents the segmentation counts of each
each instance. LoD[1] represents the segmentation counts of each
objects. LoD[2] represents the polygons number of each segmentation.
objects. LoD[2] represents the polygons number of each segmentation.
S is the total number of polygon coordinate points. Each element is
S is the total number of polygon coordinate points. Each element is
(x, y) coordinate points.
(x, y) coordinate points.
rois
(Variable): A 2-D LoDTensor with shape [R, 4]. R is the total
rois
(Variable): A 2-D LoDTensor with shape [R, 4] and data type
number of RoIs, each element is a bounding box with
float32. R is the total number of RoIs, each element is a bounding
(xmin, ymin, xmax, ymax) format in the range of original image.
box with
(xmin, ymin, xmax, ymax) format in the range of original image.
labels_int32(Variable): A 2-D LoDTensor in shape of [R, 1] with type
labels_int32
(Variable): A 2-D LoDTensor in shape of [R, 1] with type
of int32. R is the same as it in `rois`. Each element represents
of int32. R is the same as it in `rois`. Each element represents
a class label of a RoI.
a class label of a RoI.
num_classes(int): Class number.
num_classes
(int): Class number.
resolution(int): Resolution of mask predictions.
resolution
(int): Resolution of mask predictions.
Returns:
Returns:
mask_rois (Variable): A 2D LoDTensor with shape [P, 4]. P is the total
mask_rois (Variable): A 2D LoDTensor with shape [P, 4] and same data
number of sampled RoIs. Each element is a bounding box with
type as `rois`. P is the total number of sampled RoIs. Each element
[xmin, ymin, xmax, ymax] format in range of orignal image size.
is a bounding box with [xmin, ymin, xmax, ymax] format in range of
mask_rois_has_mask_int32 (Variable): A 2D LoDTensor with shape [P, 1],
original image size.
each element repersents the output mask RoI index with regard to
to input RoIs.
mask_rois_has_mask_int32 (Variable): A 2D LoDTensor with shape [P, 1]
mask_int32 (Variable): A 2D LoDTensor with shape [P, K * M * M],
and int data type, each element represents the output mask RoI
K is the classes number and M is the resolution of mask predictions.
index with regard to input RoIs.
Each element repersents the binary mask targets.
mask_int32 (Variable): A 2D LoDTensor with shape [P, K * M * M] and int
data type, K is the classes number and M is the resolution of mask
predictions. Each element represents the binary mask targets.
Examples:
Examples:
.. code-block:: python
.. code-block:: python
import paddle.fluid as fluid
import paddle.fluid as fluid
im_info = fluid.
layers.data(name="im_info", shape=[
3],
im_info = fluid.
data(name="im_info", shape=[None,
3],
dtype="float32")
dtype="float32")
gt_classes = fluid.
layers.data(name="gt_classes", shape=[
1],
gt_classes = fluid.
data(name="gt_classes", shape=[None,
1],
dtype="float32", lod_level=1)
dtype="float32", lod_level=1)
is_crowd = fluid.
layers.data(name="is_crowd", shape=[
1],
is_crowd = fluid.
data(name="is_crowd", shape=[None,
1],
dtype="float32", lod_level=1)
dtype="float32", lod_level=1)
gt_masks = fluid.
layers.data(name="gt_masks", shape=[
2],
gt_masks = fluid.
data(name="gt_masks", shape=[None,
2],
dtype="float32", lod_level=3)
dtype="float32", lod_level=3)
# rois, roi_labels can be the output of
# rois, roi_labels can be the output of
# fluid.layers.generate_proposal_labels.
# fluid.layers.generate_proposal_labels.
rois = fluid.
layers.data(name="rois", shape=[
4],
rois = fluid.
data(name="rois", shape=[None,
4],
dtype="float32", lod_level=1)
dtype="float32", lod_level=1)
roi_labels = fluid.
layers.data(name="roi_labels", shape=[
1],
roi_labels = fluid.
data(name="roi_labels", shape=[None,
1],
dtype="int32", lod_level=1)
dtype="int32", lod_level=1)
mask_rois, mask_index, mask_int32 = fluid.layers.generate_mask_labels(
mask_rois, mask_index, mask_int32 = fluid.layers.generate_mask_labels(
im_info=im_info,
im_info=im_info,
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
df3025c3
...
@@ -10510,9 +10510,19 @@ def random_crop(x, shape, seed=None):
...
@@ -10510,9 +10510,19 @@ def random_crop(x, shape, seed=None):
${out_comment}
${out_comment}
Examples:
Examples:
>>> import paddle.fluid as fluid
.. code-block:: python
>>> img = fluid.layers.data("img", [3, 256, 256])
>>> cropped_img = fluid.layers.random_crop(img, shape=[3, 224, 224])
import paddle.fluid as fluid
img = fluid.data("img", [None, 3, 256, 256])
# cropped_img is [-1, 3, 224, 224]
cropped_img = fluid.layers.random_crop(img, shape=[3, 224, 224])
# cropped_img2 shape: [-1, 2, 224, 224]
# cropped_img2 = fluid.layers.random_crop(img, shape=[2, 224, 224])
# cropped_img3 shape: [-1, 3, 128, 224]
# cropped_img3 = fluid.layers.random_crop(img, shape=[128, 224])
"""
"""
helper = LayerHelper("random_crop", **locals())
helper = LayerHelper("random_crop", **locals())
dtype = x.dtype
dtype = x.dtype
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录