From 0099e549244048472b970770156c494a4fe38845 Mon Sep 17 00:00:00 2001 From: chengjuntao <18222160892@163.com> Date: Wed, 25 Sep 2019 16:31:57 +0800 Subject: [PATCH] refine deformable roi pooling doc (#19944) * refine doc, test=develop, test=document_preview --- paddle/fluid/API.spec | 2 +- python/paddle/fluid/layers/nn.py | 106 +++++++++++++++++++++---------- 2 files changed, 75 insertions(+), 33 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 9080a53ab39..1fd4880c162 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -294,7 +294,7 @@ paddle.fluid.layers.where (ArgSpec(args=['condition'], varargs=None, keywords=No paddle.fluid.layers.sign (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', 'fa2f457a81714430c5677c2d68744728')) paddle.fluid.layers.deformable_conv (ArgSpec(args=['input', 'offset', 'mask', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'deformable_groups', 'im2col_step', 'param_attr', 'bias_attr', 'modulated', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, None, None, True, None)), ('document', '335193ac57d41d7199f8d26d30c069b1')) paddle.fluid.layers.unfold (ArgSpec(args=['x', 'kernel_sizes', 'strides', 'paddings', 'dilations', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None)), ('document', '3f884662ad443d9ecc2b3734b4f61ad6')) -paddle.fluid.layers.deformable_roi_pooling (ArgSpec(args=['input', 'rois', 'trans', 'no_trans', 'spatial_scale', 'group_size', 'pooled_height', 'pooled_width', 'part_size', 'sample_per_part', 'trans_std', 'position_sensitive', 'name'], varargs=None, keywords=None, defaults=(False, 1.0, [1, 1], 1, 1, None, 1, 0.1, False, None)), ('document', '99c03e3f249e36854f87dedaa17c8f35')) +paddle.fluid.layers.deformable_roi_pooling (ArgSpec(args=['input', 'rois', 'trans', 'no_trans', 'spatial_scale', 'group_size', 'pooled_height', 'pooled_width', 'part_size', 'sample_per_part', 'trans_std', 
'position_sensitive', 'name'], varargs=None, keywords=None, defaults=(False, 1.0, [1, 1], 1, 1, None, 1, 0.1, False, None)), ('document', '47c5d1c890b36fa00ff3285c9398f613')) paddle.fluid.layers.filter_by_instag (ArgSpec(args=['ins', 'ins_tag', 'filter_tag', 'is_lod'], varargs=None, keywords=None, defaults=None), ('document', '7703a2088af8de4128b143ff1164ca4a')) paddle.fluid.layers.shard_index (ArgSpec(args=['input', 'index_num', 'nshards', 'shard_id', 'ignore_value'], varargs=None, keywords=None, defaults=(-1,)), ('document', 'c4969dd6bf164f9e6a90414ea4f4e5ad')) paddle.fluid.layers.hard_swish (ArgSpec(args=['x', 'threshold', 'scale', 'offset', 'name'], varargs=None, keywords=None, defaults=(6.0, 6.0, 3.0, None)), ('document', '6a5152a7015c62cb8278fc24cb456459')) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index ebd81515ade..f4f40cf4a82 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -14197,43 +14197,85 @@ def deformable_roi_pooling(input, position_sensitive=False, name=None): """ - Deformable PSROI Pooling Layer + Deformable ROI Pooling Layer + + Performs deformable region-of-interest pooling on inputs. As described + in `Deformable Convolutional Networks <https://arxiv.org/abs/1703.06211>`_, it obtains an offset for each bin after + ROI pooling so that pooling is performed at the correct region. Batch_size will change to the number of region bounding boxes after deformable_roi_pooling. + + The operation has three steps: - Args: - input (Variable):The input of Deformable PSROIPooling.The shape of input tensor is - [N,C,H,W]. Where N is batch size,C is number of input channels,H - is height of the feature, and W is the width of the feature. - rois (Variable): ROIs (Regions of Interest) to pool over.It should be - a 2-D LoDTensor of shape (num_rois, 4), the lod level - is 1. Given as [[x1, y1, x2, y2], ...], (x1, y1) is - the top left coordinates, and (x2, y2) is the bottom - right coordinates. 
- trans (Variable): Offset of features on ROIs while pooling.The format is NCHW, where - N is number of ROIs, C is number of channels, which indicate the offset distance - in the x and y directions, H is pooled height, and W is pooled width. - no_trans (bool): Whether to add offset to get new value or not while roi pooling, which - value is True or False. Default: False. - spatial_scale (float): Ratio of input feature map height (or width) to raw image height (or width). - Equals the reciprocal of total stride in convolutional layers, Default: 1.0. - group_size (list|tuple): The number of groups which input channels are divided.(eg.number of input channels - is k1*k2*(C+1), which k1 and k2 are group width and height and C+1 is number of output - chanels. eg.(4, 6), which 4 is height of group and 6 is width of group. Default: [1, 1]. - pooled_height (integer): The pooled output height. Default: 1. - pooled_width (integer): The pooled output width. Default: 1. - part_size (list|tuple): The height and width of offset, eg.(4, 6), which height is 4 and width is 6, Default: - if None, default value is [pooled_height, pooled_width]. - sample_per_part (integer): The number of samples in each bin. Default: 1. - trans_std (float): Coefficient of offset. Default: 0.1. - position_sensitive (bool): Whether to choose deformable psroi pooling mode or not. Default: False. - name (str): Name of layer. Default: None. - Returns: - Variable: The tensor variable storing the deformable psroi pooling \ - result. - + 1. Dividing each region proposal into equal-sized sections with the pooled_width and pooled_height. + + 2. Add offset to pixel in ROI to get new location and the new value which are computed directly through + bilinear interpolation with four nearest pixel. + + 3. Sample several points in each bin to get average values as output. + + + Args: + input (Variable):The input of deformable roi pooling and it is tensor which value type is float32. 
The shape of input is + [N, C, H, W]. Where N is batch size, C is number of input channels, + H is height of the feature, and W is the width of the feature. + rois (Variable): ROIs (Regions of Interest) with type float32 to pool over. It should be + a 2-D LoDTensor of shape (num_rois, 4), and the lod level + is 1. Given as [[x1, y1, x2, y2], ...], (x1, y1) is + the top left coordinates, and (x2, y2) is the bottom + right coordinates, which value type is float32. + trans (Variable): Offset of features on ROIs while pooling which value type is float32. The format is [N, C, H, W], where + N is number of ROIs, C is number of channels, which indicate the offset distance + in the x and y directions, H is pooled height, and W is pooled width. + no_trans (bool): Whether to add offset to get new value or not while roi pooling, which value with type bool is True or False. + If value is True, no offset will be added in operation. Default: False. + spatial_scale (float): Ratio of input feature map height (or width) to raw image height (or width), which value type is float32. + Equals the reciprocal of total stride in convolutional layers, Default: 1.0. + group_size (list|tuple): The number of groups which input channels are divided and the input is list or tuple, which value type is int32. (e.g. number of input channels + is k1 * k2 * (C + 1), which k1 and k2 are group width and height and C+1 is number of output + channels.) e.g. (4, 6), which 4 is height of group and 6 is width of group. Default: [1, 1]. + pooled_height (int): The pooled output height which value type is int32. Default: 1. + pooled_width (int): The pooled output width which value type is int32. Default: 1. + part_size (list|tuple): The height and width of offset which values in list or tuple is int32, e.g. (4, 6), which height is 4 and width is 6, and values always equal to pooled_height \ + and pooled_width. Default: if None, default value is [pooled_height, pooled_width]. 
+ sample_per_part (int): The number of samples in each bin which value type is int32. If value is bigger, it will consume more performance. Default: 1. + trans_std (float): Coefficient of offset which value type is float32. It controls weight of offset. Default: 0.1. + position_sensitive (bool): Whether to choose deformable psroi pooling mode or not, and value type is bool(True or False). If value is False, input dimension equals output dimension. \ + If value is True, input dimension should be output dimension * pooled_height * pooled_width. Default: False. + name (str|None): Name of layer. Default: None. + Returns: + Variable: Output of deformable roi pooling is that, if position sensitive is False, input dimension equals output dimension. If position sensitive is True,\ + output dimension is the result of input dimension divided by (pooled_height * pooled_width). Examples: .. code-block:: python + # position_sensitive=True + import paddle.fluid as fluid + input = fluid.layers.data(name="input", + shape=[2, 192, 64, 64], + dtype='float32', + append_batch_size=False) + rois = fluid.layers.data(name="rois", + shape=[4], + dtype='float32', + lod_level=1) + trans = fluid.layers.data(name="trans", + shape=[2, 384, 64, 64], + dtype='float32', + append_batch_size=False) + x = fluid.layers.nn.deformable_roi_pooling(input=input, + rois=rois, + trans=trans, + no_trans=False, + spatial_scale=1.0, + group_size=(1, 1), + pooled_height=8, + pooled_width=8, + part_size=(8, 8), + sample_per_part=4, + trans_std=0.1, + position_sensitive=True) + + # position_sensitive=False import paddle.fluid as fluid input = fluid.layers.data(name="input", shape=[2, 192, 64, 64], -- GitLab