From 324e73fc4af9280d0473949f4978c29ad97b9064 Mon Sep 17 00:00:00 2001 From: dengkaipeng Date: Fri, 22 Feb 2019 04:06:49 +0000 Subject: [PATCH] fix adaptive pool doc.test=develop --- .../operators/detection/yolov3_loss_op.cc | 40 +++--- paddle/fluid/operators/pool_op.cc | 129 +++++++++++------- python/paddle/fluid/layers/detection.py | 19 +-- python/paddle/fluid/layers/nn.py | 66 +++++++-- 4 files changed, 167 insertions(+), 87 deletions(-) diff --git a/paddle/fluid/operators/detection/yolov3_loss_op.cc b/paddle/fluid/operators/detection/yolov3_loss_op.cc index 2a69ad4b5..ab01bdf7c 100644 --- a/paddle/fluid/operators/detection/yolov3_loss_op.cc +++ b/paddle/fluid/operators/detection/yolov3_loss_op.cc @@ -144,34 +144,40 @@ class Yolov3LossOpMaker : public framework::OpProtoAndCheckerMaker { "The ignore threshold to ignore confidence loss.") .SetDefault(0.7); AddComment(R"DOC( - This operator generate yolov3 loss by given predict result and ground + This operator generates yolov3 loss based on given predict result and ground truth boxes. The output of previous network is in shape [N, C, H, W], while H and W - should be the same, specify the grid size, each grid point predict given - number boxes, this given number is specified by anchors, it should be - half anchors length, which following will be represented as S. In the - second dimention(the channel dimention), C should be S * (class_num + 5), - class_num is the box categoriy number of source dataset(such as coco), - so in the second dimention, stores 4 box location coordinates x, y, w, h - and confidence score of the box and class one-hot key of each anchor box. + should be the same, H and W specify the grid size, each grid point predict + given number boxes, this given number, which following will be represented as S, + is specified by the number of anchors, In the second dimension(the channel + dimension), C should be equal to S * (class_num + 5), class_num is the object + category number of source dataset(such as 80 in coco dataset), so in the + second(channel) dimension, apart from 4 box location coordinates x, y, w, h, + also includes confidence score of the box and class one-hot key of each anchor box. - While the 4 location coordinates if $$tx, ty, tw, th$$, the box predictions - correspnd to: + Assume the 4 location coordinates are :math:`t_x, t_y, t_w, t_h`, the box predictions + should be as follows: $$ - b_x = \sigma(t_x) + c_x - b_y = \sigma(t_y) + c_y + b_x = \\sigma(t_x) + c_x + $$ + $$ + b_y = \\sigma(t_y) + c_y + $$ + $$ b_w = p_w e^{t_w} + $$ + $$ b_h = p_h e^{t_h} $$ - While $$c_x, c_y$$ is the left top corner of current grid and $$p_w, p_h$$ - is specified by anchors. + In the equation above, :math:`c_x, c_y` is the left top corner of current grid + and :math:`p_w, p_h` is specified by anchors. As for confidence score, it is the logistic regression value of IoU between anchor boxes and ground truth boxes, the score of the anchor box which has - the max IoU should be 1, and if the anchor box has IoU bigger then ignore + the max IoU should be 1, and if the anchor box has IoU bigger than ignore thresh, the confidence score loss of this anchor box will be ignored. Therefore, the yolov3 loss consist of three major parts, box location loss, @@ -186,13 +192,13 @@ class Yolov3LossOpMaker : public framework::OpProtoAndCheckerMaker { In order to trade off box coordinate losses between big boxes and small boxes, box coordinate losses will be mutiplied by scale weight, which is - calculated as follow. + calculated as follows. $$ weight_{box} = 2.0 - t_w * t_h $$ - Final loss will be represented as follow. + Final loss will be represented as follows. $$ loss = (loss_{xy} + loss_{wh}) * weight_{box} diff --git a/paddle/fluid/operators/pool_op.cc b/paddle/fluid/operators/pool_op.cc index 5399ae556..7e1df3b9e 100644 --- a/paddle/fluid/operators/pool_op.cc +++ b/paddle/fluid/operators/pool_op.cc @@ -259,31 +259,40 @@ Example: W_{out} = \\frac{(W_{in} - ksize[1] + 2 * paddings[1] + strides[1] - 1)}{strides[1]} + 1 $$ - For exclusive = true: + For exclusive = false: $$ hstart = i * strides[0] - paddings[0] + $$ + $$ hend = hstart + ksize[0] + $$ + $$ wstart = j * strides[1] - paddings[1] + $$ + $$ wend = wstart + ksize[1] + $$ + $$ Output(i ,j) = \\frac{sum(Input[hstart:hend, wstart:wend])}{ksize[0] * ksize[1]} $$ - For exclusive = false: + + For exclusive = true: $$ hstart = max(0, i * strides[0] - paddings[0]) + $$ + $$ hend = min(H, hstart + ksize[0]) + $$ + $$ wstart = max(0, j * strides[1] - paddings[1]) + $$ + $$ wend = min(W, wstart + ksize[1]) + $$ + $$ Output(i ,j) = \\frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)} $$ - For adaptive = true: - $$ - hstart = floor(i * H_{in} / H_{out}) - hend = ceil((i + 1) * H_{in} / H_{out}) - wstart = floor(j * W_{in} / W_{out}) - wend = ceil((j + 1) * W_{in} / W_{out}) - Output(i ,j) = \\frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)} - $$ )DOC"); } @@ -392,48 +401,68 @@ Example: Output: Out shape: $(N, C, D_{out}, H_{out}, W_{out})$ For ceil_mode = false: - $$ - D_{out} = \frac{(D_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 \\ - H_{out} = \frac{(H_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1 \\ - W_{out} = \frac{(W_{in} - ksize[2] + 2 * paddings[2])}{strides[2]} + 1 - $$ + $$ + D_{out} = \\frac{(D_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 + $$ + $$ + H_{out} = \\frac{(H_{in} - ksize[1] + 2 * paddings[1])}{strides[2]} + 1 + $$ + $$ + W_{out} = \\frac{(W_{in} - ksize[2] + 2 * paddings[2])}{strides[2]} + 1 + $$ For ceil_mode = true: - $$ - D_{out} = \frac{(D_{in} - ksize[0] + 2 * paddings[0] + strides[0] -1)}{strides[0]} + 1 \\ - H_{out} = \frac{(H_{in} - ksize[1] + 2 * paddings[1] + strides[1] -1)}{strides[1]} + 1 \\ - W_{out} = \frac{(W_{in} - ksize[2] + 2 * paddings[2] + strides[2] -1)}{strides[2]} + 1 - $$ - For exclusive = true: - $$ - dstart = i * strides[0] - paddings[0] - dend = dstart + ksize[0] - hstart = j * strides[1] - paddings[1] - hend = hstart + ksize[1] - wstart = k * strides[2] - paddings[2] - wend = wstart + ksize[2] - Output(i ,j, k) = \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{ksize[0] * ksize[1] * ksize[2]} - $$ + $$ + D_{out} = \\frac{(D_{in} - ksize[0] + 2 * paddings[0] + strides[0] -1)}{strides[0]} + 1 + $$ + $$ + H_{out} = \\frac{(H_{in} - ksize[1] + 2 * paddings[1] + strides[1] -1)}{strides[1]} + 1 + $$ + $$ + W_{out} = \\frac{(W_{in} - ksize[2] + 2 * paddings[2] + strides[2] -1)}{strides[2]} + 1 + $$ + For exclusive = false: - $$ - dstart = max(0, i * strides[0] - paddings[0]) - dend = min(D, dstart + ksize[0]) - hstart = max(0, j * strides[1] - paddings[1]) - hend = min(H, hstart + ksize[1]) - wstart = max(0, k * strides[2] - paddings[2]) - wend = min(W, wstart + ksize[2]) - Output(i ,j, k) = \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)} - $$ - - For adaptive = true: - $$ - dstart = floor(i * D_{in} / D_{out}) - dend = ceil((i + 1) * D_{in} / D_{out}) - hstart = floor(j * H_{in} / H_{out}) - hend = ceil((j + 1) * H_{in} / H_{out}) - wstart = floor(k * W_{in} / W_{out}) - wend = ceil((k + 1) * W_{in} / W_{out}) - Output(i ,j, k) = \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)} - $$ + $$ + dstart = i * strides[0] - paddings[0] + $$ + $$ + dend = dstart + ksize[0] + $$ + $$ + hstart = j * strides[1] - paddings[1] + $$ + $$ + hend = hstart + ksize[1] + $$ + $$ + wstart = k * strides[2] - paddings[2] + $$ + $$ + wend = wstart + ksize[2] + $$ + $$ + Output(i ,j, k) = \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{ksize[0] * ksize[1] * ksize[2]} + $$ + + For exclusive = true: + $$ + dstart = max(0, i * strides[0] - paddings[0]) + $$ + $$ + dend = min(D, dstart + ksize[0]) + $$ + $$ + hend = min(H, hstart + ksize[1]) + $$ + $$ + wstart = max(0, k * strides[2] - paddings[2]) + $$ + $$ + wend = min(W, wstart + ksize[2]) + $$ + $$ + Output(i ,j, k) = \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)} + $$ )DOC"); } diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index c983e2a44..0695bccd2 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -545,15 +545,16 @@ def yolov3_loss(x, TypeError: Attr ignore_thresh of yolov3_loss must be a float number Examples: - .. code-block:: python - - x = fluid.layers.data(name='x', shape=[255, 13, 13], dtype='float32') - gtbox = fluid.layers.data(name='gtbox', shape=[6, 5], dtype='float32') - gtlabel = fluid.layers.data(name='gtlabel', shape=[6, 1], dtype='int32') - anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326] - anchors = [0, 1, 2] - loss = fluid.layers.yolov3_loss(x=x, gtbox=gtbox, class_num=80, anchors=anchors, - ignore_thresh=0.5, downsample_ratio=32) + .. code-block:: python + + x = fluid.layers.data(name='x', shape=[255, 13, 13], dtype='float32') + gtbox = fluid.layers.data(name='gtbox', shape=[6, 5], dtype='float32') + gtlabel = fluid.layers.data(name='gtlabel', shape=[6, 1], dtype='int32') + anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326] + anchor_mask = [0, 1, 2] + loss = fluid.layers.yolov3_loss(x=x, gtbox=gtbox, gtlabel=gtlabel, anchors=anchors, + anchor_mask=anchor_mask, class_num=80, + ignore_thresh=0.7, downsample_ratio=32) """ helper = LayerHelper('yolov3_loss', **locals()) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 586eac7fd..872c96d60 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -2569,7 +2569,27 @@ def adaptive_pool2d(input, require_index=False, name=None): """ - ${comment} + **Adaptive Pool2d Operator** + The adaptive_pool2d operation calculates the output based on the input, pool_size, + pool_type parameters. Input(X) and output(Out) are in NCHW format, where N is batch + size, C is the number of channels, H is the height of the feature, and W is + the width of the feature. Parameters(pool_size) should contain two elements which + represent height and width, respectively. Also the H and W dimensions of output(Out) + is same as Parameter(pool_size). + + For average adaptive pool2d: + + .. math:: + + hstart &= floor(i * H_{in} / H_{out}) + + hend &= ceil((i + 1) * H_{in} / H_{out}) + + wstart &= floor(j * W_{in} / W_{out}) + + wend &= ceil((j + 1) * W_{in} / W_{out}) + + Output(i ,j) &= \\frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)} Args: input (Variable): The input tensor of pooling operator. The format of @@ -2579,8 +2599,8 @@ def adaptive_pool2d(input, pool_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, it must contain two integers, (pool_size_Height, pool_size_Width). pool_type: ${pooling_type_comment} - require_index (bool): If true, the index of max pooling point along with outputs. - it cannot be set in average pooling type. + require_index (bool): If true, the index of max pooling point will be returned along + with outputs. It cannot be set in average pooling type. name (str|None): A name for this layer(optional). If set None, the layer will be named automatically. @@ -2661,18 +2681,42 @@ def adaptive_pool3d(input, require_index=False, name=None): """ - ${comment} + **Adaptive Pool3d Operator** + The adaptive_pool3d operation calculates the output based on the input, pool_size, + pool_type parameters. Input(X) and output(Out) are in NCDHW format, where N is batch + size, C is the number of channels, D is the depth of the feature, H is the height of + the feature, and W is the width of the feature. Parameters(pool_size) should contain + three elements which represent height and width, respectively. Also the D, H and W + dimensions of output(Out) is same as Parameter(pool_size). + + For average adaptive pool3d: + + .. math:: + + dstart &= floor(i * D_{in} / D_{out}) + + dend &= ceil((i + 1) * D_{in} / D_{out}) + + hstart &= floor(j * H_{in} / H_{out}) + + hend &= ceil((j + 1) * H_{in} / H_{out}) + + wstart &= floor(k * W_{in} / W_{out}) + + wend &= ceil((k + 1) * W_{in} / W_{out}) + + Output(i ,j, k) &= \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)} Args: input (Variable): The input tensor of pooling operator. The format of - input tensor is NCHW, where N is batch size, C is - the number of channels, H is the height of the - feature, and W is the width of the feature. + input tensor is NCDHW, where N is batch size, C is + the number of channels, D is the depth of the feature, + H is the height of the feature, and W is the width of the feature. pool_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, - it must contain two integers, (Depth, Height, Width). + it must contain three integers, (Depth, Height, Width). pool_type: ${pooling_type_comment} - require_index (bool): If true, the index of max pooling point along with outputs. - it cannot be set in average pooling type. + require_index (bool): If true, the index of max pooling point will be returned along + with outputs. It cannot be set in average pooling type. name (str|None): A name for this layer(optional). If set None, the layer will be named automatically. @@ -2709,7 +2753,7 @@ def adaptive_pool3d(input, name='data', shape=[3, 32, 32], dtype='float32') pool_out, mask = fluid.layers.adaptive_pool3d( input=data, - pool_size=[3, 3], + pool_size=[3, 3, 3], pool_type='avg') """ if pool_type not in ["max", "avg"]: -- GitLab