From 144016fcfc9e3d3665b13297b4c6b7f4aee2ff41 Mon Sep 17 00:00:00 2001
From: dengkaipeng <dengkaipeng@baidu.com>
Date: Fri, 22 Feb 2019 19:32:44 +0800
Subject: [PATCH] fix adaptive_pool and yolov3_loss. test=develop

---
 .../operators/detection/yolov3_loss_op.cc     |  34 +++--
 paddle/fluid/operators/pool_op.cc             | 125 ++++++++++--------
 python/paddle/fluid/layers/detection.py       |  19 +--
 python/paddle/fluid/layers/nn.py              |  32 +++++
 4 files changed, 131 insertions(+), 79 deletions(-)

diff --git a/paddle/fluid/operators/detection/yolov3_loss_op.cc b/paddle/fluid/operators/detection/yolov3_loss_op.cc
index 2a69ad4b53..59ca65a5a1 100644
--- a/paddle/fluid/operators/detection/yolov3_loss_op.cc
+++ b/paddle/fluid/operators/detection/yolov3_loss_op.cc
@@ -144,30 +144,36 @@ class Yolov3LossOpMaker : public framework::OpProtoAndCheckerMaker {
                    "The ignore threshold to ignore confidence loss.")
         .SetDefault(0.7);
     AddComment(R"DOC(
-         This operator generate yolov3 loss by given predict result and ground
+         This operator generates yolov3 loss based on given predict result and ground
          truth boxes.
          
          The output of previous network is in shape [N, C, H, W], while H and W
-         should be the same, specify the grid size, each grid point predict given
-         number boxes, this given number is specified by anchors, it should be 
-         half anchors length, which following will be represented as S. In the 
-         second dimention(the channel dimention), C should be S * (class_num + 5),
-         class_num is the box categoriy number of source dataset(such as coco), 
-         so in the second dimention, stores 4 box location coordinates x, y, w, h 
-         and confidence score of the box and class one-hot key of each anchor box.
+         should be the same. H and W specify the grid size, and each grid point predicts
+         a given number of boxes. This number, represented as S in the following text,
+         is specified by the number of anchors. In the second dimension (the channel
+         dimension), C should be equal to S * (class_num + 5), where class_num is the
+         number of object categories in the source dataset (such as 80 in the COCO dataset).
+         So the second (channel) dimension holds, apart from the 4 box location coordinates
+         x, y, w, h, the confidence score of the box and the class one-hot key of each anchor box.
 
-         While the 4 location coordinates if $$tx, ty, tw, th$$, the box predictions
-         correspnd to:
+         Assume the 4 location coordinates are :math:`t_x, t_y, t_w, t_h`, the box predictions
+         should be as follows:
 
          $$
-         b_x = \sigma(t_x) + c_x
-         b_y = \sigma(t_y) + c_y
+         b_x = \\sigma(t_x) + c_x
+         $$
+         $$
+         b_y = \\sigma(t_y) + c_y
+         $$
+         $$
          b_w = p_w e^{t_w}
+         $$
+         $$
          b_h = p_h e^{t_h}
          $$
 
-         While $$c_x, c_y$$ is the left top corner of current grid and $$p_w, p_h$$
-         is specified by anchors.
+         In the equations above, :math:`c_x, c_y` is the top-left corner of the current grid
+         and :math:`p_w, p_h` are specified by the anchors.
 
          As for confidence score, it is the logistic regression value of IoU between
          anchor boxes and ground truth boxes, the score of the anchor box which has 
diff --git a/paddle/fluid/operators/pool_op.cc b/paddle/fluid/operators/pool_op.cc
index 1579c4e994..7e1df3b9ef 100644
--- a/paddle/fluid/operators/pool_op.cc
+++ b/paddle/fluid/operators/pool_op.cc
@@ -260,34 +260,39 @@ Example:
        $$
 
   For exclusive = false:
-
-  ..  math::
-
-       hstart &= i * strides[0] - paddings[0] \\
-       hend &= hstart + ksize[0] \\
-       wstart &= j * strides[1] - paddings[1] \\
-       wend &= wstart + ksize[1] \\
-       Output(i ,j) &= \frac{sum(Input[hstart:hend, wstart:wend])}{ksize[0] * ksize[1]}
+       $$
+       hstart = i * strides[0] - paddings[0]
+       $$
+       $$
+       hend = hstart + ksize[0]
+       $$
+       $$
+       wstart = j * strides[1] - paddings[1]
+       $$
+       $$
+       wend = wstart + ksize[1]
+       $$
+       $$
+       Output(i ,j) = \\frac{sum(Input[hstart:hend, wstart:wend])}{ksize[0] * ksize[1]}
+       $$
 
   For exclusive = true:
+       $$
+       hstart = max(0, i * strides[0] - paddings[0])
+       $$
+       $$
+       hend = min(H, hstart + ksize[0])
+       $$
+       $$
+       wstart = max(0, j * strides[1] - paddings[1])
+       $$
+       $$
+       wend = min(W, wstart + ksize[1])
+       $$
+       $$
+       Output(i ,j) = \\frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)}
+       $$
 
-  ..  math::
-
-       hstart &= max(0, i * strides[0] - paddings[0]) \\
-       hend &= min(H, hstart + ksize[0]) \\
-       wstart &= max(0, j * strides[1] - paddings[1]) \\
-       wend &= min(W, wstart + ksize[1]) \\
-       Output(i ,j) &= \frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)}
-
-  For adaptive = true:
-
-  ..  math::
-
-       hstart &= floor(i * H_{in} / H_{out}) \\
-       hend &= ceil((i + 1) * H_{in} / H_{out}) \\
-       wstart &= floor(j * W_{in} / W_{out}) \\
-       wend &= ceil((j + 1) * W_{in} / W_{out}) \\
-       Output(i ,j) &= \frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)}
 )DOC");
 }
 
@@ -417,39 +422,47 @@ Example:
        $$
 
   For exclusive = false:
-
-  ..  math::
-
-      dstart &= i * strides[0] - paddings[0] \\
-      dend &= dstart + ksize[0] \\
-      hstart &= j * strides[1] - paddings[1] \\
-      hend &= hstart + ksize[1] \\
-      wstart &= k * strides[2] - paddings[2] \\
-      wend &= wstart + ksize[2] \\
-      Output(i ,j, k) &= \frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{ksize[0] * ksize[1] * ksize[2]}
+       $$
+       dstart = i * strides[0] - paddings[0]
+       $$
+       $$
+       dend = dstart + ksize[0]
+       $$
+       $$
+       hstart = j * strides[1] - paddings[1]
+       $$
+       $$
+       hend = hstart + ksize[1]
+       $$
+       $$
+       wstart = k * strides[2] - paddings[2]
+       $$
+       $$
+       wend = wstart + ksize[2]
+       $$
+       $$
+       Output(i ,j, k) = \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{ksize[0] * ksize[1] * ksize[2]}
+       $$
 
   For exclusive = true:
-
-  ..  math::
-
-      dstart &= max(0, i * strides[0] - paddings[0]) \\
-      dend &= min(D, dstart + ksize[0]) \\
-      hend &= min(H, hstart + ksize[1]) \\
-      wstart &= max(0, k * strides[2] - paddings[2]) \\
-      wend &= min(W, wstart + ksize[2]) \\
-      Output(i ,j, k) &= \frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)}
-
-  For adaptive = true:
-
-  ..  math::
-
-      dstart &= floor(i * D_{in} / D_{out}) \\
-      dend &= ceil((i + 1) * D_{in} / D_{out}) \\
-      hstart &= floor(j * H_{in} / H_{out}) \\
-      hend &= ceil((j + 1) * H_{in} / H_{out}) \\
-      wstart &= floor(k * W_{in} / W_{out}) \\
-      wend &= ceil((k + 1) * W_{in} / W_{out}) \\
-      Output(i ,j, k) &= \frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)}
+       $$
+       dstart = max(0, i * strides[0] - paddings[0])
+       $$
+       $$
+       dend = min(D, dstart + ksize[0])
+       $$
+       $$
+       hstart = max(0, j * strides[1] - paddings[1]), \\quad hend = min(H, hstart + ksize[1])
+       $$
+       $$
+       wstart = max(0, k * strides[2] - paddings[2])
+       $$
+       $$
+       wend = min(W, wstart + ksize[2])
+       $$
+       $$
+       Output(i ,j, k) = \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)}
+       $$
 
 )DOC");
 }
diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py
index 3b43ae0b9c..61a7d4f31d 100644
--- a/python/paddle/fluid/layers/detection.py
+++ b/python/paddle/fluid/layers/detection.py
@@ -545,15 +545,16 @@ def yolov3_loss(x,
         TypeError: Attr ignore_thresh of yolov3_loss must be a float number
 
     Examples:
-    .. code-block:: python
-
-        x = fluid.layers.data(name='x', shape=[255, 13, 13], dtype='float32')
-        gtbox = fluid.layers.data(name='gtbox', shape=[6, 5], dtype='float32')
-        gtlabel = fluid.layers.data(name='gtlabel', shape=[6, 1], dtype='int32')
-        anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326]
-        anchors = [0, 1, 2]
-        loss = fluid.layers.yolov3_loss(x=x, gtbox=gtbox, class_num=80, anchors=anchors, 
-                                        ignore_thresh=0.5, downsample_ratio=32)
+      .. code-block:: python
+
+          x = fluid.layers.data(name='x', shape=[255, 13, 13], dtype='float32')
+          gtbox = fluid.layers.data(name='gtbox', shape=[6, 5], dtype='float32')
+          gtlabel = fluid.layers.data(name='gtlabel', shape=[6, 1], dtype='int32')
+          anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326]
+          anchor_mask = [0, 1, 2]
+          loss = fluid.layers.yolov3_loss(x=x, gtbox=gtbox, gtlabel=gtlabel, anchors=anchors, 
+                                          anchor_mask=anchor_mask, class_num=80,
+                                          ignore_thresh=0.7, downsample_ratio=32)
     """
     helper = LayerHelper('yolov3_loss', **locals())
 
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 1ae9f6fc3b..7795090eef 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -2577,6 +2577,20 @@ def adaptive_pool2d(input,
     represent height and width, respectively. Also the H and W dimensions of output(Out)
     is same as Parameter(pool_size).
 
+    For average adaptive pool2d:
+
+    ..  math::
+
+       hstart &= floor(i * H_{in} / H_{out})
+
+       hend &= ceil((i + 1) * H_{in} / H_{out})
+
+       wstart &= floor(j * W_{in} / W_{out})
+
+       wend &= ceil((j + 1) * W_{in} / W_{out})
+
+       Output(i ,j) &= \\frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)}
+
     Args:
         input (Variable): The input tensor of pooling operator. The format of
                           input tensor is NCHW, where N is batch size, C is
@@ -2675,6 +2689,24 @@ def adaptive_pool3d(input,
     three elements which represent height and width, respectively. Also the D, H and W
     dimensions of output(Out) is same as Parameter(pool_size).
 
+    For average adaptive pool3d:
+
+    ..  math::
+
+      dstart &= floor(i * D_{in} / D_{out})
+
+      dend &= ceil((i + 1) * D_{in} / D_{out})
+
+      hstart &= floor(j * H_{in} / H_{out})
+
+      hend &= ceil((j + 1) * H_{in} / H_{out})
+
+      wstart &= floor(k * W_{in} / W_{out})
+
+      wend &= ceil((k + 1) * W_{in} / W_{out})
+
+      Output(i ,j, k) &= \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)}
+
     Args:
         input (Variable): The input tensor of pooling operator. The format of
                           input tensor is NCDHW, where N is batch size, C is
-- 
GitLab