From 1eac27630047e59cb461846022bada5bf0f490c1 Mon Sep 17 00:00:00 2001
From: sweetsky0901
Date: Sun, 17 Dec 2017 22:09:29 +0800
Subject: [PATCH] add spp avg

---
 paddle/operators/spp_op.cc                  |  5 +++
 paddle/operators/spp_op.h                   | 34 ++++++++++++++++-----
 python/paddle/v2/fluid/tests/test_spp_op.py | 23 +++++++++++---
 3 files changed, 51 insertions(+), 11 deletions(-)

diff --git a/paddle/operators/spp_op.cc b/paddle/operators/spp_op.cc
index c4bd4f5ab3a..b1807b62616 100644
--- a/paddle/operators/spp_op.cc
+++ b/paddle/operators/spp_op.cc
@@ -30,6 +30,11 @@ class SppOpMaker : public framework::OpProtoAndCheckerMaker {
               "N * M."
               "M = C * H * W");
     AddAttr<int>("pyramid_height", "(int), multi level pooling");
+    AddAttr<std::string>(
+        "pooling_type",
+        "(string), pooling type, can be \"max\" for max-pooling "
+        "and \"avg\" for average-pooling.")
+        .InEnum({"max", "avg"});
     AddComment(R"DOC(
         "With spatial pyramid pooling, the input image can
         be of any sizes. This not only allows arbitrary aspect
diff --git a/paddle/operators/spp_op.h b/paddle/operators/spp_op.h
index 16510cb8260..f35b305d02c 100644
--- a/paddle/operators/spp_op.h
+++ b/paddle/operators/spp_op.h
@@ -27,6 +27,8 @@ class SppKernel : public framework::OpKernel<T> {
     const framework::Tensor* in_x = context.Input<framework::Tensor>("X");
     auto* out = context.Output<framework::Tensor>("Out");
     int pyramid_height = context.template Attr<int>("pyramid_height");
+    std::string pooling_type =
+        context.template Attr<std::string>("pooling_type");
     out->mutable_data<T>(context.GetPlace());
     auto out_stride = framework::stride(out->dims());
     int input_h = in_x->dims()[2];
@@ -48,10 +50,17 @@
       framework::DDim output_shape(framework::make_ddim(output_shape_vec));
       out_level.mutable_data<T>(output_shape, context.GetPlace());
       // pooling
-      math::Pool2dFunctor<DeviceContext, math::MaxPool<T>, T> pool_forward;
-      math::MaxPool<T> max_process;
-      pool_forward(context.template device_context<DeviceContext>(), *in_x,
-                   kernel_size, strides, paddings, max_process, &out_level);
+      if (pooling_type == "max") {
+        math::Pool2dFunctor<DeviceContext, math::MaxPool<T>, T> pool_forward;
+        math::MaxPool<T> max_process;
+        pool_forward(context.template device_context<DeviceContext>(), *in_x,
+                     kernel_size, strides, paddings, max_process, &out_level);
+      } else if (pooling_type == "avg") {
+        math::Pool2dFunctor<DeviceContext, math::AvgPool<T>, T> pool_forward;
+        math::AvgPool<T> avg_process;
+        pool_forward(context.template device_context<DeviceContext>(), *in_x,
+                     kernel_size, strides, paddings, avg_process, &out_level);
+      }
       // flatten pooling output shape
       int output_flatten_w = in_x->dims()[1] * bins * bins;
       std::vector<int64_t> output_flatten_shape_vec(
@@ -79,6 +88,8 @@ class SppGradKernel : public framework::OpKernel<T> {
     framework::Tensor* in_x_grad =
         context.Output<framework::Tensor>(framework::GradVarName("X"));
     int pyramid_height = context.template Attr<int>("pyramid_height");
+    std::string pooling_type =
+        context.template Attr<std::string>("pooling_type");
     auto& device_ctx = context.template device_context<DeviceContext>();
     math::SetConstant<DeviceContext, T> zero;
     in_x_grad->mutable_data<T>(context.GetPlace());
@@ -130,10 +141,19 @@
       outgrad_level.ShareDataWith(outgrad_level);
       outgrad_level.Resize(out_shape);
       // pooling backward
-      math::MaxPool2dGradFunctor<DeviceContext, T> pool2d_backward;
-      pool2d_backward(context.template device_context<DeviceContext>(), *in_x,
+      if (pooling_type == "max") {
+        math::MaxPool2dGradFunctor<DeviceContext, T> pool2d_backward;
+        pool2d_backward(context.template device_context<DeviceContext>(), *in_x,
+                        *&out_level, *&outgrad_level, kernel_size, strides,
+                        paddings, in_x_grad);
+      } else if (pooling_type == "avg") {
+        math::Pool2dGradFunctor<DeviceContext, math::AvgPoolGrad<T>, T>
+            pool_backward;
+        math::AvgPoolGrad<T> avg_process;
+        pool_backward(context.template device_context<DeviceContext>(), *in_x,
                       *&out_level, *&outgrad_level, kernel_size, strides,
-                      paddings, in_x_grad);
+                      paddings, avg_process, in_x_grad);
+      }
     }
   }
 };
diff --git a/python/paddle/v2/fluid/tests/test_spp_op.py b/python/paddle/v2/fluid/tests/test_spp_op.py
index b57f4a795dc..007723f0e35 100644
--- a/python/paddle/v2/fluid/tests/test_spp_op.py
+++ b/python/paddle/v2/fluid/tests/test_spp_op.py
@@ -2,6 +2,7 @@ import unittest
 import numpy as np
 from op_test import OpTest
 from test_pool2d_op import max_pool2D_forward_naive
+from test_pool2d_op import avg_pool2D_forward_naive
 
 
 class TestSppOp(OpTest):
@@ -24,8 +25,8 @@
                                      bins.astype("double")).astype("int32")
             padding[1] = (
                 (kernel_size[1] * bins - wsize + 1) / 2).astype("int32")
-            out_level = max_pool2D_forward_naive(input, kernel_size,
-                                                 kernel_size, padding)
+            out_level = self.pool2D_forward_naive(input, kernel_size,
+                                                  kernel_size, padding)
             out_level_flatten.append(
                 out_level.reshape(nsize, bins * bins * csize))
             if i == 0:
@@ -34,7 +35,10 @@
                 output = np.concatenate((output, out_level_flatten[i]), 1)
         # output = np.concatenate(out_level_flatten.tolist(), 0);
         self.inputs = {'X': input.astype('float32'), }
-        self.attrs = {'pyramid_height': self.pyramid_height}
+        self.attrs = {
+            'pyramid_height': self.pyramid_height,
+            'pooling_type': self.pool_type
+        }
         self.outputs = {'Out': output.astype('float32')}
 
     def test_check_output(self):
@@ -41,11 +45,22 @@
         self.check_output()
 
     def test_check_grad(self):
-        self.check_grad(['X'], 'Out', max_relative_error=0.05)
+        if self.pool_type != "avg":
+            self.check_grad(['X'], 'Out', max_relative_error=0.05)
 
     def init_test_case(self):
         self.shape = [3, 2, 4, 4]
         self.pyramid_height = 3
+        self.pool2D_forward_naive = max_pool2D_forward_naive
+        self.pool_type = "max"
+
+
+class TestCase2(TestSppOp):
+    def init_test_case(self):
+        self.shape = [3, 2, 4, 4]
+        self.pyramid_height = 3
+        self.pool2D_forward_naive = avg_pool2D_forward_naive
+        self.pool_type = "avg"
 
 
 if __name__ == '__main__':
--
GitLab
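
Note (illustration, not part of the patch): the forward semantics selected by the new
pooling_type attribute can be summarized in a few lines of NumPy. The sketch below is a
minimal reimplementation, not the patch's code — the name spp_forward_naive is
illustrative. It mirrors the per-level arithmetic in SppKernel and the test's setUp: at
pyramid level p there are bins = 2**p output cells per side, with kernel = ceil(size /
bins), stride = kernel, and padding = (kernel * bins - size + 1) // 2. Like
avg_pool2D_forward_naive, the average is taken only over the part of each window that
overlaps the input, so sizes are assumed to be such that every window overlaps the input.

    import numpy as np

    def spp_forward_naive(x, pyramid_height, pooling_type="max"):
        """Naive SPP over an NCHW array; output shape (N, C * (4**H - 1) / 3)."""
        n, c, h, w = x.shape
        levels = []
        for p in range(pyramid_height):
            bins = 2 ** p
            kh, kw = -(-h // bins), -(-w // bins)  # ceil division
            ph, pw = (kh * bins - h + 1) // 2, (kw * bins - w + 1) // 2
            out = np.empty((n, c, bins, bins), dtype=x.dtype)
            for i in range(bins):
                for j in range(bins):
                    # window in input coordinates, clipped to the input
                    r0, r1 = max(i * kh - ph, 0), min(i * kh - ph + kh, h)
                    c0, c1 = max(j * kw - pw, 0), min(j * kw - pw + kw, w)
                    win = x[:, :, r0:r1, c0:c1]
                    if pooling_type == "max":
                        out[:, :, i, j] = win.max(axis=(2, 3))
                    else:  # "avg": mean over the clipped window, padding excluded
                        out[:, :, i, j] = win.mean(axis=(2, 3))
            # flatten each level to (N, C * bins * bins) and concatenate
            levels.append(out.reshape(n, c * bins * bins))
        return np.concatenate(levels, axis=1)

    x = np.random.rand(3, 2, 4, 4).astype("float32")
    print(spp_forward_naive(x, 3, "avg").shape)  # (3, 42) = (3, 2 * (1 + 4 + 16))

The sketch covers only the forward path exercised by test_check_output; consistent with
that, the patch runs check_grad only when pool_type is not "avg".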