Commit 531e7b6f authored by: S sweetsky0901

gpu test ok

Parent c75b4538
......@@ -29,28 +29,22 @@ class SppOpMaker : public framework::OpProtoAndCheckerMaker {
"(Tensor) The output tensor of spp operator."
"N * M."
"M = C * H * W");
AddAttr<int>("pyramid_height", ">= 1");
AddAttr<int>("pyramid_height", "int");
AddComment(R"DOC(
"Input shape: $(N, C_{in}, H_{in}, W_{in})$
"Does spatial pyramid pooling on the input image by taking the max,
etc. within regions so that the result vector of different sized
images are of the same size
Input shape: $(N, C_{in}, H_{in}, W_{in})$
Output shape: $(H_{out}, W_{out})$
Where
$$
H_{out} = (H_{in} - 1) * strides[0] - 2 * paddings[0] + ksize[0] \\
W_{out} = (W_{in} - 1) * strides[1] - 2 * paddings[1] + ksize[1] \\
H_{out} = N \\
W_{out} = \frac{4^{pyramid\_height} - 1}{4 - 1} * C_{in}
$$
)DOC");
}
};
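
Note on the closed form above: level p of the pyramid pools every channel into a 2^p x 2^p grid, i.e. 4^p bins, so the flattened width is just a geometric series:

$$
W_{out} = C_{in} \sum_{p=0}^{pyramid\_height - 1} 4^{p} = \frac{4^{pyramid\_height} - 1}{4 - 1} * C_{in}
$$

For example (illustrative values, not from this diff), pyramid_height = 2 and C_in = 3 give (1 + 4) * 3 = 15 output values per image.
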
int OutputSize(int pyramid_level, int input_size) {
int bins = std::pow(2, pyramid_level);
int ksize = std::ceil(input_size / static_cast<double>(bins));
int padding = (ksize * bins - input_size + 1) / 2;
int output_size = (input_size - ksize + 2 * padding) / ksize + 1;
// output_size = bins
return output_size;
}
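
As a quick sanity check that OutputSize(p, n) indeed returns 2^p bins for any input size n (which is what the "output_size = bins" comment asserts), here is a small standalone sketch that only reuses the arithmetic above; the driver program is illustrative and not part of the operator:

// Standalone check: OutputSize(p, n) == 2^p for any n >= 1, mirroring the
// adaptive kernel-size / padding arithmetic of the operator above.
#include <cmath>
#include <cstdio>

static int OutputSize(int pyramid_level, int input_size) {
  int bins = std::pow(2, pyramid_level);
  int ksize = std::ceil(input_size / static_cast<double>(bins));
  int padding = (ksize * bins - input_size + 1) / 2;
  return (input_size - ksize + 2 * padding) / ksize + 1;
}

int main() {
  const int sizes[] = {4, 7, 13};
  for (int p = 0; p < 3; ++p) {
    for (int n : sizes) {
      std::printf("level %d, input %d -> %d bins\n", p, n, OutputSize(p, n));
    }
  }
  return 0;
}
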
class SppOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
......@@ -64,13 +58,7 @@ class SppOp : public framework::OperatorWithKernel {
int pyramid_height = ctx->Attrs().Get<int>("pyramid_height");
PADDLE_ENFORCE(in_x_dims.size() == 4,
"Spping intput must be of 4-dimensional.");
int outlen = 0;
for (int p = 0; p < pyramid_height; ++p) {
int outh = OutputSize(p, in_x_dims[2]);
int outw = OutputSize(p, in_x_dims[3]);
int p_level_outlen = outh * outw * in_x_dims[1];
outlen += p_level_outlen;
}
int outlen = ((std::pow(4, pyramid_height) - 1) / (4 - 1)) * in_x_dims[1];
std::vector<int64_t> output_shape({in_x_dims[0], outlen});
ctx->SetOutputDim("Out", framework::make_ddim(output_shape));
}
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/spp_op.h"
namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(spp, ops::SppKernel<paddle::platform::GPUPlace, float>,
ops::SppKernel<paddle::platform::GPUPlace, double>);
REGISTER_OP_GPU_KERNEL(spp_grad,
ops::SppGradKernel<paddle::platform::GPUPlace, float>,
ops::SppGradKernel<paddle::platform::GPUPlace, double>);
......@@ -42,34 +42,36 @@ class SppKernel : public framework::OpKernel<T> {
std::vector<int> strides({ksize_h, ksize_w});
std::vector<int> paddings({padding_h, padding_w});
// pooling output shape
framework::Tensor out_level;
std::vector<int64_t> output_shape_vec({in_x->dims()[0], in_x->dims()[1]});
output_shape_vec.push_back((input_h - ksize_h + 2 * padding_h) / ksize_h +
1);
output_shape_vec.push_back((input_w - ksize_w + 2 * padding_w) / ksize_w +
1);
framework::DDim output_shape(framework::make_ddim(output_shape_vec));
// flatten pooling output shape
int output_flatten_w = in_x->dims()[1] * bins * bins;
std::vector<int64_t> output_flatten_shape_vec(
{in_x->dims()[0], output_flatten_w});
framework::DDim output_flatten_shape(
framework::make_ddim(output_flatten_shape_vec));
framework::Tensor out_level;
framework::Tensor out_flatten_level;
out_level.mutable_data<T>(output_shape, context.GetPlace());
// pooling
math::Pool2dFunctor<Place, math::MaxPool<T>, T> pool_forward;
math::MaxPool<T> max_process;
pool_forward(context.device_context(), *in_x, ksize, strides, paddings,
max_process, &out_level);
// flatten pooling output shape
framework::Tensor out_flatten_level;
int output_flatten_w = in_x->dims()[1] * bins * bins;
std::vector<int64_t> output_flatten_shape_vec(
{in_x->dims()[0], output_flatten_w});
framework::DDim output_flatten_shape(
framework::make_ddim(output_flatten_shape_vec));
out_flatten_level.ShareDataWith(out_level);
out_flatten_level.Resize(output_flatten_shape);
auto in_stride = framework::stride(out_flatten_level.dims());
const T* src_data = out_flatten_level.data<T>();
StridedMemcpy<T>(context.device_context(), src_data, in_stride,
out_flatten_level.dims(), out_stride,
out->data<T>() + output_offset);
output_offset += out_flatten_level.dims()[1] * in_stride[1];
// concat
auto out_flatten_level_stride =
framework::stride(out_flatten_level.dims());
StridedMemcpy<T>(context.device_context(), out_flatten_level.data<T>(),
out_flatten_level_stride, out_flatten_level.dims(),
out_stride, out->data<T>() + output_offset);
output_offset +=
out_flatten_level.dims()[1] * out_flatten_level_stride[1];
}
}
};
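
To follow the forward logic without the Paddle plumbing (Pool2dFunctor, StridedMemcpy, ShareDataWith), here is a minimal self-contained C++ sketch of the same idea for a single NCHW image: each pyramid level max-pools every channel's H x W map into a bins x bins grid, and the flattened results are appended to one output vector. The function name, the plain std::vector layout, and the adaptive bin partition are illustrative only; the partition matches the kernel's bin counts but may split odd sizes slightly differently than the ksize/padding construction, so this is not the Paddle kernel itself:

// Illustrative SPP forward for one C x H x W image stored row-major in a
// std::vector; assumes H, W >= 2^(pyramid_height - 1) so no bin is empty.
#include <algorithm>
#include <limits>
#include <vector>

std::vector<float> SppForwardReference(const std::vector<float>& x,  // C*H*W
                                       int C, int H, int W,
                                       int pyramid_height) {
  std::vector<float> out;
  for (int p = 0; p < pyramid_height; ++p) {
    int bins = 1 << p;  // 2^p bins per spatial dimension at this level
    for (int c = 0; c < C; ++c) {
      for (int bh = 0; bh < bins; ++bh) {
        for (int bw = 0; bw < bins; ++bw) {
          // adaptive window of this bin within the feature map
          int h0 = bh * H / bins, h1 = (bh + 1) * H / bins;
          int w0 = bw * W / bins, w1 = (bw + 1) * W / bins;
          float m = -std::numeric_limits<float>::infinity();
          for (int h = h0; h < h1; ++h)
            for (int w = w0; w < w1; ++w)
              m = std::max(m, x[(c * H + h) * W + w]);
          out.push_back(m);  // appending realizes the per-level concat
        }
      }
    }
  }
  return out;  // length == C * (4^pyramid_height - 1) / 3
}
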
......@@ -83,12 +85,11 @@ class SppGradKernel : public framework::OpKernel<T> {
context.Input<framework::Tensor>(framework::GradVarName("Out"));
framework::Tensor* in_x_grad =
context.Output<framework::Tensor>(framework::GradVarName("X"));
int pyramid_height = context.template Attr<int>("pyramid_height");
auto& device_ctx = context.device_context();
math::SetConstant<Place, T> zero;
in_x_grad->mutable_data<T>(context.GetPlace());
zero(device_ctx, in_x_grad, static_cast<T>(0));
int pyramid_height = context.template Attr<int>("pyramid_height");
auto outgrad_stride = framework::stride(out_grad->dims());
auto out_stride = framework::stride(out->dims());
int input_h = in_x->dims()[2];
int input_w = in_x->dims()[3];
......@@ -102,26 +103,17 @@ class SppGradKernel : public framework::OpKernel<T> {
std::vector<int> ksize({ksize_h, ksize_w});
std::vector<int> strides({ksize_h, ksize_w});
std::vector<int> paddings({padding_h, padding_w});
// split outgrad and get flatten
std::vector<int64_t> out_shape_vec({in_x->dims()[0], in_x->dims()[1]});
out_shape_vec.push_back((input_h - ksize_h + 2 * padding_h) / ksize_h +
1);
out_shape_vec.push_back((input_w - ksize_w + 2 * padding_w) / ksize_w +
1);
framework::DDim out_shape(framework::make_ddim(out_shape_vec));
// split this level's slice of out and out_grad into flattened tensors
framework::Tensor out_flatten_level;
framework::Tensor outgrad_flatten_level;
int out_flatten_w = in_x->dims()[1] * bins * bins;
std::vector<int64_t> out_flatten_shape_vec(
{in_x->dims()[0], out_flatten_w});
framework::DDim out_flatten_shape(
framework::make_ddim(out_flatten_shape_vec));
framework::Tensor out_level;
framework::Tensor outgrad_level;
framework::Tensor out_flatten_level;
framework::Tensor outgrad_flatten_level;
out_flatten_level.mutable_data<T>(out_flatten_shape, context.GetPlace());
outgrad_flatten_level.mutable_data<T>(out_flatten_shape,
context.GetPlace());
auto flatten_stride = framework::stride(out_flatten_level.dims());
// memcpy
StridedMemcpy<T>(context.device_context(), out->data<T>() + out_offset,
......@@ -129,15 +121,24 @@ class SppGradKernel : public framework::OpKernel<T> {
out_flatten_level.data<T>());
StridedMemcpy<T>(context.device_context(),
out_grad->data<T>() + out_offset, outgrad_stride,
out_grad->data<T>() + out_offset, out_stride,
outgrad_flatten_level.dims(), flatten_stride,
outgrad_flatten_level.data<T>());
out_offset += out_flatten_level.dims()[1] * out_stride[1];
// flatten backward
// flatten backward to nchw
framework::Tensor out_level;
framework::Tensor outgrad_level;
std::vector<int64_t> out_shape_vec({in_x->dims()[0], in_x->dims()[1]});
out_shape_vec.push_back((input_h - ksize_h + 2 * padding_h) / ksize_h +
1);
out_shape_vec.push_back((input_w - ksize_w + 2 * padding_w) / ksize_w +
1);
framework::DDim out_shape(framework::make_ddim(out_shape_vec));
out_level.ShareDataWith(out_flatten_level);
out_level.Resize(out_shape);
outgrad_level.ShareDataWith(outgrad_flatten_level);
outgrad_level.Resize(out_shape);
// pooling backward
math::MaxPool2dGradFunctor<Place, T> pool2d_backward;
pool2d_backward(context.device_context(), *in_x, out_level,
outgrad_level, ksize, strides, paddings, in_x_grad);
......
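
The backward pass mirrors the forward concatenation: for each level it copies the columns of Out and Out@GRAD that the level produced back into per-level flattened tensors, reshapes them to NCHW, and runs the max-pool gradient. A small standalone sketch of the column bookkeeping (values taken from the updated unit test below; the driver program is illustrative only):

// Columns occupied by each pyramid level in the flattened (N, outlen) output
// and its gradient; mirrors how out_offset advances in SppGradKernel.
#include <cstdio>

int main() {
  const int C = 2, pyramid_height = 3;   // matches the updated unit test
  int offset = 0;
  for (int p = 0; p < pyramid_height; ++p) {
    int bins = 1 << p;
    int level_width = C * bins * bins;   // columns written by level p
    std::printf("level %d: columns [%d, %d)\n", p, offset, offset + level_width);
    offset += level_width;
  }
  std::printf("total outlen = %d\n", offset);  // ((4^3 - 1) / 3) * 2 == 42
  return 0;
}
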
......@@ -37,11 +37,11 @@ class TestSppOp(OpTest):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Out')
self.check_grad(['X'], 'Out', max_relative_error=0.05)
def init_test_case(self):
self.shape = [1, 1, 2, 2]
self.pyramid_height = 2
self.shape = [3, 2, 4, 4]
self.pyramid_height = 3
if __name__ == '__main__':
......