diff --git a/paddle/operators/spp_op.cc b/paddle/operators/spp_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..b1807b62616b80ea8a9e48409e0760c1c7b36a38 --- /dev/null +++ b/paddle/operators/spp_op.cc @@ -0,0 +1,99 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +Indicesou may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/spp_op.h" +namespace paddle { +namespace operators { + +class SppOpMaker : public framework::OpProtoAndCheckerMaker { + public: + SppOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput( + "X", + "(Tensor) The input tensor of spp operator. " + "The format of input tensor is NCHW. Where N is batch size, C is the " + "number of channels, H and W is the height and width of feature."); + AddOutput("Out", + "(Tensor) The output tensor of spp operator." + "N * M." + "M = C * H * W"); + AddAttr("pyramid_height", "(int), multi level pooling"); + AddAttr( + "pooling_type", + "(string), pooling type, can be \"max\" for max-pooling " + "and \"avg\" for average-pooling.") + .InEnum({"max", "avg"}); + AddComment(R"DOC( + "With spatial pyramid pooling, the input image can + be of any sizes. This not only allows arbitrary aspect + ratios, but also allows arbitrary scales. We can resize + the input image to any scale (e.g., min(w, h)=180, 224, + ...) and apply the same deep network. When the + input image is at different scales, the network (with + the same filter sizes) will extract features at different + scales. The scales play important roles in traditional + methods. + Input shape: $(N, C_{in}, H_{in}, W_{in})$ + Output shape: $(H_{out}, W_{out})$ + Where + $$ + H_{out} = N \\ + W_{out} = (((4^pyramid_height) - 1) / (4 - 1))$ * C_{in} + $$ + paper https://arxiv.org/pdf/1406.4729v4.pdf + )DOC"); + } +}; + +class SppOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of SppOp" + "should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of SppOp should not be null."); + auto in_x_dims = ctx->GetInputDim("X"); + int pyramid_height = ctx->Attrs().Get("pyramid_height"); + PADDLE_ENFORCE(in_x_dims.size() == 4, + "Spping intput must be of 4-dimensional."); + int outlen = ((std::pow(4, pyramid_height) - 1) / (4 - 1)) * in_x_dims[1]; + std::vector output_shape({in_x_dims[0], outlen}); + ctx->SetOutputDim("Out", framework::make_ddim(output_shape)); + } +}; + +class SppOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null."); + PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")), + "Input(X@GRAD) should not be null."); + ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); + } +}; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(spp, ops::SppOp, ops::SppOpMaker, spp_grad, ops::SppOpGrad); +REGISTER_OP_CPU_KERNEL( + spp, ops::SppKernel, + ops::SppKernel); +REGISTER_OP_CPU_KERNEL( + spp_grad, ops::SppGradKernel, + ops::SppGradKernel); diff --git a/paddle/operators/spp_op.cu.cc b/paddle/operators/spp_op.cu.cc new file mode 100644 index 0000000000000000000000000000000000000000..761e4d6c4a9639898ba548d56bed3c8817720c1b --- /dev/null +++ b/paddle/operators/spp_op.cu.cc @@ -0,0 +1,23 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +Indicesou may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/spp_op.h" + +namespace ops = paddle::operators; +REGISTER_OP_CUDA_KERNEL( + spp, ops::SppKernel, + ops::SppKernel); +REGISTER_OP_CUDA_KERNEL( + spp_grad, ops::SppGradKernel, + ops::SppGradKernel); diff --git a/paddle/operators/spp_op.h b/paddle/operators/spp_op.h new file mode 100644 index 0000000000000000000000000000000000000000..f35b305d02c73bcae6e72b8afa5ce55148ea98b8 --- /dev/null +++ b/paddle/operators/spp_op.h @@ -0,0 +1,161 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +Indicesou may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/framework/op_registry.h" +#include "paddle/operators/math/math_function.h" +#include "paddle/operators/math/pooling.h" +#include "paddle/operators/strided_memcpy.h" + +namespace paddle { +namespace operators { +template +class SppKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + const framework::Tensor* in_x = context.Input("X"); + auto* out = context.Output("Out"); + int pyramid_height = context.template Attr("pyramid_height"); + std::string pooling_type = + context.template Attr("pooling_type"); + out->mutable_data(context.GetPlace()); + auto out_stride = framework::stride(out->dims()); + int input_h = in_x->dims()[2]; + int input_w = in_x->dims()[3]; + size_t output_offset = 0; + for (int p = 0; p < pyramid_height; ++p) { + int bins = std::pow(2, p); + int kernel_size_h = std::ceil(input_h / static_cast(bins)); + int kernel_size_w = std::ceil(input_w / static_cast(bins)); + int padding_h = (kernel_size_h * bins - input_h + 1) / 2; + int padding_w = (kernel_size_w * bins - input_w + 1) / 2; + std::vector kernel_size({kernel_size_h, kernel_size_w}); + std::vector strides({kernel_size_h, kernel_size_w}); + std::vector paddings({padding_h, padding_w}); + // pooling output shape + framework::Tensor out_level; + std::vector output_shape_vec( + {in_x->dims()[0], in_x->dims()[1], bins, bins}); + framework::DDim output_shape(framework::make_ddim(output_shape_vec)); + out_level.mutable_data(output_shape, context.GetPlace()); + // pooling + if (pooling_type == "max") { + math::Pool2dFunctor, T> pool_forward; + math::MaxPool max_process; + pool_forward(context.template device_context(), *in_x, + kernel_size, strides, paddings, max_process, &out_level); + } else if (pooling_type == "avg") { + math::Pool2dFunctor, T> pool_forward; + math::AvgPool avg_process; + pool_forward(context.template device_context(), *in_x, + kernel_size, strides, paddings, avg_process, &out_level); + } + // flatten pooling output shape + int output_flatten_w = in_x->dims()[1] * bins * bins; + std::vector output_flatten_shape_vec( + {in_x->dims()[0], output_flatten_w}); + framework::DDim output_flatten_shape( + framework::make_ddim(output_flatten_shape_vec)); + out_level.Resize(output_flatten_shape); + // concat + auto out_level_stride = framework::stride(out_level.dims()); + StridedMemcpy(context.template device_context(), + out_level.data(), out_level_stride, out_level.dims(), + out_stride, out->data() + output_offset); + output_offset += out_level.dims()[1] * out_level_stride[1]; + } + } +}; +template +class SppGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + const framework::Tensor* in_x = context.Input("X"); + const framework::Tensor* out = context.Input("Out"); + const framework::Tensor* out_grad = + context.Input(framework::GradVarName("Out")); + framework::Tensor* in_x_grad = + context.Output(framework::GradVarName("X")); + int pyramid_height = context.template Attr("pyramid_height"); + std::string pooling_type = + context.template Attr("pooling_type"); + auto& device_ctx = context.template device_context(); + math::SetConstant zero; + in_x_grad->mutable_data(context.GetPlace()); + zero(device_ctx, in_x_grad, static_cast(0)); + auto out_stride = framework::stride(out->dims()); + int input_h = in_x->dims()[2]; + int input_w = in_x->dims()[3]; + size_t out_offset = 0; + for (int p = 0; p < pyramid_height; ++p) { + int bins = std::pow(2, p); + int kernel_size_h = std::ceil(input_h / static_cast(bins)); + int kernel_size_w = std::ceil(input_w / static_cast(bins)); + int padding_h = (kernel_size_h * bins - input_h + 1) / 2; + int padding_w = (kernel_size_w * bins - input_w + 1) / 2; + std::vector kernel_size({kernel_size_h, kernel_size_w}); + std::vector strides({kernel_size_h, kernel_size_w}); + std::vector paddings({padding_h, padding_w}); + // split out and outgrad ... to flatten + framework::Tensor out_level; + framework::Tensor outgrad_level; + int out_flatten_w = in_x->dims()[1] * bins * bins; + std::vector out_flatten_shape_vec( + {in_x->dims()[0], out_flatten_w}); + framework::DDim out_flatten_shape( + framework::make_ddim(out_flatten_shape_vec)); + out_level.mutable_data(out_flatten_shape, context.GetPlace()); + outgrad_level.mutable_data(out_flatten_shape, context.GetPlace()); + auto flatten_stride = framework::stride(out_level.dims()); + // memcpy + StridedMemcpy(context.template device_context(), + out->data() + out_offset, out_stride, + out_level.dims(), flatten_stride, out_level.data()); + + StridedMemcpy(context.template device_context(), + out_grad->data() + out_offset, out_stride, + outgrad_level.dims(), flatten_stride, + outgrad_level.data()); + out_offset += out_level.dims()[1] * out_stride[1]; + // flatten backward to nchw + + std::vector out_shape_vec({in_x->dims()[0], in_x->dims()[1]}); + out_shape_vec.push_back( + (input_h - kernel_size_h + 2 * padding_h) / kernel_size_h + 1); + out_shape_vec.push_back( + (input_w - kernel_size_w + 2 * padding_w) / kernel_size_w + 1); + framework::DDim out_shape(framework::make_ddim(out_shape_vec)); + out_level.ShareDataWith(out_level); + out_level.Resize(out_shape); + outgrad_level.ShareDataWith(outgrad_level); + outgrad_level.Resize(out_shape); + // pooling backward + if (pooling_type == "max") { + math::MaxPool2dGradFunctor pool2d_backward; + pool2d_backward(context.template device_context(), *in_x, + *&out_level, *&outgrad_level, kernel_size, strides, + paddings, in_x_grad); + } else if (pooling_type == "avg") { + math::Pool2dGradFunctor, T> + pool_backward; + math::AvgPoolGrad avg_process; + pool_backward(context.template device_context(), *in_x, + *&out_level, *&outgrad_level, kernel_size, strides, + paddings, avg_process, in_x_grad); + } + } + } +}; +} // namespace operators +} // namespace paddle diff --git a/python/paddle/v2/fluid/tests/test_spp_op.py b/python/paddle/v2/fluid/tests/test_spp_op.py new file mode 100644 index 0000000000000000000000000000000000000000..007723f0e35ad194c427401337bc9b13756576de --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_spp_op.py @@ -0,0 +1,68 @@ +import unittest +import numpy as np +from op_test import OpTest +from test_pool2d_op import max_pool2D_forward_naive +from test_pool2d_op import avg_pool2D_forward_naive + + +class TestSppOp(OpTest): + def setUp(self): + self.op_type = "spp" + self.init_test_case() + input = np.random.random(self.shape).astype("float32") + nsize, csize, hsize, wsize = input.shape + out_level_flatten = [] + for i in xrange(self.pyramid_height): + bins = np.power(2, i) + kernel_size = [0, 0] + padding = [0, 0] + kernel_size[0] = np.ceil(hsize / + bins.astype("double")).astype("int32") + padding[0] = ( + (kernel_size[0] * bins - hsize + 1) / 2).astype("int32") + + kernel_size[1] = np.ceil(wsize / + bins.astype("double")).astype("int32") + padding[1] = ( + (kernel_size[1] * bins - wsize + 1) / 2).astype("int32") + out_level = self.pool2D_forward_naive(input, kernel_size, + kernel_size, padding) + out_level_flatten.append( + out_level.reshape(nsize, bins * bins * csize)) + if i == 0: + output = out_level_flatten[i] + else: + output = np.concatenate((output, out_level_flatten[i]), 1) + # output = np.concatenate(out_level_flatten.tolist(), 0); + self.inputs = {'X': input.astype('float32'), } + self.attrs = { + 'pyramid_height': self.pyramid_height, + 'pooling_type': self.pool_type + } + + self.outputs = {'Out': output.astype('float32')} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + if self.pool_type != "avg": + self.check_grad(['X'], 'Out', max_relative_error=0.05) + + def init_test_case(self): + self.shape = [3, 2, 4, 4] + self.pyramid_height = 3 + self.pool2D_forward_naive = max_pool2D_forward_naive + self.pool_type = "max" + + +class TestCase2(TestSppOp): + def init_test_case(self): + self.shape = [3, 2, 4, 4] + self.pyramid_height = 3 + self.pool2D_forward_naive = avg_pool2D_forward_naive + self.pool_type = "avg" + + +if __name__ == '__main__': + unittest.main()