diff --git a/paddle/operators/spp_op.cc b/paddle/operators/spp_op.cc
index 5e51e73eccb12025b0afc6dd1198810b47517944..c4bd4f5ab3ac9b7322397f0e7af6f1feb85dbca0 100644
--- a/paddle/operators/spp_op.cc
+++ b/paddle/operators/spp_op.cc
@@ -31,9 +31,15 @@ class SppOpMaker : public framework::OpProtoAndCheckerMaker {
              "M = C * H * W");
     AddAttr<int>("pyramid_height", "(int), multi level pooling");
     AddComment(R"DOC(
-        "Does spatial pyramid pooling on the input image by taking the max,
-        etc. within regions so that the result vector of different sized
-        images are of the same size
+        With spatial pyramid pooling, the input image can
+        be of any size. This not only allows arbitrary aspect
+        ratios, but also allows arbitrary scales. We can resize
+        the input image to any scale (e.g., min(w, h)=180, 224,
+        ...) and apply the same deep network. When the
+        input image is at different scales, the network (with
+        the same filter sizes) will extract features at different
+        scales. The scales play important roles in traditional
+        methods.
         Input shape: $(N, C_{in}, H_{in}, W_{in})$
         Output shape: $(H_{out}, W_{out})$
         Where
@@ -41,6 +47,7 @@ class SppOpMaker : public framework::OpProtoAndCheckerMaker {
             H_{out} = N \\
            W_{out} = ((4^{pyramid\_height} - 1) / (4 - 1)) * C_{in}
           $$
+        paper: https://arxiv.org/pdf/1406.4729v4.pdf
         )DOC");
   }
 };
@@ -79,8 +86,9 @@ class SppOpGrad : public framework::OperatorWithKernel {
 
 namespace ops = paddle::operators;
 REGISTER_OP(spp, ops::SppOp, ops::SppOpMaker, spp_grad, ops::SppOpGrad);
-REGISTER_OP_CPU_KERNEL(spp, ops::SppKernel<paddle::platform::CPUPlace, float>,
-                       ops::SppKernel<paddle::platform::CPUPlace, double>);
-REGISTER_OP_CPU_KERNEL(spp_grad,
-                       ops::SppGradKernel<paddle::platform::CPUPlace, float>,
-                       ops::SppGradKernel<paddle::platform::CPUPlace, double>);
+REGISTER_OP_CPU_KERNEL(
+    spp, ops::SppKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::SppKernel<paddle::platform::CPUDeviceContext, double>);
+REGISTER_OP_CPU_KERNEL(
+    spp_grad, ops::SppGradKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::SppGradKernel<paddle::platform::CPUDeviceContext, double>);
diff --git a/paddle/operators/spp_op.cu.cc b/paddle/operators/spp_op.cu.cc
index a7057907cea2eed21753efeb399874f01533cd53..761e4d6c4a9639898ba548d56bed3c8817720c1b 100644
--- a/paddle/operators/spp_op.cu.cc
+++ b/paddle/operators/spp_op.cu.cc
@@ -15,8 +15,9 @@ limitations under the License. */
 #include "paddle/operators/spp_op.h"
 
 namespace ops = paddle::operators;
-REGISTER_OP_GPU_KERNEL(spp, ops::SppKernel<paddle::platform::GPUPlace, float>,
-                       ops::SppKernel<paddle::platform::GPUPlace, double>);
-REGISTER_OP_GPU_KERNEL(spp_grad,
-                       ops::SppGradKernel<paddle::platform::GPUPlace, float>,
-                       ops::SppGradKernel<paddle::platform::GPUPlace, double>);
+REGISTER_OP_CUDA_KERNEL(
+    spp, ops::SppKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::SppKernel<paddle::platform::CUDADeviceContext, double>);
+REGISTER_OP_CUDA_KERNEL(
+    spp_grad, ops::SppGradKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::SppGradKernel<paddle::platform::CUDADeviceContext, double>);
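
The W_{out} formula in the rewritten DOC string can be sanity-checked by hand:
level p of the pyramid pools each channel into a 2^p x 2^p grid, i.e. 4^p
values per channel, and summing that geometric series over all levels gives
(4^{pyramid_height} - 1) / (4 - 1) values per input channel. A minimal
standalone sketch (SppOutputWidth is a hypothetical helper, not part of this
patch):

    #include <cstdio>

    // Expected width of the SPP output, per the DOC formula.
    int SppOutputWidth(int pyramid_height, int channels) {
      int cells = 0;
      for (int p = 0; p < pyramid_height; ++p) {
        int bins = 1 << p;     // bins per spatial dimension at level p
        cells += bins * bins;  // 4^p pooled values per channel
      }
      return cells * channels;  // equals ((4^h - 1) / (4 - 1)) * C_in
    }

    int main() {
      // pyramid_height = 3, C_in = 256: (1 + 4 + 16) * 256 = 5376
      std::printf("%d\n", SppOutputWidth(3, 256));
      return 0;
    }
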
diff --git a/paddle/operators/spp_op.h b/paddle/operators/spp_op.h
index 0f2c43ee65be40b5f44dc7b140704ea21424034d..16510cb8260be10f090bf556e8b9145ef35bf44f 100644
--- a/paddle/operators/spp_op.h
+++ b/paddle/operators/spp_op.h
@@ -20,7 +20,7 @@ limitations under the License. */
 
 namespace paddle {
 namespace operators {
-template <typename Place, typename T>
+template <typename DeviceContext, typename T>
 class SppKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
@@ -43,39 +43,32 @@ class SppKernel : public framework::OpKernel<T> {
       std::vector<int> paddings({padding_h, padding_w});
       // pooling output shape
       framework::Tensor out_level;
-      std::vector<int64_t> output_shape_vec({in_x->dims()[0], in_x->dims()[1]});
-      output_shape_vec.push_back(
-          (input_h - kernel_size_h + 2 * padding_h) / kernel_size_h + 1);
-      output_shape_vec.push_back(
-          (input_w - kernel_size_w + 2 * padding_w) / kernel_size_w + 1);
+      std::vector<int64_t> output_shape_vec(
+          {in_x->dims()[0], in_x->dims()[1], bins, bins});
       framework::DDim output_shape(framework::make_ddim(output_shape_vec));
       out_level.mutable_data<T>(output_shape, context.GetPlace());
       // pooling
-      math::Pool2dFunctor<Place, math::MaxPool<T>, T> pool_forward;
+      math::Pool2dFunctor<DeviceContext, math::MaxPool<T>, T> pool_forward;
       math::MaxPool<T> max_process;
-      pool_forward(context.device_context(), *in_x, kernel_size, strides,
-                   paddings, max_process, &out_level);
+      pool_forward(context.template device_context<DeviceContext>(), *in_x,
+                   kernel_size, strides, paddings, max_process, &out_level);
       // flatten pooling output shape
-      framework::Tensor out_flatten_level;
       int output_flatten_w = in_x->dims()[1] * bins * bins;
       std::vector<int64_t> output_flatten_shape_vec(
           {in_x->dims()[0], output_flatten_w});
       framework::DDim output_flatten_shape(
           framework::make_ddim(output_flatten_shape_vec));
-      out_flatten_level.ShareDataWith(out_level);
-      out_flatten_level.Resize(output_flatten_shape);
+      out_level.Resize(output_flatten_shape);
       // concat
-      auto out_flatten_level_stride =
-          framework::stride(out_flatten_level.dims());
-      StridedMemcpy<T>(context.device_context(), out_flatten_level.data<T>(),
-                       out_flatten_level_stride, out_flatten_level.dims(),
+      auto out_level_stride = framework::stride(out_level.dims());
+      StridedMemcpy<T>(context.template device_context<DeviceContext>(),
+                       out_level.data<T>(), out_level_stride, out_level.dims(),
                        out_stride, out->data<T>() + output_offset);
-      output_offset +=
-          out_flatten_level.dims()[1] * out_flatten_level_stride[1];
+      output_offset += out_level.dims()[1] * out_level_stride[1];
     }
   }
 };
-template <typename Place, typename T>
+template <typename DeviceContext, typename T>
 class SppGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
@@ -86,8 +79,8 @@ class SppGradKernel : public framework::OpKernel<T> {
     framework::Tensor* in_x_grad =
         context.Output<framework::Tensor>(framework::GradVarName("X"));
     int pyramid_height = context.template Attr<int>("pyramid_height");
-    auto& device_ctx = context.device_context();
-    math::SetConstant<Place, T> zero;
+    auto& device_ctx = context.template device_context<DeviceContext>();
+    math::SetConstant<DeviceContext, T> zero;
     in_x_grad->mutable_data<T>(context.GetPlace());
     zero(device_ctx, in_x_grad, static_cast<T>(0));
     auto out_stride = framework::stride(out->dims());
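
The bins, kernel_size_h/w, and padding_h/w values that appear as context lines
above and below come from the kernels' per-level loop, which this patch does
not touch. Judging from those context lines, each level p pools the H x W map
into a bins x bins grid with bins = 2^p, padded just enough for bins windows
to cover the input; under that assumption the pooled output at level p is
exactly bins x bins, which is why the forward kernel can now build
output_shape_vec directly as {N, C, bins, bins} instead of recomputing
(input - kernel + 2 * padding) / kernel + 1 per dimension. An illustrative
sketch of that geometry (a reconstruction, not code from the patch):

    #include <cmath>

    // Hypothetical reconstruction of the per-level pooling geometry.
    struct LevelGeometry {
      int bins;       // grid cells per spatial dimension at this level
      int kernel_h;   // window height: ceil(input_h / bins)
      int kernel_w;   // window width:  ceil(input_w / bins)
      int padding_h;  // pad so bins windows of kernel_h cover input_h
      int padding_w;
    };

    LevelGeometry SppLevelGeometry(int p, int input_h, int input_w) {
      LevelGeometry g;
      g.bins = 1 << p;
      g.kernel_h =
          static_cast<int>(std::ceil(input_h / static_cast<double>(g.bins)));
      g.kernel_w =
          static_cast<int>(std::ceil(input_w / static_cast<double>(g.bins)));
      g.padding_h = (g.kernel_h * g.bins - input_h + 1) / 2;
      g.padding_w = (g.kernel_w * g.bins - input_w + 1) / 2;
      return g;
    }
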
@@ -104,45 +97,43 @@ class SppGradKernel : public framework::OpKernel<T> {
       std::vector<int> strides({kernel_size_h, kernel_size_w});
       std::vector<int> paddings({padding_h, padding_w});
       // split out and outgrad  ...  to flatten
-      framework::Tensor out_flatten_level;
-      framework::Tensor outgrad_flatten_level;
+      framework::Tensor out_level;
+      framework::Tensor outgrad_level;
       int out_flatten_w = in_x->dims()[1] * bins * bins;
       std::vector<int64_t> out_flatten_shape_vec(
           {in_x->dims()[0], out_flatten_w});
       framework::DDim out_flatten_shape(
           framework::make_ddim(out_flatten_shape_vec));
-      out_flatten_level.mutable_data<T>(out_flatten_shape, context.GetPlace());
-      outgrad_flatten_level.mutable_data<T>(out_flatten_shape,
-                                            context.GetPlace());
-      auto flatten_stride = framework::stride(out_flatten_level.dims());
+      out_level.mutable_data<T>(out_flatten_shape, context.GetPlace());
+      outgrad_level.mutable_data<T>(out_flatten_shape, context.GetPlace());
+      auto flatten_stride = framework::stride(out_level.dims());
       // memcpy
-      StridedMemcpy<T>(context.device_context(), out->data<T>() + out_offset,
-                       out_stride, out_flatten_level.dims(), flatten_stride,
-                       out_flatten_level.data<T>());
+      StridedMemcpy<T>(context.template device_context<DeviceContext>(),
+                       out->data<T>() + out_offset, out_stride,
+                       out_level.dims(), flatten_stride, out_level.data<T>());
 
-      StridedMemcpy<T>(context.device_context(),
+      StridedMemcpy<T>(context.template device_context<DeviceContext>(),
                        out_grad->data<T>() + out_offset, out_stride,
-                       outgrad_flatten_level.dims(), flatten_stride,
-                       outgrad_flatten_level.data<T>());
-      out_offset += out_flatten_level.dims()[1] * out_stride[1];
+                       outgrad_level.dims(), flatten_stride,
+                       outgrad_level.data<T>());
+      out_offset += out_level.dims()[1] * out_stride[1];
       // flatten backward to nchw
-      framework::Tensor out_level;
-      framework::Tensor outgrad_level;
+
       std::vector<int64_t> out_shape_vec({in_x->dims()[0], in_x->dims()[1]});
       out_shape_vec.push_back(
           (input_h - kernel_size_h + 2 * padding_h) / kernel_size_h + 1);
       out_shape_vec.push_back(
           (input_w - kernel_size_w + 2 * padding_w) / kernel_size_w + 1);
       framework::DDim out_shape(framework::make_ddim(out_shape_vec));
-      out_level.ShareDataWith(out_flatten_level);
+      out_level.ShareDataWith(out_level);
       out_level.Resize(out_shape);
-      outgrad_level.ShareDataWith(outgrad_flatten_level);
+      outgrad_level.ShareDataWith(outgrad_level);
       outgrad_level.Resize(out_shape);
       // pooling backward
-      math::MaxPool2dGradFunctor<Place, T> pool2d_backward;
-      pool2d_backward(context.device_context(), *in_x, *&out_level,
-                      *&outgrad_level, kernel_size, strides, paddings,
-                      in_x_grad);
+      math::MaxPool2dGradFunctor<DeviceContext, T> pool2d_backward;
+      pool2d_backward(context.template device_context<DeviceContext>(), *in_x,
+                      *&out_level, *&outgrad_level, kernel_size, strides,
+                      paddings, in_x_grad);
     }
   }
 };
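
The backward pass mirrors the forward concatenation: each pyramid level owns
a contiguous column slice of width C * bins * bins inside the [N, W_out]
output gradient, the two StridedMemcpy calls carve that slice out row by row,
and out_offset then advances past it. A plain-C++ sketch of the same split
(SplitLevels is a hypothetical stand-in for the StridedMemcpy logic, not an
API from the patch):

    #include <utility>
    #include <vector>

    // Split a row-major [n, total_w] buffer into per-level [n, c * 4^p]
    // slices, advancing a column offset exactly as the grad kernel does.
    void SplitLevels(const std::vector<float>& concat, int n, int c,
                     int pyramid_height,
                     std::vector<std::vector<float>>* levels) {
      int total_w = 0;
      for (int p = 0; p < pyramid_height; ++p) {
        int bins = 1 << p;
        total_w += c * bins * bins;
      }
      int offset = 0;  // column offset, like out_offset / out_stride[1]
      for (int p = 0; p < pyramid_height; ++p) {
        int bins = 1 << p;
        int w = c * bins * bins;
        std::vector<float> level(n * w);
        for (int row = 0; row < n; ++row)    // strided, row-by-row copy
          for (int col = 0; col < w; ++col)
            level[row * w + col] = concat[row * total_w + offset + col];
        levels->push_back(std::move(level));
        offset += w;
      }
    }

Each per-level slice is then viewed as [N, C, bins, bins] again (the Resize
after ShareDataWith) so that MaxPool2dGradFunctor can route the gradient back
through the max locations of that level's pooling.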