Commit ea093283 authored by: S sweetsky0901

for code review by zhaolong

Parent 89de58d9
--- a/paddle/operators/spp_op.cc
+++ b/paddle/operators/spp_op.cc
@@ -31,9 +31,15 @@ class SppOpMaker : public framework::OpProtoAndCheckerMaker {
              "M = C * H * W");
     AddAttr<int>("pyramid_height", "(int), multi level pooling");
     AddComment(R"DOC(
-        "Does spatial pyramid pooling on the input image by taking the max,
-        etc. within regions so that the result vector of different sized
-        images are of the same size
+        "With spatial pyramid pooling, the input image can
+        be of any sizes. This not only allows arbitrary aspect
+        ratios, but also allows arbitrary scales. We can resize
+        the input image to any scale (e.g., min(w, h)=180, 224,
+        ...) and apply the same deep network. When the
+        input image is at different scales, the network (with
+        the same filter sizes) will extract features at different
+        scales. The scales play important roles in traditional
+        methods.
         Input shape: $(N, C_{in}, H_{in}, W_{in})$
         Output shape: $(H_{out}, W_{out})$
         Where
@@ -41,6 +47,7 @@ class SppOpMaker : public framework::OpProtoAndCheckerMaker {
           H_{out} = N \\
           W_{out} = (((4^{pyramid\_height}) - 1) / (4 - 1)) * C_{in}
         $$
+        paper https://arxiv.org/pdf/1406.4729v4.pdf
       )DOC");
   }
 };
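
A quick sanity check of the W_{out} formula above: level p of the pyramid pools the feature map into 2^p x 2^p bins, so it contributes 4^p * C_{in} values per sample, and summing that geometric series over levels 0 .. pyramid_height - 1 gives the (4^{pyramid_height} - 1) / (4 - 1) factor. For pyramid_height = 3 and C_{in} = 64, for example:

$$
W_{out} = ((4^3 - 1) / (4 - 1)) * 64 = (1 + 4 + 16) * 64 = 21 * 64 = 1344
$$
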
@@ -79,8 +86,9 @@ class SppOpGrad : public framework::OperatorWithKernel {
 namespace ops = paddle::operators;
 REGISTER_OP(spp, ops::SppOp, ops::SppOpMaker, spp_grad, ops::SppOpGrad);
-REGISTER_OP_CPU_KERNEL(spp, ops::SppKernel<paddle::platform::CPUPlace, float>,
-                       ops::SppKernel<paddle::platform::CPUPlace, double>);
-REGISTER_OP_CPU_KERNEL(spp_grad,
-                       ops::SppGradKernel<paddle::platform::CPUPlace, float>,
-                       ops::SppGradKernel<paddle::platform::CPUPlace, double>);
+REGISTER_OP_CPU_KERNEL(
+    spp, ops::SppKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::SppKernel<paddle::platform::CPUDeviceContext, double>);
+REGISTER_OP_CPU_KERNEL(
+    spp_grad, ops::SppGradKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::SppGradKernel<paddle::platform::CPUDeviceContext, double>);
--- a/paddle/operators/spp_op.cu.cc
+++ b/paddle/operators/spp_op.cu.cc
@@ -15,8 +15,9 @@ limitations under the License. */
 #include "paddle/operators/spp_op.h"
 namespace ops = paddle::operators;
-REGISTER_OP_GPU_KERNEL(spp, ops::SppKernel<paddle::platform::GPUPlace, float>,
-                       ops::SppKernel<paddle::platform::GPUPlace, double>);
-REGISTER_OP_GPU_KERNEL(spp_grad,
-                       ops::SppGradKernel<paddle::platform::GPUPlace, float>,
-                       ops::SppGradKernel<paddle::platform::GPUPlace, double>);
+REGISTER_OP_CUDA_KERNEL(
+    spp, ops::SppKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::SppKernel<paddle::platform::CUDADeviceContext, double>);
+REGISTER_OP_CUDA_KERNEL(
+    spp_grad, ops::SppGradKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::SppGradKernel<paddle::platform::CUDADeviceContext, double>);
--- a/paddle/operators/spp_op.h
+++ b/paddle/operators/spp_op.h
@@ -20,7 +20,7 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-template <typename Place, typename T>
+template <typename DeviceContext, typename T>
 class SppKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
@@ -43,39 +43,32 @@ class SppKernel : public framework::OpKernel<T> {
       std::vector<int> paddings({padding_h, padding_w});
       // pooling output shape
       framework::Tensor out_level;
-      std::vector<int64_t> output_shape_vec({in_x->dims()[0], in_x->dims()[1]});
-      output_shape_vec.push_back(
-          (input_h - kernel_size_h + 2 * padding_h) / kernel_size_h + 1);
-      output_shape_vec.push_back(
-          (input_w - kernel_size_w + 2 * padding_w) / kernel_size_w + 1);
+      std::vector<int64_t> output_shape_vec(
+          {in_x->dims()[0], in_x->dims()[1], bins, bins});
       framework::DDim output_shape(framework::make_ddim(output_shape_vec));
       out_level.mutable_data<T>(output_shape, context.GetPlace());
       // pooling
-      math::Pool2dFunctor<Place, math::MaxPool<T>, T> pool_forward;
+      math::Pool2dFunctor<DeviceContext, math::MaxPool<T>, T> pool_forward;
       math::MaxPool<T> max_process;
-      pool_forward(context.device_context(), *in_x, kernel_size, strides,
-                   paddings, max_process, &out_level);
+      pool_forward(context.template device_context<DeviceContext>(), *in_x,
+                   kernel_size, strides, paddings, max_process, &out_level);
       // flatten pooling output shape
-      framework::Tensor out_flatten_level;
       int output_flatten_w = in_x->dims()[1] * bins * bins;
       std::vector<int64_t> output_flatten_shape_vec(
           {in_x->dims()[0], output_flatten_w});
       framework::DDim output_flatten_shape(
           framework::make_ddim(output_flatten_shape_vec));
-      out_flatten_level.ShareDataWith(out_level);
-      out_flatten_level.Resize(output_flatten_shape);
+      out_level.Resize(output_flatten_shape);
       // concat
-      auto out_flatten_level_stride =
-          framework::stride(out_flatten_level.dims());
-      StridedMemcpy<T>(context.device_context(), out_flatten_level.data<T>(),
-                       out_flatten_level_stride, out_flatten_level.dims(),
-                       out_stride, out->data<T>() + output_offset);
-      output_offset +=
-          out_flatten_level.dims()[1] * out_flatten_level_stride[1];
+      auto out_level_stride = framework::stride(out_level.dims());
+      StridedMemcpy<T>(context.template device_context<DeviceContext>(),
+                       out_level.data<T>(), out_level_stride, out_level.dims(),
+                       out_stride, out->data<T>() + output_offset);
+      output_offset += out_level.dims()[1] * out_level_stride[1];
     }
   }
 };
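
Context for reviewers: the per-level setup (bins, kernel_size, strides, paddings) sits just above this hunk and is not shown in the diff. The stand-alone sketch below reconstructs that geometry from the shapes the hunk does use ([N, C, bins, bins] per level, flattened to C * bins * bins columns before the concat); the ceil-based kernel sizing and the padding rule are assumptions for illustration, not lines from this commit.

// Stand-alone sketch (not part of this commit): per-level SPP geometry.
#include <cmath>
#include <cstdio>

int main() {
  const int pyramid_height = 3;          // op attribute
  const int channels = 64;               // C_in, example value
  const int input_h = 13, input_w = 13;  // H_in, W_in, example values
  int output_offset = 0;
  for (int p = 0; p < pyramid_height; ++p) {
    const int bins = 1 << p;  // 2^p bins per side at level p
    // Assumed sizing: kernel * bins covers the (padded) input.
    const int kernel_h =
        static_cast<int>(std::ceil(input_h / static_cast<double>(bins)));
    const int kernel_w =
        static_cast<int>(std::ceil(input_w / static_cast<double>(bins)));
    const int padding_h = (kernel_h * bins - input_h + 1) / 2;
    const int padding_w = (kernel_w * bins - input_w + 1) / 2;
    // Each level pools to [N, C, bins, bins], is flattened to
    // C * bins * bins columns, and is concatenated at output_offset,
    // as in the kernel above.
    const int flatten_w = channels * bins * bins;
    std::printf("level %d: kernel %dx%d, pad %dx%d, width %d at offset %d\n",
                p, kernel_h, kernel_w, padding_h, padding_w, flatten_w,
                output_offset);
    output_offset += flatten_w;
  }
  // output_offset ends at ((4^3 - 1) / (4 - 1)) * 64 = 21 * 64 = 1344 = W_out.
  return 0;
}
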
-template <typename Place, typename T>
+template <typename DeviceContext, typename T>
 class SppGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
@@ -86,8 +79,8 @@ class SppGradKernel : public framework::OpKernel<T> {
     framework::Tensor* in_x_grad =
         context.Output<framework::Tensor>(framework::GradVarName("X"));
     int pyramid_height = context.template Attr<int>("pyramid_height");
-    auto& device_ctx = context.device_context();
-    math::SetConstant<Place, T> zero;
+    auto& device_ctx = context.template device_context<DeviceContext>();
+    math::SetConstant<DeviceContext, T> zero;
     in_x_grad->mutable_data<T>(context.GetPlace());
     zero(device_ctx, in_x_grad, static_cast<T>(0));
     auto out_stride = framework::stride(out->dims());
@@ -104,45 +97,43 @@ class SppGradKernel : public framework::OpKernel<T> {
       std::vector<int> strides({kernel_size_h, kernel_size_w});
       std::vector<int> paddings({padding_h, padding_w});
       // split out and outgrad ... to flatten
-      framework::Tensor out_flatten_level;
-      framework::Tensor outgrad_flatten_level;
+      framework::Tensor out_level;
+      framework::Tensor outgrad_level;
       int out_flatten_w = in_x->dims()[1] * bins * bins;
       std::vector<int64_t> out_flatten_shape_vec(
           {in_x->dims()[0], out_flatten_w});
       framework::DDim out_flatten_shape(
           framework::make_ddim(out_flatten_shape_vec));
-      out_flatten_level.mutable_data<T>(out_flatten_shape, context.GetPlace());
-      outgrad_flatten_level.mutable_data<T>(out_flatten_shape,
-                                            context.GetPlace());
-      auto flatten_stride = framework::stride(out_flatten_level.dims());
+      out_level.mutable_data<T>(out_flatten_shape, context.GetPlace());
+      outgrad_level.mutable_data<T>(out_flatten_shape, context.GetPlace());
+      auto flatten_stride = framework::stride(out_level.dims());
       // memcpy
-      StridedMemcpy<T>(context.device_context(), out->data<T>() + out_offset,
-                       out_stride, out_flatten_level.dims(), flatten_stride,
-                       out_flatten_level.data<T>());
-      StridedMemcpy<T>(context.device_context(),
-                       out_grad->data<T>() + out_offset, out_stride,
-                       outgrad_flatten_level.dims(), flatten_stride,
-                       outgrad_flatten_level.data<T>());
-      out_offset += out_flatten_level.dims()[1] * out_stride[1];
+      StridedMemcpy<T>(context.template device_context<DeviceContext>(),
+                       out->data<T>() + out_offset, out_stride,
+                       out_level.dims(), flatten_stride, out_level.data<T>());
+      StridedMemcpy<T>(context.template device_context<DeviceContext>(),
+                       out_grad->data<T>() + out_offset, out_stride,
+                       outgrad_level.dims(), flatten_stride,
+                       outgrad_level.data<T>());
+      out_offset += out_level.dims()[1] * out_stride[1];
       // flatten backward to nchw
-      framework::Tensor out_level;
-      framework::Tensor outgrad_level;
       std::vector<int64_t> out_shape_vec({in_x->dims()[0], in_x->dims()[1]});
       out_shape_vec.push_back(
           (input_h - kernel_size_h + 2 * padding_h) / kernel_size_h + 1);
       out_shape_vec.push_back(
           (input_w - kernel_size_w + 2 * padding_w) / kernel_size_w + 1);
       framework::DDim out_shape(framework::make_ddim(out_shape_vec));
-      out_level.ShareDataWith(out_flatten_level);
       out_level.Resize(out_shape);
-      outgrad_level.ShareDataWith(outgrad_flatten_level);
       outgrad_level.Resize(out_shape);
       // pooling backward
-      math::MaxPool2dGradFunctor<Place, T> pool2d_backward;
-      pool2d_backward(context.device_context(), *in_x, *&out_level,
-                      *&outgrad_level, kernel_size, strides, paddings,
-                      in_x_grad);
+      math::MaxPool2dGradFunctor<DeviceContext, T> pool2d_backward;
+      pool2d_backward(context.template device_context<DeviceContext>(), *in_x,
+                      out_level, outgrad_level, kernel_size, strides,
+                      paddings, in_x_grad);
     }
   }
 };
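
The backward split above is the mirror image of the forward concat: each pyramid level occupies a fixed block of columns inside the [N, W_out] output tensor, and StridedMemcpy walks the rows with the output's stride to extract (or insert) that block. Below is a minimal stand-alone illustration of the access pattern, where CopyBlock is a hypothetical stand-in for Paddle's StridedMemcpy, not Paddle API:

// Stand-alone sketch (not part of this commit): strided block copy.
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <vector>

// Copy a rows x cols block out of a row-major source whose rows are
// src_stride elements apart, into a densely packed destination.
void CopyBlock(const float* src, size_t src_stride, size_t rows, size_t cols,
               float* dst) {
  for (size_t r = 0; r < rows; ++r) {
    std::memcpy(dst + r * cols, src + r * src_stride, cols * sizeof(float));
  }
}

int main() {
  // Concatenated output for N = 2, C_in = 5, pyramid_height = 2:
  // level 0 is 5 columns wide (C*1*1), level 1 is 20 (C*2*2), W_out = 25.
  std::vector<float> out(2 * 25);
  for (size_t i = 0; i < out.size(); ++i) out[i] = static_cast<float>(i);
  // Split out level 1: the 20-column block starting at column offset 5,
  // mirroring out_grad->data<T>() + out_offset in the kernel above.
  std::vector<float> level(2 * 20);
  CopyBlock(out.data() + 5, /*src_stride=*/25, /*rows=*/2, /*cols=*/20,
            level.data());
  std::printf("%g %g\n", level[0], level[20]);  // prints 5 30
  return 0;
}
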