diff --git a/paddle/operators/im2sequence_op.cc b/paddle/operators/im2sequence_op.cc
index 9b2397bdc8812ed289253e0df019780e0c2cc501..9c9802c043f2f6c24a74a4d1861c88337e14d259 100644
--- a/paddle/operators/im2sequence_op.cc
+++ b/paddle/operators/im2sequence_op.cc
@@ -30,28 +30,24 @@ class Im2SequenceOp : public framework::OperatorWithKernel {
     auto in_dim = ctx->GetInputDim("X");
     PADDLE_ENFORCE_EQ(in_dim.size(), 4,
-                      "Input(X) format must be 4D tensor, eg., NCHW.");
+                      "Input(X) format must be 4D tensor, e.g., NCHW.");
 
-    int block_height = ctx->Attrs().Get<int>("block_height");
-    int block_width = ctx->Attrs().Get<int>("block_width");
-    int stride_height = ctx->Attrs().Get<int>("stride_height");
-    int stride_width = ctx->Attrs().Get<int>("stride_width");
-    int padding_height = ctx->Attrs().Get<int>("padding_height");
-    int padding_width = ctx->Attrs().Get<int>("padding_width");
+    auto kernels = ctx->Attrs().Get<std::vector<int>>("kernels");
+    auto strides = ctx->Attrs().Get<std::vector<int>>("strides");
+    auto paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
 
     int batch_size = in_dim[0];
     int img_channels = in_dim[1];
     int img_height = in_dim[2];
     int img_width = in_dim[3];
 
-    int output_height = get_output_size(img_height, block_height, stride_height,
-                                        padding_height);
+    int output_height = OutputSize(img_height, kernels[0], paddings[0],
+                                   paddings[2], strides[0]);
     int output_width =
-        get_output_size(img_width, block_width, stride_width, padding_width);
+        OutputSize(img_width, kernels[1], paddings[1], paddings[3], strides[1]);
 
     ctx->SetOutputDim("Out", {batch_size * output_height * output_width,
-                              img_channels * block_height * block_width});
-    // TODO(wanghaoshuang): cal lod in complie time
+                              img_channels * kernels[0] * kernels[1]});
   }
 };
 
@@ -66,26 +62,30 @@ class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker {
              "H: height"
              "W: width");
     AddOutput("Out", "(LodTensor)The output data of im2sequence op,");
-    AddAttr<int>("block_height", "(int)height of block.");
-    AddAttr<int>("block_width", "(int)width of block.");
-    AddAttr<int>("stride_height", "(int)height of stride.");
-    AddAttr<int>("stride_width", "(int)width of stride.");
-    AddAttr<int>("padding_height", "(int)height of padding.");
-    AddAttr<int>("padding_width", "(int)width of padding.");
+    AddAttr<std::vector<int>>("kernels",
+                              "(vector<int>), the "
+                              "kernels(kernel_height, kernel_width)");
+    AddAttr<std::vector<int>>("strides",
+                              "(vector<int> default:{1, 1}), the "
+                              "strides(h_stride, w_stride)")
+        .SetDefault({1, 1});
+    AddAttr<std::vector<int>>("paddings",
+                              "(vector<int> default:{0, 0, 0, 0}), the "
+                              "paddings(up_pad, left_pad, down_pad, right_pad)")
+        .SetDefault({0, 0, 0, 0});
     AddComment(R"DOC(
-Convert feature map to minibatch matrix.
-- matirx height is: output_height * output_width
-- matrix width is: block_height * block_width * channels
+This op uses kernels to scan images and converts these images to sequences.
+After expanding, the number of time steps is output_height * output_width
+and the dimension of each time step is kernel_height * kernel_width * channels,
+in which:
 
 output_height =
-    1 + (2 * padding_height + img_height - block_height + stride_height - 1) /
+    1 + (padding_up + padding_down + img_height - kernel_height + stride_height - 1) /
             stride_height;
 output_width =
-    1 + (2 * padding_width + img_width - block_width + stride_width - 1) /
+    1 + (padding_left + padding_right + img_width - kernel_width + stride_width - 1) /
             stride_width;
 
-After expanding, The number of time steps are output_height * output_width
-and the dimension of each time step is block_height * block_width * channels.
 
 This op can be used after convolution neural network, and before recurrent neural network.
 
 Given:
 
@@ -109,12 +109,9 @@ x.dims = {2, 2, 3, 3}
 
 And:
 
-block_height = 2
-block_width = 2
-stride_height = 1
-stride_width = 1
-padding_height = 0
-padding_width = 0
+kernels = [2, 2]
+strides = [1, 1]
+paddings = [0, 0, 0, 0]
 
 Then:
 
diff --git a/paddle/operators/im2sequence_op.h b/paddle/operators/im2sequence_op.h
index 85d6cac44484ec4115b9c49b2c166c13af3f5552..352d290b1b690b92889f0e062c8fa6b824c06cbb 100644
--- a/paddle/operators/im2sequence_op.h
+++ b/paddle/operators/im2sequence_op.h
@@ -26,9 +26,11 @@ namespace operators {
 using Tensor = framework::Tensor;
 using LoDTensor = framework::LoDTensor;
 
-inline int get_output_size(int img_size, int block_size, int stride,
-                           int padding) {
-  return (1 + (img_size + 2 * padding - block_size + stride - 1) / stride);
+inline int OutputSize(int input_size, int filter_size, int padding_0,
+                      int padding_1, int stride) {
+  const int output_size =
+      (input_size + padding_0 + padding_1 - filter_size) / stride + 1;
+  return output_size;
 }
 
 template <typename DeviceContext, typename T>
@@ -47,32 +49,24 @@ class Im2SequenceKernel : public framework::OpKernel<T> {
     int img_channels = in_dim[1];
     int img_height = in_dim[2];
     int img_width = in_dim[3];
-    int block_height = ctx.Attr<int>("block_height");
-    int block_width = ctx.Attr<int>("block_width");
-    int stride_height = ctx.Attr<int>("stride_height");
-    int stride_width = ctx.Attr<int>("stride_width");
-    int padding_height = ctx.Attr<int>("padding_height");
-    int padding_width = ctx.Attr<int>("padding_width");
-
-    int output_height = get_output_size(img_height, block_height, stride_height,
-                                        padding_height);
+
+    auto kernels = ctx.Attr<std::vector<int>>("kernels");
+    auto strides = ctx.Attr<std::vector<int>>("strides");
+    auto paddings = ctx.Attr<std::vector<int>>("paddings");
+    int output_height =
+        OutputSize(img_height, kernels[0], paddings[0], paddings[2], strides[0]);
     int output_width =
-        get_output_size(img_width, block_width, stride_width, padding_width);
+        OutputSize(img_width, kernels[1], paddings[1], paddings[3], strides[1]);
 
     const std::vector<int> dilations({1, 1});
-    const std::vector<int> strides(
-        {stride_height, stride_width, stride_height, stride_width});
-    const std::vector<int> paddings(
-        {padding_height, padding_width, padding_height, padding_width});
     auto out_dims = out->dims();
     out->Resize({batch_size, out->numel() / batch_size});
     for (int i = 0; i < batch_size; i++) {
       const Tensor src =
           in->Slice(i, i + 1).Resize({img_channels, img_height, img_width});
-      Tensor dst = out->Slice(i, i + 1).Resize({output_height, output_width,
-                                                img_channels, block_height,
-                                                block_width});
+      Tensor dst = out->Slice(i, i + 1).Resize(
+          {output_height, output_width, img_channels, kernels[0], kernels[1]});
 
       math::Im2ColFunctor<math::ColFormat::kOCF, DeviceContext, T> f;
       auto& dev_ctx = ctx.template device_context<DeviceContext>();
@@ -112,22 +106,15 @@ class Im2SequenceGradKernel : public framework::OpKernel<T> {
     int img_height = in_dim[2];
     int img_width = in_dim[3];
 
-    int block_height = ctx.Attr<int>("block_height");
-    int block_width = ctx.Attr<int>("block_width");
-    int stride_height = ctx.Attr<int>("stride_height");
-    int stride_width = ctx.Attr<int>("stride_width");
-    int padding_height = ctx.Attr<int>("padding_height");
-    int padding_width = ctx.Attr<int>("padding_width");
-    int output_height = get_output_size(img_height, block_height, stride_height,
-                                        padding_height);
+    auto kernels = ctx.Attr<std::vector<int>>("kernels");
+    auto strides = ctx.Attr<std::vector<int>>("strides");
+    auto paddings = ctx.Attr<std::vector<int>>("paddings");
+    int output_height = OutputSize(img_height, kernels[0], paddings[0],
+                                   paddings[2], strides[0]);
     int output_width =
-        get_output_size(img_width, block_width, stride_width, padding_width);
+        OutputSize(img_width, kernels[1], paddings[1], paddings[3], strides[1]);
 
     const std::vector<int> dilations({1, 1});
-    const std::vector<int> strides(
-        {stride_height, stride_width, stride_height, stride_width});
-    const std::vector<int> paddings(
-        {padding_height, padding_width, padding_height, padding_width});
 
     auto d_out_dims = d_out->dims();
     d_out->Resize({batch_size, d_out->numel() / batch_size});
@@ -135,8 +122,7 @@ class Im2SequenceGradKernel : public framework::OpKernel<T> {
       Tensor dst =
           d_x->Slice(i, i + 1).Resize({img_channels, img_height, img_width});
       const Tensor src = d_out->Slice(i, i + 1).Resize(
-          {output_height, output_width, img_channels, block_height,
-           block_width});
+          {output_height, output_width, img_channels, kernels[0], kernels[1]});
       math::Col2ImFunctor<math::ColFormat::kOCF, DeviceContext, T> f;
       auto& dev_ctx = ctx.template device_context<DeviceContext>();
       f(dev_ctx, src, dilations, strides, paddings, &dst);
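
For reference, a minimal standalone sketch (not part of the patch) that mirrors the new OutputSize helper from im2sequence_op.h and checks it against the example in the DOC string: with x.dims = {2, 2, 3, 3}, kernels = [2, 2], strides = [1, 1] and paddings = [0, 0, 0, 0], each image yields a 2 x 2 grid of steps, so InferShape sets Out to {2 * 2 * 2, 2 * 2 * 2} = {8, 8}. Note that with the default stride of 1 this floor-style formula agrees with the ceil-style formula quoted in the DOC comment.

// Standalone sanity check (assumption: this copies the OutputSize formula
// from im2sequence_op.h; the main() driver below is illustrative only).
#include <cassert>
#include <vector>

inline int OutputSize(int input_size, int filter_size, int padding_0,
                      int padding_1, int stride) {
  return (input_size + padding_0 + padding_1 - filter_size) / stride + 1;
}

int main() {
  // Values taken from the DOC example: x.dims = {2, 2, 3, 3}.
  const int batch_size = 2, img_channels = 2, img_height = 3, img_width = 3;
  const std::vector<int> kernels{2, 2}, strides{1, 1}, paddings{0, 0, 0, 0};

  int output_height =
      OutputSize(img_height, kernels[0], paddings[0], paddings[2], strides[0]);
  int output_width =
      OutputSize(img_width, kernels[1], paddings[1], paddings[3], strides[1]);
  assert(output_height == 2 && output_width == 2);

  // Out shape as set in Im2SequenceOp::InferShape.
  assert(batch_size * output_height * output_width == 8);  // rows
  assert(img_channels * kernels[0] * kernels[1] == 8);      // cols
  return 0;
}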
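The paddings attribute is ordered (up_pad, left_pad, down_pad, right_pad), so the height direction uses paddings[0] and paddings[2] while the width direction uses paddings[1] and paddings[3]. A small sketch with asymmetric padding (hypothetical values, reusing the OutputSize copy from the sketch above) makes that indexing explicit:

// Asymmetric padding example (hypothetical values, not from the patch).
// paddings = {up, left, down, right}, as documented in the "paddings" attr.
#include <cassert>
#include <vector>

inline int OutputSize(int input_size, int filter_size, int padding_0,
                      int padding_1, int stride) {
  return (input_size + padding_0 + padding_1 - filter_size) / stride + 1;
}

int main() {
  const int img_height = 3, img_width = 3;
  const std::vector<int> kernels{2, 2}, strides{1, 1};
  const std::vector<int> paddings{1, 0, 2, 0};  // up=1, left=0, down=2, right=0

  // Height uses the up/down pads: (3 + 1 + 2 - 2) / 1 + 1 = 5.
  assert(OutputSize(img_height, kernels[0], paddings[0], paddings[2],
                    strides[0]) == 5);
  // Width uses the left/right pads: (3 + 0 + 0 - 2) / 1 + 1 = 2.
  assert(OutputSize(img_width, kernels[1], paddings[1], paddings[3],
                    strides[1]) == 2);
  return 0;
}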
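Since Out is a LoDTensor and, per the DOC string, every image contributes output_height * output_width time steps, the sequence offsets for a batch would be {0, oh*ow, 2*oh*ow, ...}. The sketch below is only an illustration of that bookkeeping under this assumption; it is not code from the patch, which leaves the LoD handling to the kernel.

// Illustrative LoD-offset computation (assumption: each image maps to
// output_height * output_width time steps, as stated in the op's DOC string).
#include <cassert>
#include <cstddef>
#include <vector>

std::vector<size_t> SequenceOffsets(int batch_size, int output_height,
                                    int output_width) {
  std::vector<size_t> lod(1, 0);
  for (int i = 0; i < batch_size; ++i) {
    lod.push_back(lod.back() +
                  static_cast<size_t>(output_height) * output_width);
  }
  return lod;
}

int main() {
  // DOC example: batch of 2 images, each expanded to a 2 x 2 grid of steps.
  auto lod = SequenceOffsets(/*batch_size=*/2, /*output_height=*/2,
                             /*output_width=*/2);
  assert((lod == std::vector<size_t>{0, 4, 8}));
  return 0;
}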