diff --git a/paddle/operators/block_expand_op.cc b/paddle/operators/block_expand_op.cc
index 69c5e02a65809c4d02d5befb83ad525d13429a01..ec467374008d34827e09a3be62fc449902ca52c9 100644
--- a/paddle/operators/block_expand_op.cc
+++ b/paddle/operators/block_expand_op.cc
@@ -24,18 +24,43 @@ class BlockExpandOp : public framework::OperatorWithKernel {
  protected:
   void InferShape(framework::InferShapeContext* ctx) const override {
     using namespace framework;
-    PADDLE_ENFORCE(ctx->HasInput("input"),
+    PADDLE_ENFORCE(ctx->HasInput("X"),
                    "Input of BlockExpandOp should not be null.");
     PADDLE_ENFORCE(ctx->HasOutput("Out"),
-                   "Output(Out) of BlockExpandOp op should not be null.");
+                   "Output of BlockExpandOp should not be null.");
 
-    auto in_dim = ctx->GetInputDim("input");
+    auto in_dim = ctx->GetInputDim("X");
     PADDLE_ENFORCE_EQ(in_dim.size(), 4, "Input format must be NCHW.");
     PADDLE_ENFORCE_GE(in_dim[0], 1, "Input batchsize must >= 1.");
 
-    ctx->ShareLoD("X", /*->*/ "Out");
-
-    // ctx->SetOutputDim("Out", {1});
+    int blockHeight = ctx->Attrs().Get<int>("blockHeight");
+    int blockWidth = ctx->Attrs().Get<int>("blockWidth");
+    int strideHeight = ctx->Attrs().Get<int>("strideHeight");
+    int strideWidth = ctx->Attrs().Get<int>("strideWidth");
+    int paddingHeight = ctx->Attrs().Get<int>("paddingHeight");
+    int paddingWidth = ctx->Attrs().Get<int>("paddingWidth");
+
+    int N = in_dim[0];
+    int C = in_dim[1];
+    int imgHeight = in_dim[2];
+    int imgWidth = in_dim[3];
+
+    int outputHeight = 0;
+    int outputWidth = 0;
+
+    get_blockexpand_output_shape(imgHeight, imgWidth, blockHeight, blockWidth,
+                                 strideHeight, strideWidth, paddingHeight,
+                                 paddingWidth, outputHeight, outputWidth);
+
+    // The result of im2col is [outputHeight, outputWidth,
+    // inputChannels, blockHeight, blockWidth], and it is easy to
+    // reshape it into [seqLength, stepSize], where seqLength is equal to
+    // outputHeight * outputWidth and stepSize is equal to
+    // inputChannels * blockHeight * blockWidth.
+    ctx->SetOutputDim(
+        "Out", {N, outputHeight, outputWidth, C, blockHeight, blockWidth});
+
+    // ctx->ShareLoD("X", /*->*/ "Out");
   }
 };
 
@@ -44,41 +69,36 @@ class BlockExpandOpMaker : public framework::OpProtoAndCheckerMaker {
   BlockExpandOpMaker(framework::OpProto* proto,
                      framework::OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("input", "The input of block_expand op");
-    AddOutput("out", "The output of block_expand op");
-    AddAttr<int>("block_height",
-                 R"DOC(
-)DOC");
-    AddAttr<int>("block_width",
-                 R"DOC(
-)DOC");
-    AddAttr<int>("stride_height",
-                 R"DOC(
-)DOC");
-    AddAttr<int>("stride_width",
-                 R"DOC(
-)DOC");
-    AddAttr<int>("padding_height",
-                 R"DOC(
-)DOC");
-    AddAttr<int>("padding_width",
-                 R"DOC(
-)DOC");
+    AddInput("X", R"DOC(
+(Tensor)The input tensor has NCHW format.
+    N: batch size
+    C: channels
+    H: height
+    W: width
+)DOC");
+    AddOutput("Out", "(LoDTensor)The output data of block_expand op.");
+    AddAttr<int>("blockHeight", "(int)height of block.");
+    AddAttr<int>("blockWidth", "(int)width of block.");
+    AddAttr<int>("strideHeight", "(int)height of stride.");
+    AddAttr<int>("strideWidth", "(int)width of stride.");
+    AddAttr<int>("paddingHeight", "(int)height of padding.");
+    AddAttr<int>("paddingWidth", "(int)width of padding.");
     AddComment(R"DOC(
 Expand feature map to minibatch matrix.
-- matrix width is: blockH_ * blockW_ * channels_
-- matirx height is: outputH_ * outputW_
+- matrix height is: outputHeight * outputWidth
+- matrix width is: blockHeight * blockWidth * channels
 
-outputH\_ = 1 + (2paddingH\_ + imgSizeH\_ - blockH\_ + strideH\_ - 1) /
-            strideH\_ \\
-outputW\_ = 1 + (2paddingW\_ + imgSizeW\_ - blockW\_ + strideW\_ - 1) /
-            strideW\_
+outputHeight =
+    1 + (2 * paddingHeight + imgHeight - blockHeight + strideHeight - 1) /
+            strideHeight;
+outputWidth =
+    1 + (2 * paddingWidth + imgWidth - blockWidth + strideWidth - 1) /
+            strideWidth;
 
 The expand method is the same with ExpandConvLayer, but saved the transposed
-value. After expanding, output_.sequenceStartPositions will store timeline.
-The number of time steps are outputH_ * outputW_ and the dimension of each
-time step is blockH_ * blockW_ * channels_. This layer can be used after
-convolution neural network, and before recurrent neural network.
+value. After expanding, the number of time steps is outputHeight * outputWidth
+and the dimension of each time step is blockHeight * blockWidth * channels.
+This layer can be used after a convolutional neural network, and before a recurrent neural network.
 )DOC");
   }
 };
 
@@ -98,7 +118,7 @@ namespace ops = paddle::operators;
 REGISTER_OP(block_expand, ops::BlockExpandOp, ops::BlockExpandOpMaker,
             block_expand_grad, ops::BlockExpandOpGrad);
 REGISTER_OP_CPU_KERNEL(
-    block_expand, ops::BlockExpanddKernel<paddle::platform::CPUPlace, float>);
+    block_expand, ops::BlockExpandKernel<paddle::platform::CPUPlace, float>);
 REGISTER_OP_CPU_KERNEL(
     block_expand_grad,
     ops::BlockExpandGradKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/block_expand_op.h b/paddle/operators/block_expand_op.h
index c0521dbbadb47a13c7018a523b91b4cac1fee6ec..58f9e4c6adf41ff7cd36355390229eb59cd6bf9f 100644
--- a/paddle/operators/block_expand_op.h
+++ b/paddle/operators/block_expand_op.h
@@ -18,10 +18,26 @@
 
 #include "paddle/framework/eigen.h"
 #include "paddle/framework/op_registry.h"
+#include "paddle/operators/math/im2col.h"
 
 namespace paddle {
 namespace operators {
 
+inline void get_blockexpand_output_shape(int imgHeight, int imgWidth,
+                                         int blockHeight, int blockWidth,
+                                         int strideHeight, int strideWidth,
+                                         int paddingHeight, int paddingWidth,
+                                         int& outputHeight, int& outputWidth) {
+  outputHeight =
+      1 +
+      (imgHeight + 2 * paddingHeight - blockHeight + strideHeight - 1) /
+          strideHeight;
+
+  outputWidth = 1 +
+                (imgWidth + 2 * paddingWidth - blockWidth + strideWidth - 1) /
+                    strideWidth;
+}
+
 template <typename Place, typename T>
 class BlockExpandKernel : public framework::OpKernel<T> {
  public:
@@ -34,39 +50,30 @@ class BlockExpandKernel : public framework::OpKernel<T> {
     auto in_dim = in->dims();
     int N = in_dim[0];
     int C = in_dim[1];
-
-    int in_height = in_dim[2];
-    int in_width = in_dim[3];
-
-    int block_height = ctx.Attr<int>("block_height");
-    int block_width = ctx.Attr<int>("block_width");
-    int stride_height = ctx.Attr<int>("stride_height");
-    int stride_width = ctx.Attr<int>("stride_width");
-    int padding_height = ctx.Attr<int>("padding_height");
-    int padding_width = ctx.Attr<int>("padding_width");
-
-    int output_height =
-        1 +
-        (in_height + 2 * padding_height - block_height + stride_height - 1) /
-            stride_height;
-
-    int output_width =
-        1 +
-        (in_width + 2 * padding_width - block_width + stride_width - 1) /
-            stride_width;
-
-    Tensor col;
-    if (clo_format = KCFO) {
-      col.Resize(
-          {N, C, filter_height, filter_width, output_height, output_width});
-    } else {
-      col.Resize(
-          {N, output_height, output_width, C, filter_height, filter_width});
-    }
-
-    for (size_t i = 0; i < N; i++) {
-      Im2ColFunctor(ctx, one_img, col, stride[0],
-                    stride[1], padding[0], padding[1]);
+    int imgHeight = in_dim[2];
+    int imgWidth = in_dim[3];
+
+    int blockHeight = ctx.Attr<int>("blockHeight");
+    int blockWidth = ctx.Attr<int>("blockWidth");
+    int strideHeight = ctx.Attr<int>("strideHeight");
+    int strideWidth = ctx.Attr<int>("strideWidth");
+    int paddingHeight = ctx.Attr<int>("paddingHeight");
+    int paddingWidth = ctx.Attr<int>("paddingWidth");
+
+    int outputHeight = 0;
+    int outputWidth = 0;
+
+    get_blockexpand_output_shape(imgHeight, imgWidth, blockHeight, blockWidth,
+                                 strideHeight, strideWidth, paddingHeight,
+                                 paddingWidth, outputHeight, outputWidth);
+
+    for (int i = 0; i < N; i++) {
+      // Each sample is a [C, imgHeight, imgWidth] image; its output slice
+      // holds the im2col result in OCF layout.
+      Tensor src = in->Slice(i, i + 1).Resize({C, imgHeight, imgWidth});
+      Tensor dst = out->Slice(i, i + 1).Resize(
+          {outputHeight, outputWidth, C, blockHeight, blockWidth});
+      math::Im2ColFunctor<math::ColFormat::kOCF, Place, T> f;
+      f(ctx.device_context(), src, dst, strideHeight, strideWidth,
+        paddingHeight, paddingWidth);
     }
   }
 };
 
@@ -75,13 +82,7 @@ template <typename Place, typename T>
 class BlockExpandGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    using Tensor = framework::Tensor;
-    /*
-    int x_num_col_dims = ctx.template Attr<int>("x_num_col_dims");
-    int y_num_col_dims = ctx.template Attr<int>("y_num_col_dims");
-    const Tensor* x = ctx.Input<Tensor>("X");
-    const Tensor* y = ctx.Input<Tensor>("Y");
-    */
+    using namespace framework;
   }
 };
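
Reviewer note: as a sanity check on the shape arithmetic this patch introduces, below is a minimal standalone sketch of the same computation with a worked example. It is not part of the patch; the helper name GetBlockExpandOutputShape and the sample sizes are invented for illustration, and only the formula mirrors get_blockexpand_output_shape above.

#include <cstdio>

// Mirrors get_blockexpand_output_shape: the number of window positions per
// axis is 1 + ceil((size + 2 * padding - block) / stride), written with
// integer arithmetic.
inline void GetBlockExpandOutputShape(int imgHeight, int imgWidth,
                                      int blockHeight, int blockWidth,
                                      int strideHeight, int strideWidth,
                                      int paddingHeight, int paddingWidth,
                                      int* outputHeight, int* outputWidth) {
  *outputHeight =
      1 + (imgHeight + 2 * paddingHeight - blockHeight + strideHeight - 1) /
              strideHeight;
  *outputWidth =
      1 + (imgWidth + 2 * paddingWidth - blockWidth + strideWidth - 1) /
              strideWidth;
}

int main() {
  // Hypothetical sample: a 3-channel 32x32 image, 4x4 blocks, stride 2,
  // no padding.
  const int C = 3;
  int outH = 0, outW = 0;
  GetBlockExpandOutputShape(/*imgHeight=*/32, /*imgWidth=*/32,
                            /*blockHeight=*/4, /*blockWidth=*/4,
                            /*strideHeight=*/2, /*strideWidth=*/2,
                            /*paddingHeight=*/0, /*paddingWidth=*/0,
                            &outH, &outW);
  // outH = 1 + (32 + 0 - 4 + 2 - 1) / 2 = 1 + 14 = 15, likewise outW, so
  // the expanded sequence has 15 * 15 = 225 time steps of dimension
  // 3 * 4 * 4 = 48, matching the [seqLength, stepSize] reshape described
  // in InferShape.
  std::printf("outputHeight=%d outputWidth=%d seqLength=%d stepSize=%d\n",
              outH, outW, outH * outW, C * 4 * 4);
  return 0;
}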