diff --git a/paddle/operators/im2sequence_op.cc b/paddle/operators/im2sequence_op.cc
new file mode 100644
--- /dev/null
+++ b/paddle/operators/im2sequence_op.cc
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/operators/im2sequence_op.h"

namespace paddle {
namespace operators {

class Im2SequenceOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  // Derives Out's 2-D shape from the NCHW input and the sliding-window
  // attributes: one row per (batch, window) position, one column per
  // element covered by a kernel window.
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of Im2SequenceOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "Output(Out) of Im2SequenceOp op should not be null.");

    auto in_dim = ctx->GetInputDim("X");
    PADDLE_ENFORCE_EQ(in_dim.size(), 4,
                      "Input(X) format must be 4D tensor, eg., NCHW.");

    auto kernels = ctx->Attrs().Get<std::vector<int>>("kernels");
    auto strides = ctx->Attrs().Get<std::vector<int>>("strides");
    auto paddings = ctx->Attrs().Get<std::vector<int>>("paddings");

    int batch_size = in_dim[0];
    int img_channels = in_dim[1];
    int img_height = in_dim[2];
    int img_width = in_dim[3];

    // paddings layout is {up, left, down, right}; height uses [0] and [2],
    // width uses [1] and [3].
    int output_height = OutputSize(img_height, kernels[0], paddings[0],
                                   paddings[2], strides[0]);
    int output_width =
        OutputSize(img_width, kernels[1], paddings[1], paddings[3], strides[1]);

    ctx->SetOutputDim("Out", {batch_size * output_height * output_width,
                              img_channels * kernels[0] * kernels[1]});
  }
};

class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  Im2SequenceOpMaker(OpProto* proto, OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X",
             "(Tensor) The input tensor has NCHW format."
             "N: batch size"
             "C: channels"
             "H: height"
             "W: width");
    AddOutput("Out", "(LodTensor) The output data of im2sequence op,");
    AddAttr<std::vector<int>>("kernels",
                              "(vector<int>), the "
                              "kernels(kernel_height, kernel_width)");
    AddAttr<std::vector<int>>("strides",
                              "(vector<int> default:{1, 1}), the "
                              "strides(h_stride, w_stride)")
        .SetDefault({1, 1});
    AddAttr<std::vector<int>>("paddings",
                              "(vector<int> default:{0, 0, 0, 0}), the "
                              "paddings(up_pad, left_pad, down_pad, right_pad)")
        .SetDefault({0, 0, 0, 0});
    AddComment(R"DOC(
This op uses kernels to scan images and converts these images to sequences.
After expanding, The number of time steps are output_height * output_width
and the dimension of each time step is kernel_height * kernel_width * channels,
in which:

output_height =
    1 + (padding_up + padding_down + img_height - kernel_height + stride_height - 1) /
            stride_height;
output_width =
    1 + (padding_left + padding_right + img_width - kernel_width + stride_width - 1) /
            stride_width;

This op can be used after convolution neural network, and before recurrent neural network.

Given:

x = [[[[ 6.  2.  1.]
       [ 8.  3.  5.]
       [ 0.  2.  6.]]

      [[ 2.  4.  4.]
       [ 6.  3.  0.]
       [ 6.  4.  7.]]]

     [[[ 6.  7.  1.]
       [ 5.  7.  9.]
       [ 2.  4.  8.]]

      [[ 1.  2.  1.]
       [ 1.  3.  5.]
       [ 9.  0.  8.]]]]
x.dims = {2, 2, 3, 3}

And:

kernels = [2, 2]
strides = [1, 1]
paddings = [0, 0, 0, 0]

Then:

output.data = [[ 6.  2.  8.  3.  2.  4.  6.  3.]
               [ 2.  1.  3.  5.  4.  4.  3.  0.]
               [ 8.  3.  0.  2.  6.  3.  6.  4.]
               [ 3.  5.  2.  6.  3.  0.  4.  7.]
               [ 6.  7.  5.  7.  1.  2.  1.  3.]
               [ 7.  1.  7.  9.  2.  1.  3.  5.]
               [ 5.  7.  2.  4.  1.  3.  9.  0.]
               [ 7.  9.  4.  8.  3.  5.  0.  8.]]
output.dims = {8, 9}
output.lod = [[0, 4, 8]]

)DOC");
  }
};

class Im2SequenceGradOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  // X@GRAD has exactly the shape of X.
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                   "Input(Out@GRAD) shouldn't be null.");
    ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP(im2sequence, ops::Im2SequenceOp, ops::Im2SequenceOpMaker,
            im2sequence_grad, ops::Im2SequenceGradOp);
REGISTER_OP_CPU_KERNEL(
    im2sequence,
    ops::Im2SequenceKernel<paddle::platform::CPUDeviceContext, float>);
REGISTER_OP_CPU_KERNEL(
    im2sequence_grad,
    ops::Im2SequenceGradKernel<paddle::platform::CPUDeviceContext, float>);
diff --git a/paddle/operators/im2sequence_op.cu b/paddle/operators/im2sequence_op.cu
new file mode 100644
--- /dev/null
+++ b/paddle/operators/im2sequence_op.cu
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License. */

#define EIGEN_USE_GPU
#include "paddle/operators/im2sequence_op.h"

namespace ops = paddle::operators;

REGISTER_OP_CUDA_KERNEL(
    im2sequence,
    ops::Im2SequenceKernel<paddle::platform::CUDADeviceContext, float>);
REGISTER_OP_CUDA_KERNEL(
    im2sequence_grad,
    ops::Im2SequenceGradKernel<paddle::platform::CUDADeviceContext, float>);
diff --git a/paddle/operators/im2sequence_op.h b/paddle/operators/im2sequence_op.h
new file mode 100644
--- /dev/null
+++ b/paddle/operators/im2sequence_op.h
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

   Licensed under the Apache License, Version 2.0 (the "License");
   You may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License. */

#pragma once

#include "paddle/framework/data_layout.h"
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/math/im2col.h"
#include "paddle/operators/math/math_function.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;

// Standard convolution-style output extent for one spatial dimension;
// padding_0/padding_1 are the paddings on either side of that dimension.
inline int OutputSize(int input_size, int filter_size, int padding_0,
                      int padding_1, int stride) {
  const int output_size =
      (input_size + padding_0 + padding_1 - filter_size) / stride + 1;
  return output_size;
}

template <typename DeviceContext, typename T>
class Im2SequenceKernel : public framework::OpKernel<T> {
 public:
  // Expands each image into a sequence of kernel windows via im2col; the
  // LoD marks one sequence per batch item, each of length
  // output_height * output_width.
  void Compute(const framework::ExecutionContext& ctx) const override {
    const Tensor* in = ctx.Input<Tensor>("X");
    LoDTensor* out = ctx.Output<LoDTensor>("Out");
    out->mutable_data<T>(ctx.GetPlace());
    // TODO(wanghaoshuang): Add layout checker after 'set_layout'
    // being available for python API
    // PADDLE_ENFORCE_EQ(in->layout(), framework::DataLayout::kNCHW,
    //                   "Input(X) layout must be NCHW");
    auto in_dim = in->dims();
    int batch_size = in_dim[0];
    int img_channels = in_dim[1];
    int img_height = in_dim[2];
    int img_width = in_dim[3];

    auto kernels = ctx.Attr<std::vector<int>>("kernels");
    auto strides = ctx.Attr<std::vector<int>>("strides");
    auto paddings = ctx.Attr<std::vector<int>>("paddings");
    int output_height = OutputSize(img_height, kernels[0], paddings[0],
                                   paddings[2], strides[0]);
    int output_width =
        OutputSize(img_width, kernels[1], paddings[1], paddings[3], strides[1]);

    // im2col supports dilation but this op does not expose it; use 1.
    const std::vector<int> dilations({1, 1});

    // Temporarily view the output as one row per batch item so Slice(i, i+1)
    // yields that item's window block; restore the real dims afterwards.
    auto out_dims = out->dims();
    out->Resize({batch_size, out->numel() / batch_size});
    for (int i = 0; i < batch_size; i++) {
      const Tensor src =
          in->Slice(i, i + 1).Resize({img_channels, img_height, img_width});
      Tensor dst = out->Slice(i, i + 1).Resize(
          {output_height, output_width, img_channels, kernels[0], kernels[1]});

      math::Im2ColFunctor<math::ColFormat::kOCF, DeviceContext, T> f;
      auto& dev_ctx = ctx.template device_context<DeviceContext>();
      f(dev_ctx, src, dilations, strides, paddings, &dst);
    }
    out->Resize(out_dims);

    // set lod information
    // TODO(wanghaoshuang): Move this to InferShape
    framework::LoD lod(1);
    lod[0].reserve(batch_size + 1);
    // NOTE: use push_back, not lod[0][i] — reserve() does not change size(),
    // so indexed writes after reserve() would be undefined behavior.
    for (int i = 0, offset = 0; i < batch_size + 1; ++i) {
      lod[0].push_back(offset);
      offset += output_height * output_width;
    }
    out->set_lod(lod);
  }
};

template <typename DeviceContext, typename T>
class Im2SequenceGradKernel : public framework::OpKernel<T> {
 public:
  // Scatters Out@GRAD windows back onto X@GRAD via col2im (overlapping
  // windows accumulate), after zero-initializing X@GRAD.
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* in = ctx.Input<Tensor>("X");
    // NOTE(review): const_cast so d_out can be temporarily Resize()d below;
    // its original dims are restored before returning.
    Tensor* d_out =
        const_cast<Tensor*>(ctx.Input<Tensor>(framework::GradVarName("Out")));
    auto* d_x = ctx.Output<Tensor>(framework::GradVarName("X"));
    d_x->mutable_data<T>(ctx.GetPlace());

    auto x_v = framework::EigenVector<T>::Flatten(*d_x);
    auto& place = *ctx.template device_context<DeviceContext>().eigen_device();
    x_v.device(place) = x_v.constant(0.0);

    auto in_dim = in->dims();
    int batch_size = in_dim[0];
    int img_channels = in_dim[1];
    int img_height = in_dim[2];
    int img_width = in_dim[3];

    auto kernels = ctx.Attr<std::vector<int>>("kernels");
    auto strides = ctx.Attr<std::vector<int>>("strides");
    auto paddings = ctx.Attr<std::vector<int>>("paddings");
    int output_height = OutputSize(img_height, kernels[0], paddings[0],
                                   paddings[2], strides[0]);
    int output_width =
        OutputSize(img_width, kernels[1], paddings[1], paddings[3], strides[1]);

    const std::vector<int> dilations({1, 1});

    auto d_out_dims = d_out->dims();
    d_out->Resize({batch_size, d_out->numel() / batch_size});
    for (int i = 0; i < batch_size; i++) {
      Tensor dst =
          d_x->Slice(i, i + 1).Resize({img_channels, img_height, img_width});
      const Tensor src = d_out->Slice(i, i + 1).Resize(
          {output_height, output_width, img_channels, kernels[0], kernels[1]});
      math::Col2ImFunctor<math::ColFormat::kOCF, DeviceContext, T> f;
      auto& dev_ctx = ctx.template device_context<DeviceContext>();
      f(dev_ctx, src, dilations, strides, paddings, &dst);
    }
    d_out->Resize(d_out_dims);
  }
};

}  // namespace operators
}  // namespace paddle
diff --git a/python/paddle/v2/fluid/tests/test_im2sequence_op.py b/python/paddle/v2/fluid/tests/test_im2sequence_op.py
new file mode 100644
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/test_im2sequence_op.py
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import unittest
import numpy as np
from op_test import OpTest


def get_output_shape(attrs, in_shape):
    # Mirrors OutputSize() in im2sequence_op.h:
    #   1 + (size + pad_before + pad_after - kernel) // stride
    # written with the "+ stride - 1" numerator form; uses floor division so
    # the result is an int under both Python 2 and Python 3.
    img_height = in_shape[2]
    img_width = in_shape[3]

    paddings = attrs['paddings']
    kernels = attrs['kernels']
    strides = attrs['strides']

    output_height = \
        1 + \
        (img_height + paddings[0] + paddings[2] - kernels[0] + strides[0] - 1) // \
        strides[0]

    output_width = \
        1 + \
        (img_width + paddings[1] + paddings[3] - kernels[1] + strides[1] - 1) // \
        strides[1]

    return output_height, output_width


def im2col(attrs, im, col):
    """
    im: {CHW}
    col:
    {outputHeight, outputWidth, inputChannels, filterHeight, filterWidth}
    """
    input_channels, input_height, input_width = im.shape
    output_height, output_width, _, filter_height, filter_width = col.shape

    stride_height, stride_width = attrs['strides']
    padding_height, padding_width = attrs['paddings'][0:2]

    for col_row_idx in range(0, output_height):
        for col_col_idx in range(0, output_width):
            for channel in range(0, input_channels):
                for filter_row_idx in range(0, filter_height):
                    for filter_col_idx in range(0, filter_width):
                        im_row_offset = col_row_idx * stride_height \
                            + filter_row_idx - padding_height

                        im_col_offset = col_col_idx * stride_width \
                            + filter_col_idx - padding_width

                        # Positions that fall in the padding region read as 0.
                        if (im_row_offset < 0 or
                                im_row_offset >= input_height or
                                im_col_offset < 0 or
                                im_col_offset >= input_width):
                            col[col_row_idx][col_col_idx][channel][
                                filter_row_idx][filter_col_idx] = 0.0
                        else:
                            col[col_row_idx][col_col_idx][channel][
                                filter_row_idx][filter_col_idx] = \
                                im[channel][im_row_offset][im_col_offset]


def Im2Sequence(inputs, attrs):
    # Reference implementation: im2col each image, then flatten to the op's
    # 2-D output layout {N*OH*OW, C*KH*KW}.
    output_height, output_width = get_output_shape(attrs, inputs.shape)
    img_channels = inputs.shape[1]
    batch_size = inputs.shape[0]
    out = np.zeros([
        batch_size, output_height, output_width, img_channels,
        attrs['kernels'][0], attrs['kernels'][1]
    ]).astype("float32")

    for i in range(len(inputs)):
        im2col(attrs, inputs[i], out[i])

    out = out.reshape([
        batch_size * output_height * output_width,
        img_channels * attrs['kernels'][0] * attrs['kernels'][1]
    ])
    return out


class TestBlockExpandOp(OpTest):
    def config(self):
        self.batch_size = 1
        self.img_channels = 3
        self.img_height = 4
        self.img_width = 4
        self.attrs = {
            'kernels': [2, 2],
            'strides': [1, 1],
            'paddings': [1, 1, 1, 1]
        }

    def setUp(self):
        self.config()
        self.op_type = "im2sequence"
        x = np.random.uniform(0.1, 1, [
            self.batch_size, self.img_channels, self.img_height, self.img_width
        ]).astype("float32")

        out = Im2Sequence(x, self.attrs)
        self.inputs = {'X': x}
        self.outputs = {'Out': out}

    def test_check_output(self):
        self.check_output()

    def test_check_grad_normal(self):
        self.check_grad(['X'], 'Out')


class TestBlockExpandOpCase2(TestBlockExpandOp):
    def config(self):
        self.batch_size = 2
        self.img_channels = 3
        self.img_height = 4
        self.img_width = 5
        self.attrs = {
            'kernels': [2, 1],
            'strides': [2, 1],
            'paddings': [2, 1, 2, 1]
        }


class TestBlockExpandOpCase3(TestBlockExpandOp):
    def config(self):
        self.batch_size = 3
        self.img_channels = 1
        self.img_height = 4
        self.img_width = 5
        self.attrs = {
            'kernels': [2, 1],
            'strides': [2, 1],
            'paddings': [2, 0, 2, 0]
        }


class TestBlockExpandOpCase4(TestBlockExpandOp):
    def config(self):
        self.batch_size = 2
        self.img_channels = 2
        self.img_height = 3
        self.img_width = 3
        self.attrs = {
            'kernels': [2, 2],
            'strides': [1, 1],
            'paddings': [0, 0, 0, 0]
        }


if __name__ == '__main__':
    unittest.main()