Convolution op and forward calculation.

c9d8cb4e · hedaoyuan · 544458e0 · c9d8cb4e · c9d8cb4e · c9d8cb4e
6 changed files
--- a/paddle/operators/conv_op.cc
+++ b/paddle/operators/conv_op.cc
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+   http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+#include "paddle/operators/gemm_conv_op.h"
+namespace paddle {
+namespace operators {
+// Returns the extent of the convolution output along one spatial axis:
+//   (input_size - filter_size + 2 * padding) / stride + 1
+// The division is plain C++ integer division. The caller must pass a
+// non-zero stride; nothing here validates the arguments.
+int outputSize(int input_size, int filter_size, int padding, int stride) {
+  const int padded_span = input_size - filter_size + 2 * padding;
+  return padded_span / stride + 1;
+}
+// Forward 2-D convolution operator. InferShape validates the "Input" and
+// "Filter" tensors together with the "strides"/"paddings" attributes and
+// resizes "Output" to {batch, output_channels, output_height, output_width}.
+class Conv2DOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+ protected:
+  void InferShape(const framework::InferShapeContext &ctx) const override {
+    auto *in = ctx.Input<framework::Tensor>("Input");
+    auto *filter = ctx.Input<framework::Tensor>("Filter");
+    auto *out = ctx.Output<framework::Tensor>("Output");
+    PADDLE_ENFORCE_EQ(in->dims().size(), 4, "Conv2DOp input should be 4-D.");
+    PADDLE_ENFORCE_EQ(filter->dims().size(), 4,
+                      "Conv2DOp filter should be 4-D.");
+    // The kernel multiplies a filter matrix of width
+    // filter->dims()[1] * filter_height * filter_width with a column matrix
+    // of height input_channels * filter_height * filter_width, so the two
+    // channel counts have to agree.
+    PADDLE_ENFORCE_EQ(in->dims()[1], filter->dims()[1],
+                      "Conv2DOp input channels should be equal to the "
+                      "channels of filter.");
+    std::vector<int> strides = Attr<std::vector<int>>("strides");
+    std::vector<int> paddings = Attr<std::vector<int>>("paddings");
+    // Both attributes are indexed with [0] (height) and [1] (width) below.
+    PADDLE_ENFORCE(strides.size() == 2UL,
+                   "Conv2DOp strides should contain 2 elements.");
+    PADDLE_ENFORCE(paddings.size() == 2UL,
+                   "Conv2DOp paddings should contain 2 elements.");
+    // outputSize() divides by the stride.
+    PADDLE_ENFORCE(strides[0] > 0 && strides[1] > 0,
+                   "Conv2DOp strides should be greater than 0.");
+    auto output_height =
+        outputSize(in->dims()[2], filter->dims()[2], paddings[0], strides[0]);
+    auto output_width =
+        outputSize(in->dims()[3], filter->dims()[3], paddings[1], strides[1]);
+    out->Resize(
+        {in->dims()[0], filter->dims()[0], output_height, output_width});
+  }
+};
+// Declares the proto of the conv2d operator: inputs "Input" and "Filter",
+// output "Output", and the integer-vector attributes "strides" and
+// "paddings".
+class Conv2DOppMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  Conv2DOppMaker(framework::OpProto *proto,
+                 framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput(
+        "Input",
+        "The input tensor of convolution operator. "
+        "The format of input tensor is NCHW. Where N is batch size, C is the "
+        "number of channels, H and W is the height and width of image.");
+    // Adjacent literals are concatenated verbatim, so every sentence except
+    // the last one ends with an explicit trailing space.
+    AddInput(
+        "Filter",
+        "The filter tensor of convolution operator. "
+        "The format of the filter tensor is MCHW, where M is the number of "
+        "output image channels, C is the number of input image channels, "
+        "H and W is the height and width of filter.");
+    AddOutput("Output",
+              "The output tensor of convolution operator. "
+              "The format of output tensor is also NCHW.");
+    AddComment(R"DOC(
+The convolution operation calculates the output based on
+the input, filter and strides, paddings parameters.
+)DOC");
+    AddAttr<std::vector<int>>("strides", "strides of convolution operator.");
+    AddAttr<std::vector<int>>("paddings", "paddings of convolution operator.");
+  }
+};
+// Operator class registered below as the gradient of conv2d. Its shape
+// inference is a placeholder: the override exists but performs no work.
+class Conv2DOpGrad : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+ protected:
+  void InferShape(const framework::InferShapeContext &ctx) const override {
+    // Intentionally empty: no tensor is resized here.
+  }
+};
+}  // namespace operators
+}  // namespace paddle
+namespace ops = paddle::operators;
+// Register the conv2d operator with its proto maker, and name conv2d_grad
+// (implemented by Conv2DOpGrad) as its gradient operator.
+REGISTER_OP(conv2d, ops::Conv2DOp, ops::Conv2DOppMaker, conv2d_grad,
+            ops::Conv2DOpGrad);
+// CPU kernels are instantiated for float only.
+REGISTER_OP_CPU_KERNEL(conv2d,
+                       ops::GemmConvKernel<paddle::platform::CPUPlace, float>);
+REGISTER_OP_CPU_KERNEL(
+    conv2d_grad, ops::GemmConvGradKernel<paddle::platform::CPUPlace, float>);
--- a/paddle/operators/conv_op.cu
+++ b/paddle/operators/conv_op.cu
+/* Copyright (c) 2016 PaddlePaddle Authors All Rights Reserve.
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+   http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+#include "paddle/operators/gemm_conv_op.h"
+namespace ops = paddle::operators;
+// GPU counterparts of the conv2d / conv2d_grad kernels: the same kernel
+// templates instantiated with GPUPlace, for float only.
+REGISTER_OP_GPU_KERNEL(conv2d,
+                       ops::GemmConvKernel<paddle::platform::GPUPlace, float>);
+REGISTER_OP_GPU_KERNEL(
+    conv2d_grad, ops::GemmConvGradKernel<paddle::platform::GPUPlace, float>);
--- a/paddle/operators/gemm_conv_op.h
+++ b/paddle/operators/gemm_conv_op.h
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+#include "paddle/framework/op_registry.h"
+#include "paddle/operators/math/im2col.h"
+#include "paddle/operators/math/math_function.h"
+namespace paddle {
+namespace operators {
+using Tensor = framework::Tensor;
+// Forward kernel of conv2d implemented as im2col followed by a matrix
+// multiplication, executed once per sample of the batch.
+//
+// Reads the "Input" and "Filter" tensors and the "strides"/"paddings"
+// attributes from the execution context and writes the result into "Output".
+// Indices [0] and [1] of "strides"/"paddings" are forwarded to im2col in that
+// order.
+template <typename Place, typename T>
+class GemmConvKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    const Tensor* input = context.Input<Tensor>("Input");
+    // The filter is viewed as a 2-D matrix for the gemm below. Reshape a
+    // local copy of the tensor object instead of the operator's input, so
+    // the shape of the caller's "Filter" tensor stays 4-D after Compute().
+    // The copy is only read, never written.
+    Tensor filter = *context.Input<Tensor>("Filter");
+    Tensor* output = context.Output<Tensor>("Output");
+    output->mutable_data<T>(context.GetPlace());
+    paddle::framework::Tensor col;
+    paddle::framework::Tensor in_slice;
+    paddle::framework::Tensor out_slice;
+    std::vector<int> strides = context.Attr<std::vector<int>>("strides");
+    std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
+    int batch_size = input->dims()[0];
+    int input_channels = input->dims()[1];
+    // The last two filter dimensions are its height and width.
+    int filter_height = filter.dims()[filter.dims().size() - 2];
+    int filter_width = filter.dims()[filter.dims().size() - 1];
+    int output_height = output->dims()[2];
+    int output_width = output->dims()[3];
+    paddle::operators::math::Im2ColFunctor<
+        paddle::operators::math::ColFormat::kCFO, Place, T>
+        im2col;
+    // Shape handed to im2col: one entry per (channel, filter row, filter
+    // column, output row, output column).
+    framework::DDim col_shape = {input_channels, filter_height, filter_width,
+                                 output_height, output_width};
+    // Allocate the column buffer with the kernel's element type T.
+    col.mutable_data<T>(col_shape, context.GetPlace());
+    auto* device_context =
+        const_cast<platform::DeviceContext*>(context.device_context_);
+    // Shape of a single sample of the input (batch dimension dropped).
+    framework::DDim input_shape = {input->dims()[1], input->dims()[2],
+                                   input->dims()[3]};
+    // 2-D views used by the gemm:
+    //   filter: output_channels x (channels * filter_h * filter_w)
+    //   col:    (channels * filter_h * filter_w) x (output_h * output_w)
+    //   output: output_channels x (output_h * output_w)
+    framework::DDim filter_matrix_shape = {
+        filter.dims()[0],
+        filter.dims()[1] * filter.dims()[2] * filter.dims()[3]};
+    framework::DDim col_matrix_shape = {
+        input_channels * filter_height * filter_width,
+        output_height * output_width};
+    framework::DDim output_matrix_shape = {
+        output->dims()[1], output->dims()[2] * output->dims()[3]};
+    filter.Resize(filter_matrix_shape);
+    // convolution operator: im2col + gemm
+    for (int i = 0; i < batch_size; i++) {
+      // im2col: expand sample i into the column buffer.
+      in_slice = input->Slice<T>(i, i + 1);
+      in_slice.Resize(input_shape);
+      col.Resize(col_shape);
+      im2col(in_slice, col, strides[0], strides[1], paddings[0], paddings[1],
+             device_context);
+      // gemm: out_slice = filter * col (alpha = 1, beta = 0).
+      out_slice = output->Slice<T>(i, i + 1);
+      out_slice.Resize(output_matrix_shape);
+      col.Resize(col_matrix_shape);
+      math::matmul<Place, T>(filter, false, col, false, T(1.0), &out_slice,
+                             T(0.0), device_context);
+    }
+  }
+};
+// Kernel registered for conv2d_grad. The gradient computation is not
+// implemented: the whole body of Compute() is disabled with `#if 0`, so the
+// method currently does nothing.
+template <typename Place, typename T>
+class GemmConvGradKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+#if 0
+    // Disabled placeholder code; never compiled.
+    auto input = context.Input<Tensor>("Input");
+    auto filter = context.Input<Tensor>("Filter");
+    auto output = context.Output<Tensor>("Output");
+    output->mutable_data<T>(context.GetPlace());
+#endif
+  }
+};
+}  // namespace operators
+}  // namespace paddle
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -51,6 +51,7 @@ USE_CPU_ONLY_OP(gather);
 USE_CPU_ONLY_OP(scatter);
 USE_OP(top_k);
 USE_OP(squared_l2_distance);
+USE_OP(conv2d);
 namespace paddle {
 namespace framework {

--- a/python/paddle/v2/framework/tests/CMakeLists.txt
+++ b/python/paddle/v2/framework/tests/CMakeLists.txt
@@ -35,3 +35,4 @@ py_test(test_lookup_table SRCS test_lookup_table.py)
 py_test(test_scale_and_identity_op SRCS test_scale_and_identity_op.py)
 py_test(mnist SRCS mnist.py)
 py_test(test_squared_l2_distance_op SRCS test_squared_l2_distance_op.py)
+py_test(test_conv2d_op SRCS test_conv2d_op.py)
--- a/python/paddle/v2/framework/tests/test_conv2d_op.py
+++ b/python/paddle/v2/framework/tests/test_conv2d_op.py
+import unittest
+import numpy as np
+from gradient_checker import GradientChecker, create_op
+from op_test_util import OpTestMeta
+class TestConv2dOp(unittest.TestCase):
+    # Forward test of the conv2d operator: setUp builds random NCHW input and
+    # MCHW filter arrays and computes the expected output with a naive
+    # nested-loop convolution.
+    __metaclass__ = OpTestMeta
+    def setUp(self):
+        self.type = "conv2d"
+        batch_size = 2
+        input_channels = 3
+        input_height = 5
+        input_width = 5
+        output_channels = 6
+        filter_height = 3
+        filter_width = 3
+        stride = 1
+        padding = 0
+        # Explicit floor division keeps the sizes integral.
+        output_height = (input_height - filter_height + 2 * padding
+                         ) // stride + 1
+        output_width = (input_width - filter_width + 2 * padding) // stride + 1
+        input = np.random.random((batch_size, input_channels, input_height,
+                                  input_width)).astype("float32")
+        filter = np.random.random(
+            (output_channels, input_channels, filter_height,
+             filter_width)).astype("float32")
+        output = np.ndarray(
+            (batch_size, output_channels, output_height, output_width))
+        # Reference convolution; positions that fall outside the input
+        # (i.e. inside the padding) contribute zero.
+        for batchid in xrange(batch_size):
+            for channelid in xrange(output_channels):
+                for rowid in xrange(output_height):
+                    for colid in xrange(output_width):
+                        start_h = (rowid * stride) - padding
+                        start_w = (colid * stride) - padding
+                        output_value = 0.0
+                        for inchannelid in xrange(input_channels):
+                            for frowid in xrange(filter_height):
+                                for fcolid in xrange(filter_width):
+                                    input_value = 0.0
+                                    inrowid = start_h + frowid
+                                    incolid = start_w + fcolid
+                                    if ((inrowid >= 0 and
+                                         inrowid < input_height) and
+                                        (incolid >= 0 and
+                                         incolid < input_width)):
+                                        input_value = input[batchid][
+                                            inchannelid][inrowid][incolid]
+                                    filter_value = filter[channelid][
+                                        inchannelid][frowid][fcolid]
+                                    output_value += input_value * filter_value
+                        output[batchid][channelid][rowid][colid] = output_value
+        self.inputs = {'Input': input, 'Filter': filter}
+        self.outputs = {'Output': output}
+        # Derive the attributes from the same stride/padding values that the
+        # reference computation above uses, so the two cannot drift apart.
+        self.attrs = {
+            'strides': [stride, stride],
+            'paddings': [padding, padding]
+        }
+# Run the test case through unittest when this file is executed as a script.
+if __name__ == '__main__':
+    unittest.main()