Add groups in convolution operator.

fb46345f · hedaoyuan · 14ae8050 · fb46345f · fb46345f
Hide whitespace changes
Inline / Side-by-side

Showing 2 changed files with 44 additions and 14 deletions

paddle/operators/conv_op.cc paddle/operators/conv_op.cc +20 -2

paddle/operators/gemm_conv_op.h paddle/operators/gemm_conv_op.h +24 -12

No files found.
--- a/paddle/operators/conv_op.cc
+++ b/paddle/operators/conv_op.cc
@@ -31,12 +31,22 @@ class Conv2DOp : public framework::OperatorWithKernel {
    auto in = ctx.Input<Tensor>("Input");
    auto filter = ctx.Input<Tensor>("Filter");
    auto out = ctx.Output<Tensor>("Output");
+    std::vector<int> strides = Attr<std::vector<int>>("strides");
+    std::vector<int> paddings = Attr<std::vector<int>>("paddings");
+    int groups = context.Attr<int>("groups");
+    int input_channels = in->dims()[1];
+    int output_channels = filter->dims()[0];
    PADDLE_ENFORCE_EQ(in->dims().size(), 4, "Conv2DOp intput should be 4-D.");
    PADDLE_ENFORCE_EQ(filter->dims().size(), 4,
                      "Conv2DOp filter should be 4-D.");
+    PADDLE_ENFORCE_EQ(input_channels, filter->dims()[1] * groups,
+                      "The number of input channels should be equal to filter "
+                      "channels * groups.");
+    PADDLE_ENFORCE_EQ(
+        output_channels % groups, 0,
+        "The number of output channels should be divided by groups.");
-    std::vector<int> strides = Attr<std::vector<int>>("strides");
-    std::vector<int> paddings = Attr<std::vector<int>>("paddings");
    auto output_height =
        outputSize(in->dims()[2], filter->dims()[2], paddings[0], strides[0]);
    auto output_width =
@@ -71,6 +81,14 @@ the input, filter and strides, paddings parameters.
 )DOC");
    AddAttr<std::vector<int>>("strides", "strides of convolution operator.");
    AddAttr<std::vector<int>>("paddings", "paddings of convolution operator.");
+    AddAttr<int>(
+        "groups",
+        "group size of convolution operator. "
+        "Refer to grouped convolution in Alex Krizhevsky's paper: "
+        "when group=2, the first half of the filters are only connected to the "
+        "first half of the input channels, and the second half only connected "
+        "to the second half.")
+        .SetDefault(1);
  }
 };

--- a/paddle/operators/gemm_conv_op.h
+++ b/paddle/operators/gemm_conv_op.h
@@ -38,6 +38,7 @@ class GemmConvKernel : public framework::OpKernel {
    std::vector<int> strides = context.Attr<std::vector<int>>("strides");
    std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
+    int groups = context.Attr<int>("groups");
    int batch_size = input->dims()[0];
    int input_channels = input->dims()[1];
@@ -51,11 +52,11 @@ class GemmConvKernel : public framework::OpKernel {
        paddle::operators::math::ColFormat::kCFO, Place, T>
        im2col;
    // use col_shape in the im2col calculation
-    framework::DDim col_shape = {input_channels, filter_height, filter_width,
+    framework::DDim col_shape = {input_channels / groups, filter_height,
-                                 output_height, output_width};
+                                 filter_width, output_height, output_width};
    // use col_matrix_shape in the gemm calculation
    framework::DDim col_matrix_shape = {
-        input_channels * filter_height * filter_width,
+        input_channels / groups * filter_height * filter_width,
        output_height * output_width};
    Tensor col;
    col.mutable_data<T>(col_shape, context.GetPlace());
@@ -78,16 +79,26 @@ class GemmConvKernel : public framework::OpKernel {
        const_cast<platform::DeviceContext*>(context.device_context_);
    // convolution operator: im2col + gemm
+    int in_step = input_channels / groups;
+    int out_step = output_channels / groups;
    for (int i = 0; i < batch_size; i++) {
-      // im2col
+      Tensor in_slice_batch = input->Slice<T>(i, i + 1).Resize(input_shape);
-      Tensor in_slice = input->Slice<T>(i, i + 1).Resize(input_shape);
+      Tensor out_slice_batch =
-      im2col(in_slice, col, strides[0], strides[1], paddings[0], paddings[1],
+          output->Slice<T>(i, i + 1).Resize(output_matrix_shape);
-             device_context);
+      for (int g = 0; g < groups; g++) {
+        // im2col
-      // gemm
+        Tensor in_slice =
-      Tensor out_slice = output->Slice<T>(i, i + 1).Resize(output_matrix_shape);
+            in_slice_batch.Slice<T>(g * in_step, (g + 1) * in_step);
-      math::matmul<Place, T>(filter, false, col_matrix, false, T(1.0),
+        im2col(in_slice, col, strides[0], strides[1], paddings[0], paddings[1],
-                             &out_slice, T(0.0), device_context);
+               device_context);
+        // gemm
+        Tensor out_slice =
+            out_slice_batch.Slice<T>(g * out_step, (g + 1) * out_step);
+        Tensor filter_slice = filter.Slice<T>(g * out_step, (g + 1) * out_step);
+        math::matmul<Place, T>(filter_slice, false, col_matrix, false, T(1.0),
+                               &out_slice, T(0.0), device_context);
+      }
    }
  }
 };
@@ -114,6 +125,7 @@ class GemmConvGradKernel : public framework::OpKernel {
    std::vector<int> strides = context.Attr<std::vector<int>>("strides");
    std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
+    // int groups = context.Attr<int>("groups");
    int batch_size = input->dims()[0];
    int input_channels = input->dims()[1];