refine conv2d for filter size:(1,1)

21ce7042 · chengduoZH · b6f9ba48 · 21ce7042 · 21ce7042
隐藏空白更改
内联并排

Showing 2 changed files with 192 additions and 83 deletions

paddle/operators/conv_op.h paddle/operators/conv_op.h +173 -83

python/paddle/v2/framework/tests/test_conv2d_op.py python/paddle/v2/framework/tests/test_conv2d_op.py +19 -0

未找到文件。
--- a/paddle/operators/conv_op.h
+++ b/paddle/operators/conv_op.h
@@ -35,6 +35,18 @@ inline int OutputSize(int input_size, int filter_size, int dilation,
                    1;
  return output_size;
 }
+inline bool NotExpand(std::vector<int64_t>& filter_dim,  // Returns true only when every checked filter dim is 1, every stride is 1, every padding is 0 and every dilation is 1.
+                      std::vector<int>& strides, std::vector<int>& paddings,  // None of the four vectors is modified by this function.
+                      std::vector<int>& dilations) {
+  bool filter_1 = true, strides_1 = true, padding_0 = true, dilation_1 = true;  // Each flag stays true only while all entries seen so far match.
+  for (size_t j = 0; j < strides.size(); ++j) {  // strides.size() bounds the indexing of all four vectors; NOTE(review): no size check, the others must be at least that long.
+    filter_1 &= (static_cast<int>(filter_dim[j]) == 1);  // NOTE(review): assumes filter_dim holds only the kernel extents (no channel dims) -- confirm at call sites.
+    strides_1 &= (strides[j] == 1);
+    padding_0 &= (paddings[j] == 0);
+    dilation_1 &= (dilations[j] == 1);
+  }
+  return filter_1 && strides_1 && padding_0 && dilation_1;  // Callers in this diff use a true result to skip the im2col/vol2col step and feed the input slice to matmul directly.
+}
 // Define Op classes in .h file so that other conv
 // operator implementations can reuse the code.
@@ -110,14 +122,17 @@ class GemmConvKernel : public framework::OpKernel<T> {
    framework::DDim col_matrix_shape =
        framework::flatten_to_2d(col_shape, filter_shape_vec.size() + 1);
+    bool not_expand = NotExpand(filter_shape_vec, strides, paddings, dilations);
    Tensor col;
-    col.mutable_data<T>(col_shape, context.GetPlace());
    // col_matrix shares the same piece of data with col,
    // but will be reshaped into a two-dimensional matrix shape
    // to call the matrix multiplication interface.
    Tensor col_matrix;
-    col_matrix.ShareDataWith(col);
+    if (!not_expand) {
-    col_matrix.Resize(col_matrix_shape);
+      col.mutable_data<T>(col_shape, context.GetPlace());
+      col_matrix.ShareDataWith(col);
+      col_matrix.Resize(col_matrix_shape);
+    }
    framework::DDim input_shape = framework::slice_ddim(
        input->dims(), 1, static_cast<int>(input->dims().size()));
@@ -134,31 +149,51 @@ class GemmConvKernel : public framework::OpKernel<T> {
    int in_step = static_cast<int>(input->dims()[1]) / groups;
    int out_step = static_cast<int>(output->dims()[1]) / groups;
-    for (int i = 0; i < batch_size; i++) {
+    if (!not_expand) {
-      Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
+      for (int i = 0; i < batch_size; i++) {
-      Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
+        Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
-      for (int g = 0; g < groups; g++) {
+        Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
-        Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
+        for (int g = 0; g < groups; g++) {
+          Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
-        if (filter_shape_vec.size() == 2) {
-          // im2col
+          if (filter_shape_vec.size() == 2) {
-          math::Im2ColFunctor<math::ColFormat::kCFO, Place, T> im2col;
+            // im2col
-          im2col(context.device_context(), in_slice, col, dilations[0],
+            math::Im2ColFunctor<math::ColFormat::kCFO, Place, T> im2col;
-                 dilations[1], strides[0], strides[1], paddings[0], paddings[0],
+            im2col(context.device_context(), in_slice, col, dilations[0],
-                 paddings[1], paddings[1]);
+                   dilations[1], strides[0], strides[1], paddings[0],
-        } else if (filter_shape_vec.size() == 3) {
+                   paddings[0], paddings[1], paddings[1]);
-          // vol2col
+          } else if (filter_shape_vec.size() == 3) {
-          math::Vol2ColFunctor<Place, T> vol2col;
+            // vol2col
-          vol2col(context.device_context(), in_slice, col, strides[0],
+            math::Vol2ColFunctor<Place, T> vol2col;
-                  strides[1], strides[2], paddings[0], paddings[1],
+            vol2col(context.device_context(), in_slice, col, strides[0],
-                  paddings[2]);
+                    strides[1], strides[2], paddings[0], paddings[1],
+                    paddings[2]);
+          }
+          // gemm
+          Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
+          Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
+          math::matmul<Place, T>(context.device_context(), filter_slice, false,
+                                 col_matrix, false, T(1.0), &out_slice, T(0.0));
        }
+      }
+    } else {
+      for (int i = 0; i < batch_size; i++) {
+        Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
+        Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
+        for (int g = 0; g < groups; g++) {
+          Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
-        // gemm
+          col.ShareDataWith(in_slice);
-        Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
+          col_matrix.ShareDataWith(col);
-        Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
+          col_matrix.Resize(col_matrix_shape);
-        math::matmul<Place, T>(context.device_context(), filter_slice, false,
-                               col_matrix, false, T(1.0), &out_slice, T(0.0));
+          // gemm
+          Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
+          Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
+          math::matmul<Place, T>(context.device_context(), filter_slice, false,
+                                 col_matrix, false, T(1.0), &out_slice, T(0.0));
+        }
      }
    }
  }
@@ -235,14 +270,17 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
    int in_step = static_cast<int>(input->dims()[1]) / groups;
    int out_step = static_cast<int>(output_grad->dims()[1]) / groups;
+    bool not_expand = NotExpand(filter_shape_vec, strides, paddings, dilations);
    Tensor col;
    // col_matrix shares the same piece of data with col,
    // but will be reshaped into a two-dimensional matrix shape
    // to call the matrix multiplication interface.
    Tensor col_matrix;
-    col.mutable_data<T>(col_shape, context.GetPlace());
+    if (!not_expand) {
-    col_matrix.ShareDataWith(col);
+      col.mutable_data<T>(col_shape, context.GetPlace());
-    col_matrix.Resize(col_matrix_shape);
+      col_matrix.ShareDataWith(col);
+      col_matrix.Resize(col_matrix_shape);
+    }
    math::SetConstant<Place, T> set_zero;
@@ -250,33 +288,60 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
      input_grad->mutable_data<T>(context.GetPlace());
      set_zero(context.device_context(), input_grad, static_cast<T>(0));
-      for (int i = 0; i < batch_size; i++) {
+      if (!not_expand) {
-        Tensor out_grad_batch =
+        for (int i = 0; i < batch_size; i++) {
-            output_grad->Slice(i, i + 1).Resize(output_matrix_shape);
+          Tensor out_grad_batch =
-        Tensor in_grad_batch = input_grad->Slice(i, i + 1).Resize(input_shape);
+              output_grad->Slice(i, i + 1).Resize(output_matrix_shape);
-        for (int g = 0; g < groups; g++) {
+          Tensor in_grad_batch =
-          // gemm
+              input_grad->Slice(i, i + 1).Resize(input_shape);
-          Tensor out_grad_slice =
+          for (int g = 0; g < groups; g++) {
-              out_grad_batch.Slice(g * out_step, (g + 1) * out_step);
+            // gemm
-          Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
+            Tensor out_grad_slice =
-          math::matmul<Place, T>(context.device_context(), filter_slice, true,
+                out_grad_batch.Slice(g * out_step, (g + 1) * out_step);
-                                 out_grad_slice, false, T(1.0), &col_matrix,
+            Tensor filter_slice =
-                                 T(0.0));
+                filter.Slice(g * out_step, (g + 1) * out_step);
-          // col2im
+            math::matmul<Place, T>(context.device_context(), filter_slice, true,
-          Tensor in_grad_slice =
+                                   out_grad_slice, false, T(1.0), &col_matrix,
-              in_grad_batch.Slice(g * in_step, (g + 1) * in_step);
+                                   T(0.0));
+            Tensor in_grad_slice =
-          if (filter_shape_vec.size() == 2) {
+                in_grad_batch.Slice(g * in_step, (g + 1) * in_step);
-            math::Col2ImFunctor<math::ColFormat::kCFO, Place, T> col2im;
-            col2im(context.device_context(), in_grad_slice, col, dilations[0],
+            if (filter_shape_vec.size() == 2) {
-                   dilations[1], strides[0], strides[1], paddings[0],
+              math::Col2ImFunctor<math::ColFormat::kCFO, Place, T> col2im;
-                   paddings[0], paddings[1], paddings[1]);
+              col2im(context.device_context(), in_grad_slice, col, dilations[0],
+                     dilations[1], strides[0], strides[1], paddings[0],
-          } else if (filter_shape_vec.size() == 3) {
+                     paddings[0], paddings[1], paddings[1]);
-            math::Col2VolFunctor<Place, T> col2vol;
-            col2vol(context.device_context(), in_grad_slice, col, strides[0],
+            } else if (filter_shape_vec.size() == 3) {
-                    strides[1], strides[2], paddings[0], paddings[1],
+              math::Col2VolFunctor<Place, T> col2vol;
-                    paddings[2]);
+              col2vol(context.device_context(), in_grad_slice, col, strides[0],
+                      strides[1], strides[2], paddings[0], paddings[1],
+                      paddings[2]);
+            }
+          }
+        }
+      } else {
+        for (int i = 0; i < batch_size; i++) {
+          Tensor out_grad_batch =
+              output_grad->Slice(i, i + 1).Resize(output_matrix_shape);
+          Tensor in_grad_batch =
+              input_grad->Slice(i, i + 1).Resize(input_shape);
+          for (int g = 0; g < groups; g++) {
+            // gemm
+            Tensor out_grad_slice =
+                out_grad_batch.Slice(g * out_step, (g + 1) * out_step);
+            Tensor filter_slice =
+                filter.Slice(g * out_step, (g + 1) * out_step);
+            Tensor in_grad_slice =
+                in_grad_batch.Slice(g * in_step, (g + 1) * in_step);
+            col_matrix.ShareDataWith(in_grad_slice);
+            col_matrix.Resize(col_matrix_shape);
+            math::matmul<Place, T>(context.device_context(), filter_slice, true,
+                                   out_grad_slice, false, T(1.0), &col_matrix,
+                                   T(0.0));
          }
        }
      }
@@ -288,34 +353,59 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
      filter_grad_.Resize(filter_matrix_shape);
      set_zero(context.device_context(), filter_grad, static_cast<T>(0));
-      for (int i = 0; i < batch_size; i++) {
+      if (!not_expand) {
-        Tensor out_grad_batch =
+        for (int i = 0; i < batch_size; i++) {
-            output_grad->Slice(i, i + 1).Resize(output_matrix_shape);
+          Tensor out_grad_batch =
-        Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
+              output_grad->Slice(i, i + 1).Resize(output_matrix_shape);
-        for (int g = 0; g < groups; g++) {
+          Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
-          // im2col
+          for (int g = 0; g < groups; g++) {
-          Tensor out_grad_slice =
+            // im2col
-              out_grad_batch.Slice(g * out_step, (g + 1) * out_step);
+            Tensor out_grad_slice =
-          Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
+                out_grad_batch.Slice(g * out_step, (g + 1) * out_step);
+            Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
-          if (filter_shape_vec.size() == 2) {
-            math::Im2ColFunctor<math::ColFormat::kCFO, Place, T> im2col;
+            if (filter_shape_vec.size() == 2) {
-            im2col(context.device_context(), in_slice, col, dilations[0],
+              math::Im2ColFunctor<math::ColFormat::kCFO, Place, T> im2col;
-                   dilations[1], strides[0], strides[1], paddings[0],
+              im2col(context.device_context(), in_slice, col, dilations[0],
-                   paddings[0], paddings[1], paddings[1]);
+                     dilations[1], strides[0], strides[1], paddings[0],
-          } else if (filter_shape_vec.size() == 3) {
+                     paddings[0], paddings[1], paddings[1]);
-            math::Vol2ColFunctor<Place, T> vol2col;
+            } else if (filter_shape_vec.size() == 3) {
-            vol2col(context.device_context(), in_slice, col, strides[0],
+              math::Vol2ColFunctor<Place, T> vol2col;
-                    strides[1], strides[2], paddings[0], paddings[1],
+              vol2col(context.device_context(), in_slice, col, strides[0],
-                    paddings[2]);
+                      strides[1], strides[2], paddings[0], paddings[1],
+                      paddings[2]);
+            }
+            // gemm
+            Tensor filter_grad_slice =
+                filter_grad_.Slice(g * out_step, (g + 1) * out_step);
+            math::matmul<Place, T>(context.device_context(), out_grad_slice,
+                                   false, col_matrix, true, T(1.0),
+                                   &filter_grad_slice, T(1.0));
+          }
+        }
+      } else {
+        for (int i = 0; i < batch_size; i++) {
+          Tensor out_grad_batch =
+              output_grad->Slice(i, i + 1).Resize(output_matrix_shape);
+          Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
+          for (int g = 0; g < groups; g++) {
+            // im2col
+            Tensor out_grad_slice =
+                out_grad_batch.Slice(g * out_step, (g + 1) * out_step);
+            Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
+            col.ShareDataWith(in_slice);
+            col_matrix.ShareDataWith(col);
+            col_matrix.Resize(col_matrix_shape);
+            // gemm
+            Tensor filter_grad_slice =
+                filter_grad_.Slice(g * out_step, (g + 1) * out_step);
+            math::matmul<Place, T>(context.device_context(), out_grad_slice,
+                                   false, col_matrix, true, T(1.0),
+                                   &filter_grad_slice, T(1.0));
          }
-          // gemm
-          Tensor filter_grad_slice =
-              filter_grad_.Slice(g * out_step, (g + 1) * out_step);
-          math::matmul<Place, T>(context.device_context(), out_grad_slice,
-                                 false, col_matrix, true, T(1.0),
-                                 &filter_grad_slice, T(1.0));
        }
      }
    }

--- a/python/paddle/v2/framework/tests/test_conv2d_op.py
+++ b/python/paddle/v2/framework/tests/test_conv2d_op.py
@@ -104,6 +104,25 @@ class TestWithGroup(TestConv2dOp):
        self.op_type = "conv2d"
+class TestWith1x1(TestConv2dOp):  # conv2d case with a 1x1 filter, zero padding, unit stride and unit dilation
+    def init_test_case(self):
+        self.pad = [0, 0]
+        self.stride = [1, 1]
+        self.input_size = [2, 3, 5, 5]  # NCHW
+        assert np.mod(self.input_size[1], self.groups) == 0  # input channel count must divide evenly across groups
+        f_c = self.input_size[1] / self.groups  # NOTE(review): true division yields a float on Python 3; `//` may be intended -- confirm
+        self.filter_size = [6, f_c, 1, 1]  # presumably [out_channels, in_channels_per_group, k_h, k_w] -- TODO confirm
+    def init_dilation(self):
+        self.dilations = [1, 1]
+    def init_group(self):
+        self.groups = 3  # NOTE(review): init_test_case reads self.groups, so the base class presumably calls init_group first -- confirm
+    def init_op_type(self):
+        self.op_type = "conv2d"
 #----------------Conv2dCudnn----------------