Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
21ce7042
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
21ce7042
编写于
11月 09, 2017
作者:
C
chengduoZH
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refine conv2d for filter size:(1,1)
上级
b6f9ba48
变更
2
显示空白变更内容
内联
并排
Showing
2 changed files
with
192 additions
and
83 deletions
+192
-83
paddle/operators/conv_op.h
paddle/operators/conv_op.h
+173
-83
python/paddle/v2/framework/tests/test_conv2d_op.py
python/paddle/v2/framework/tests/test_conv2d_op.py
+19
-0
未找到文件。
paddle/operators/conv_op.h
浏览文件 @
21ce7042
...
@@ -35,6 +35,18 @@ inline int OutputSize(int input_size, int filter_size, int dilation,
...
@@ -35,6 +35,18 @@ inline int OutputSize(int input_size, int filter_size, int dilation,
1
;
1
;
return
output_size
;
return
output_size
;
}
}
inline
bool
NotExpand
(
std
::
vector
<
int64_t
>&
filter_dim
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
std
::
vector
<
int
>&
dilations
)
{
bool
filter_1
=
true
,
strides_1
=
true
,
padding_0
=
true
,
dilation_1
=
true
;
for
(
size_t
j
=
0
;
j
<
strides
.
size
();
++
j
)
{
filter_1
&=
(
static_cast
<
int
>
(
filter_dim
[
j
])
==
1
);
strides_1
&=
(
strides
[
j
]
==
1
);
padding_0
&=
(
paddings
[
j
]
==
0
);
dilation_1
&=
(
dilations
[
j
]
==
1
);
}
return
filter_1
&&
strides_1
&&
padding_0
&&
dilation_1
;
}
// Define Op classes in .h file so that other conv
// Define Op classes in .h file so that other conv
// operator implementations can reuse the code.
// operator implementations can reuse the code.
...
@@ -110,14 +122,17 @@ class GemmConvKernel : public framework::OpKernel<T> {
...
@@ -110,14 +122,17 @@ class GemmConvKernel : public framework::OpKernel<T> {
framework
::
DDim
col_matrix_shape
=
framework
::
DDim
col_matrix_shape
=
framework
::
flatten_to_2d
(
col_shape
,
filter_shape_vec
.
size
()
+
1
);
framework
::
flatten_to_2d
(
col_shape
,
filter_shape_vec
.
size
()
+
1
);
bool
not_expand
=
NotExpand
(
filter_shape_vec
,
strides
,
paddings
,
dilations
);
Tensor
col
;
Tensor
col
;
col
.
mutable_data
<
T
>
(
col_shape
,
context
.
GetPlace
());
// col_matrix shares the same piece of data with col,
// col_matrix shares the same piece of data with col,
// but will be reshaped into a two-dimensional matrix shape
// but will be reshaped into a two-dimensional matrix shape
// to call the matrix multiplication interface.
// to call the matrix multiplication interface.
Tensor
col_matrix
;
Tensor
col_matrix
;
if
(
!
not_expand
)
{
col
.
mutable_data
<
T
>
(
col_shape
,
context
.
GetPlace
());
col_matrix
.
ShareDataWith
(
col
);
col_matrix
.
ShareDataWith
(
col
);
col_matrix
.
Resize
(
col_matrix_shape
);
col_matrix
.
Resize
(
col_matrix_shape
);
}
framework
::
DDim
input_shape
=
framework
::
slice_ddim
(
framework
::
DDim
input_shape
=
framework
::
slice_ddim
(
input
->
dims
(),
1
,
static_cast
<
int
>
(
input
->
dims
().
size
()));
input
->
dims
(),
1
,
static_cast
<
int
>
(
input
->
dims
().
size
()));
...
@@ -134,6 +149,7 @@ class GemmConvKernel : public framework::OpKernel<T> {
...
@@ -134,6 +149,7 @@ class GemmConvKernel : public framework::OpKernel<T> {
int
in_step
=
static_cast
<
int
>
(
input
->
dims
()[
1
])
/
groups
;
int
in_step
=
static_cast
<
int
>
(
input
->
dims
()[
1
])
/
groups
;
int
out_step
=
static_cast
<
int
>
(
output
->
dims
()[
1
])
/
groups
;
int
out_step
=
static_cast
<
int
>
(
output
->
dims
()[
1
])
/
groups
;
if
(
!
not_expand
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
Tensor
in_batch
=
input
->
Slice
(
i
,
i
+
1
).
Resize
(
input_shape
);
Tensor
in_batch
=
input
->
Slice
(
i
,
i
+
1
).
Resize
(
input_shape
);
Tensor
out_batch
=
output
->
Slice
(
i
,
i
+
1
).
Resize
(
output_matrix_shape
);
Tensor
out_batch
=
output
->
Slice
(
i
,
i
+
1
).
Resize
(
output_matrix_shape
);
...
@@ -144,8 +160,8 @@ class GemmConvKernel : public framework::OpKernel<T> {
...
@@ -144,8 +160,8 @@ class GemmConvKernel : public framework::OpKernel<T> {
// im2col
// im2col
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kCFO
,
Place
,
T
>
im2col
;
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kCFO
,
Place
,
T
>
im2col
;
im2col
(
context
.
device_context
(),
in_slice
,
col
,
dilations
[
0
],
im2col
(
context
.
device_context
(),
in_slice
,
col
,
dilations
[
0
],
dilations
[
1
],
strides
[
0
],
strides
[
1
],
paddings
[
0
],
paddings
[
0
],
dilations
[
1
],
strides
[
0
],
strides
[
1
],
paddings
[
0
],
paddings
[
1
],
paddings
[
1
]);
paddings
[
0
],
paddings
[
1
],
paddings
[
1
]);
}
else
if
(
filter_shape_vec
.
size
()
==
3
)
{
}
else
if
(
filter_shape_vec
.
size
()
==
3
)
{
// vol2col
// vol2col
math
::
Vol2ColFunctor
<
Place
,
T
>
vol2col
;
math
::
Vol2ColFunctor
<
Place
,
T
>
vol2col
;
...
@@ -161,6 +177,25 @@ class GemmConvKernel : public framework::OpKernel<T> {
...
@@ -161,6 +177,25 @@ class GemmConvKernel : public framework::OpKernel<T> {
col_matrix
,
false
,
T
(
1.0
),
&
out_slice
,
T
(
0.0
));
col_matrix
,
false
,
T
(
1.0
),
&
out_slice
,
T
(
0.0
));
}
}
}
}
}
else
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
Tensor
in_batch
=
input
->
Slice
(
i
,
i
+
1
).
Resize
(
input_shape
);
Tensor
out_batch
=
output
->
Slice
(
i
,
i
+
1
).
Resize
(
output_matrix_shape
);
for
(
int
g
=
0
;
g
<
groups
;
g
++
)
{
Tensor
in_slice
=
in_batch
.
Slice
(
g
*
in_step
,
(
g
+
1
)
*
in_step
);
col
.
ShareDataWith
(
in_slice
);
col_matrix
.
ShareDataWith
(
col
);
col_matrix
.
Resize
(
col_matrix_shape
);
// gemm
Tensor
out_slice
=
out_batch
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
Tensor
filter_slice
=
filter
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
math
::
matmul
<
Place
,
T
>
(
context
.
device_context
(),
filter_slice
,
false
,
col_matrix
,
false
,
T
(
1.0
),
&
out_slice
,
T
(
0.0
));
}
}
}
}
}
};
};
...
@@ -235,14 +270,17 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
...
@@ -235,14 +270,17 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
int
in_step
=
static_cast
<
int
>
(
input
->
dims
()[
1
])
/
groups
;
int
in_step
=
static_cast
<
int
>
(
input
->
dims
()[
1
])
/
groups
;
int
out_step
=
static_cast
<
int
>
(
output_grad
->
dims
()[
1
])
/
groups
;
int
out_step
=
static_cast
<
int
>
(
output_grad
->
dims
()[
1
])
/
groups
;
bool
not_expand
=
NotExpand
(
filter_shape_vec
,
strides
,
paddings
,
dilations
);
Tensor
col
;
Tensor
col
;
// col_matrix shares the same piece of data with col,
// col_matrix shares the same piece of data with col,
// but will be reshaped into a two-dimensional matrix shape
// but will be reshaped into a two-dimensional matrix shape
// to call the matrix multiplication interface.
// to call the matrix multiplication interface.
Tensor
col_matrix
;
Tensor
col_matrix
;
if
(
!
not_expand
)
{
col
.
mutable_data
<
T
>
(
col_shape
,
context
.
GetPlace
());
col
.
mutable_data
<
T
>
(
col_shape
,
context
.
GetPlace
());
col_matrix
.
ShareDataWith
(
col
);
col_matrix
.
ShareDataWith
(
col
);
col_matrix
.
Resize
(
col_matrix_shape
);
col_matrix
.
Resize
(
col_matrix_shape
);
}
math
::
SetConstant
<
Place
,
T
>
set_zero
;
math
::
SetConstant
<
Place
,
T
>
set_zero
;
...
@@ -250,19 +288,21 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
...
@@ -250,19 +288,21 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
input_grad
->
mutable_data
<
T
>
(
context
.
GetPlace
());
set_zero
(
context
.
device_context
(),
input_grad
,
static_cast
<
T
>
(
0
));
set_zero
(
context
.
device_context
(),
input_grad
,
static_cast
<
T
>
(
0
));
if
(
!
not_expand
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
Tensor
out_grad_batch
=
Tensor
out_grad_batch
=
output_grad
->
Slice
(
i
,
i
+
1
).
Resize
(
output_matrix_shape
);
output_grad
->
Slice
(
i
,
i
+
1
).
Resize
(
output_matrix_shape
);
Tensor
in_grad_batch
=
input_grad
->
Slice
(
i
,
i
+
1
).
Resize
(
input_shape
);
Tensor
in_grad_batch
=
input_grad
->
Slice
(
i
,
i
+
1
).
Resize
(
input_shape
);
for
(
int
g
=
0
;
g
<
groups
;
g
++
)
{
for
(
int
g
=
0
;
g
<
groups
;
g
++
)
{
// gemm
// gemm
Tensor
out_grad_slice
=
Tensor
out_grad_slice
=
out_grad_batch
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
out_grad_batch
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
Tensor
filter_slice
=
filter
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
Tensor
filter_slice
=
filter
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
math
::
matmul
<
Place
,
T
>
(
context
.
device_context
(),
filter_slice
,
true
,
math
::
matmul
<
Place
,
T
>
(
context
.
device_context
(),
filter_slice
,
true
,
out_grad_slice
,
false
,
T
(
1.0
),
&
col_matrix
,
out_grad_slice
,
false
,
T
(
1.0
),
&
col_matrix
,
T
(
0.0
));
T
(
0.0
));
// col2im
Tensor
in_grad_slice
=
Tensor
in_grad_slice
=
in_grad_batch
.
Slice
(
g
*
in_step
,
(
g
+
1
)
*
in_step
);
in_grad_batch
.
Slice
(
g
*
in_step
,
(
g
+
1
)
*
in_step
);
...
@@ -280,6 +320,31 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
...
@@ -280,6 +320,31 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
}
}
}
}
}
}
}
else
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
Tensor
out_grad_batch
=
output_grad
->
Slice
(
i
,
i
+
1
).
Resize
(
output_matrix_shape
);
Tensor
in_grad_batch
=
input_grad
->
Slice
(
i
,
i
+
1
).
Resize
(
input_shape
);
for
(
int
g
=
0
;
g
<
groups
;
g
++
)
{
// gemm
Tensor
out_grad_slice
=
out_grad_batch
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
Tensor
filter_slice
=
filter
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
Tensor
in_grad_slice
=
in_grad_batch
.
Slice
(
g
*
in_step
,
(
g
+
1
)
*
in_step
);
col_matrix
.
ShareDataWith
(
in_grad_slice
);
col_matrix
.
Resize
(
col_matrix_shape
);
math
::
matmul
<
Place
,
T
>
(
context
.
device_context
(),
filter_slice
,
true
,
out_grad_slice
,
false
,
T
(
1.0
),
&
col_matrix
,
T
(
0.0
));
}
}
}
}
}
if
(
filter_grad
)
{
if
(
filter_grad
)
{
...
@@ -288,6 +353,7 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
...
@@ -288,6 +353,7 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
filter_grad_
.
Resize
(
filter_matrix_shape
);
filter_grad_
.
Resize
(
filter_matrix_shape
);
set_zero
(
context
.
device_context
(),
filter_grad
,
static_cast
<
T
>
(
0
));
set_zero
(
context
.
device_context
(),
filter_grad
,
static_cast
<
T
>
(
0
));
if
(
!
not_expand
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
Tensor
out_grad_batch
=
Tensor
out_grad_batch
=
output_grad
->
Slice
(
i
,
i
+
1
).
Resize
(
output_matrix_shape
);
output_grad
->
Slice
(
i
,
i
+
1
).
Resize
(
output_matrix_shape
);
...
@@ -318,6 +384,30 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
...
@@ -318,6 +384,30 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
&
filter_grad_slice
,
T
(
1.0
));
&
filter_grad_slice
,
T
(
1.0
));
}
}
}
}
}
else
{
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
Tensor
out_grad_batch
=
output_grad
->
Slice
(
i
,
i
+
1
).
Resize
(
output_matrix_shape
);
Tensor
in_batch
=
input
->
Slice
(
i
,
i
+
1
).
Resize
(
input_shape
);
for
(
int
g
=
0
;
g
<
groups
;
g
++
)
{
// im2col
Tensor
out_grad_slice
=
out_grad_batch
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
Tensor
in_slice
=
in_batch
.
Slice
(
g
*
in_step
,
(
g
+
1
)
*
in_step
);
col
.
ShareDataWith
(
in_slice
);
col_matrix
.
ShareDataWith
(
col
);
col_matrix
.
Resize
(
col_matrix_shape
);
// gemm
Tensor
filter_grad_slice
=
filter_grad_
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
math
::
matmul
<
Place
,
T
>
(
context
.
device_context
(),
out_grad_slice
,
false
,
col_matrix
,
true
,
T
(
1.0
),
&
filter_grad_slice
,
T
(
1.0
));
}
}
}
}
}
}
}
};
};
...
...
python/paddle/v2/framework/tests/test_conv2d_op.py
浏览文件 @
21ce7042
...
@@ -104,6 +104,25 @@ class TestWithGroup(TestConv2dOp):
...
@@ -104,6 +104,25 @@ class TestWithGroup(TestConv2dOp):
self
.
op_type
=
"conv2d"
self
.
op_type
=
"conv2d"
class TestWith1x1(TestConv2dOp):
    """Conv2d test case with a (1, 1) filter, unit stride, zero padding and
    unit dilation -- exercises the "not expand" (im2col-free) fast path
    added for 1x1 filters."""

    def init_test_case(self):
        self.pad = [0, 0]
        self.stride = [1, 1]
        self.input_size = [2, 3, 5, 5]  # NCHW
        assert np.mod(self.input_size[1], self.groups) == 0
        # Floor division keeps the filter channel count an int; plain "/"
        # would yield a float under Python 3 and break the shape list.
        f_c = self.input_size[1] // self.groups
        self.filter_size = [6, f_c, 1, 1]

    def init_dilation(self):
        self.dilations = [1, 1]

    def init_group(self):
        self.groups = 3

    def init_op_type(self):
        self.op_type = "conv2d"
#----------------Conv2dCudnn----------------
#----------------Conv2dCudnn----------------
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录