Remove unused padding for conv 1x1.

b028e4de · liuqi · 92e3e526 · b028e4de · b028e4de
隐藏空白更改
内联并排

Showing 2 changed files with 14 additions and 19 deletions

mace/kernels/opencl/cl/conv_2d_1x1.cl +12 -14

mace/kernels/opencl/conv_2d_opencl_1x1.cc +2 -5

未找到文件。
--- a/mace/kernels/opencl/cl/conv_2d_1x1.cl
+++ b/mace/kernels/opencl/cl/conv_2d_1x1.cl
@@ -14,9 +14,7 @@ __kernel void conv_2d_1x1(__read_only image2d_t input, /* [c%4 * w * c/4, h * b]
                          __private const int in_width,
                          __private const int in_ch_blks,
                          __private const int height,
-                          __private const int width,
-                          __private const int padding_top,
-                          __private const int padding_left) {
+                          __private const int width) {
  const int out_ch_blk = get_global_id(0);
  const int out_w_blk = get_global_id(1);
  const int out_w_blks = get_global_size(1);
@@ -38,23 +36,23 @@ __kernel void conv_2d_1x1(__read_only image2d_t input, /* [c%4 * w * c/4, h * b]

  int4 w;
 #if STRIDE == 1
-  w.x = out_w_blk - padding_left;
+  w.x = out_w_blk;
  w.y = w.x + out_w_blks;
  w.z = w.y + out_w_blks;
  w.w = w.z + out_w_blks;
-  int out_hb_idx = (out_hb % height) - padding_top;
+  int out_hb_idx = (out_hb % height);
 #else
-  w.x = out_w_blk * 2 - padding_left;
-  w.y = (out_w_blk + out_w_blks) * 2 - padding_left;
-  w.z = (out_w_blk + 2 * out_w_blks) * 2 - padding_left;
-  w.w = (out_w_blk + 3 * out_w_blks) * 2 - padding_left;
-  int out_hb_idx = (out_hb % height) * 2 - padding_top;
+  w.x = out_w_blk * 2;
+  w.y = (out_w_blk + out_w_blks) * 2;
+  w.z = (out_w_blk + 2 * out_w_blks) * 2;
+  w.w = (out_w_blk + 3 * out_w_blks) * 2;
+  int out_hb_idx = (out_hb % height) * 2;
 #endif

-  w.x = select(w.x, INT_MIN, (w.x < 0 || w.x >= in_width));
-  w.y = select(w.y, INT_MIN, (w.y < 0 || w.y >= in_width));
-  w.z = select(w.z, INT_MIN, (w.z < 0 || w.z >= in_width));
-  w.w = select(w.w, INT_MIN, (w.w < 0 || w.w >= in_width));
+  w.x = select(w.x, INT_MIN, w.x >= in_width);
+  w.y = select(w.y, INT_MIN, w.y >= in_width);
+  w.z = select(w.z, INT_MIN, w.z >= in_width);
+  w.w = select(w.w, INT_MIN, w.w >= in_width);

  out_hb_idx = select(out_hb_idx + (out_hb / height) * in_height,
                      -1,

--- a/mace/kernels/opencl/conv_2d_opencl_1x1.cc
+++ b/mace/kernels/opencl/conv_2d_opencl_1x1.cc
@@ -15,7 +15,6 @@ void Conv1x1(const Tensor *input,
             const Tensor *filter,
             const Tensor *bias,
             const int stride,
-             const int *padding,
             Tensor *output) {
  const index_t batch = output->dim(0);
  const index_t height = output->dim(1);
@@ -58,8 +57,6 @@ void Conv1x1(const Tensor *input,
  conv_2d_kernel.setArg(idx++, static_cast<int>(input_channel_blocks));
  conv_2d_kernel.setArg(idx++, static_cast<int>(height));
  conv_2d_kernel.setArg(idx++, static_cast<int>(width));
-  conv_2d_kernel.setArg(idx++, padding[0] / 2);
-  conv_2d_kernel.setArg(idx++, padding[1] / 2);

  auto command_queue = runtime->command_queue();
  cl_int error;
@@ -78,7 +75,7 @@ extern void Conv2dOpenclK1x1S1(const Tensor *input,
                               const Tensor *bias,
                               const int *padding,
                               Tensor *output) {
-  Conv1x1(input, filter, bias, 1, padding, output);
+  Conv1x1(input, filter, bias, 1, output);
 };

 extern void Conv2dOpenclK1x1S2(const Tensor *input,
@@ -86,7 +83,7 @@ extern void Conv2dOpenclK1x1S2(const Tensor *input,
                               const Tensor *bias,
                               const int *padding,
                               Tensor *output) {
-  Conv1x1(input, filter, bias, 2, padding, output);
+  Conv1x1(input, filter, bias, 2, output);
 };

 }  // namespace kernels