support different height and width for pooling

4c69d837 · yejianwu · 27321d33 · 4c69d837 · 4c69d837 · 4c69d837
Showing with 34 addition and 12 deletion

mace/kernels/opencl/cl/pooling.cl mace/kernels/opencl/cl/pooling.cl +16 -12

mace/kernels/opencl/pooling.cc mace/kernels/opencl/pooling.cc +2 -0

mace/ops/pooling_test.cc mace/ops/pooling_test.cc +16 -0

未找到文件。
--- a/mace/kernels/opencl/cl/pooling.cl
+++ b/mace/kernels/opencl/cl/pooling.cl
@@ -2,15 +2,16 @@

 #define MIN_VALUE -FLT_MAX

-inline int calculate_avg_block_size(const int pool_size,
+inline int calculate_avg_block_size(const int pool_size_h,
+                                    const int pool_size_w,
                                    const int pos_h,
                                    const int pos_w,
                                    const int h_size,
                                    const int w_size) {
  const int h_start = max(0, pos_h);
  const int w_start = max(0, pos_w);
-  const int h_end = min(pos_h + pool_size, h_size);
-  const int w_end = min(pos_w + pool_size, w_size);
+  const int h_end = min(pos_h + pool_size_h, h_size);
+  const int w_end = min(pos_w + pool_size_w, w_size);
  return mul24((h_end - h_start), (w_end - w_start));
 }

@@ -23,8 +24,10 @@ __kernel void pooling(KERNEL_ERROR_PARAMS
                      __private const int out_height,
                      __private const int pad_top,
                      __private const int pad_left,
-                      __private const int stride,
-                      __private const int pooling_size,
+                      __private const int stride_h,
+                      __private const int stride_w,
+                      __private const int pooling_size_h,
+                      __private const int pooling_size_w,
                      __write_only image2d_t output) {

  const int out_chan_idx = get_global_id(0);
@@ -40,19 +43,19 @@ __kernel void pooling(KERNEL_ERROR_PARAMS
  const int out_width = global_size_dim1;

  const int batch_idx = mul24((out_hb_idx / out_height), in_height);
-  const int in_height_start = mul24((out_hb_idx % out_height), stride) - pad_top;
-  const int in_width_start = mul24(out_width_idx, stride) - pad_left;
+  const int in_height_start = mul24((out_hb_idx % out_height), stride_h) - pad_top;
+  const int in_width_start = mul24(out_width_idx, stride_w) - pad_left;
  const int in_channel_offset = mul24(out_chan_idx, in_width);


 #ifdef POOL_AVG
  DATA_TYPE4 res = 0;
-  for (int height = 0; height < pooling_size; ++height) {
+  for (int height = 0; height < pooling_size_h; ++height) {
    int in_height_idx = in_height_start + height;
    in_height_idx = select(batch_idx + in_height_idx,
                       -1,
                       (in_height_idx < 0 || in_height_idx >= in_height));
-    for (int width = 0; width < pooling_size; ++width) {
+    for (int width = 0; width < pooling_size_w; ++width) {
      int in_width_idx = in_width_start + width;
      in_width_idx = select(in_channel_offset + in_width_idx,
                            -1,
@@ -62,19 +65,20 @@ __kernel void pooling(KERNEL_ERROR_PARAMS
      res = res + in;
    }
  }
-  const int block_size = calculate_avg_block_size(pooling_size,
+  const int block_size = calculate_avg_block_size(pooling_size_h,
+                                                  pooling_size_w,
                                                  in_height_start, in_width_start,
                                                  in_height, in_width);
  res /= block_size;
 #else
  DATA_TYPE4 res = (DATA_TYPE4)(MIN_VALUE);
-  for (int height = 0; height < pooling_size; ++height) {
+  for (int height = 0; height < pooling_size_h; ++height) {
    int in_height_idx = in_height_start + height;
    in_height_idx = select(batch_idx + in_height_idx,
                           -1,
                           (in_height_idx < 0 || in_height_idx >= in_height));
    if (in_height_idx != -1) {
-      for (int width = 0; width < pooling_size; ++width) {
+      for (int width = 0; width < pooling_size_w; ++width) {
        int in_width_idx = in_width_start + width;
        in_width_idx = select(in_channel_offset + in_width_idx,
                              -1,

--- a/mace/kernels/opencl/pooling.cc
+++ b/mace/kernels/opencl/pooling.cc
@@ -129,7 +129,9 @@ MaceStatus PoolingFunctor<DeviceType::GPU, T>::operator()(const Tensor *input,
    kernel_.setArg(idx++, paddings[0] / 2);
    kernel_.setArg(idx++, paddings[1] / 2);
    kernel_.setArg(idx++, strides_[0]);
+    kernel_.setArg(idx++, strides_[1]);
    kernel_.setArg(idx++, kernels_[0]);
+    kernel_.setArg(idx++, kernels_[1]);
    kernel_.setArg(idx++, *(output->opencl_image()));

    input_shape_ = input->shape();

--- a/mace/ops/pooling_test.cc
+++ b/mace/ops/pooling_test.cc
@@ -280,22 +280,28 @@ void MaxPooling3S2(const std::vector<index_t> &input_shape,
 TEST_F(PoolingOpTest, OPENCLAlignedMaxPooling3S2) {
  MaxPooling3S2<GPU, float>({3, 64, 32, 32}, {1, 1}, Padding::VALID);
  MaxPooling3S2<GPU, float>({3, 64, 32, 32}, {2, 2}, Padding::VALID);
+  MaxPooling3S2<GPU, float>({3, 64, 32, 32}, {1, 2}, Padding::VALID);
  MaxPooling3S2<GPU, float>({3, 64, 32, 32}, {1, 1}, Padding::SAME);
  MaxPooling3S2<GPU, float>({3, 64, 32, 32}, {2, 2}, Padding::SAME);
+  MaxPooling3S2<GPU, float>({3, 64, 32, 32}, {2, 1}, Padding::SAME);
 }

 TEST_F(PoolingOpTest, OPENCLHalfAlignedMaxPooling3S2) {
  MaxPooling3S2<GPU, half>({3, 64, 32, 32}, {1, 1}, Padding::VALID);
  MaxPooling3S2<GPU, half>({3, 64, 32, 32}, {2, 2}, Padding::VALID);
+  MaxPooling3S2<GPU, half>({3, 64, 32, 32}, {1, 2}, Padding::VALID);
  MaxPooling3S2<GPU, half>({3, 64, 32, 32}, {1, 1}, Padding::SAME);
  MaxPooling3S2<GPU, half>({3, 64, 32, 32}, {2, 2}, Padding::SAME);
+  MaxPooling3S2<GPU, half>({3, 64, 32, 32}, {2, 1}, Padding::SAME);
 }

 TEST_F(PoolingOpTest, OPENCLUnalignedMaxPooling3S2) {
  MaxPooling3S2<GPU, half>({3, 41, 43, 47}, {1, 1}, Padding::VALID);
  MaxPooling3S2<GPU, half>({3, 41, 43, 47}, {2, 2}, Padding::VALID);
+  MaxPooling3S2<GPU, half>({3, 41, 43, 47}, {1, 2}, Padding::VALID);
  MaxPooling3S2<GPU, half>({3, 41, 43, 47}, {1, 1}, Padding::SAME);
  MaxPooling3S2<GPU, half>({3, 41, 43, 47}, {2, 2}, Padding::SAME);
+  MaxPooling3S2<GPU, half>({3, 41, 43, 47}, {2, 1}, Padding::SAME);
 }

 TEST_F(PoolingOpTest, AVG_VALID) {
@@ -431,19 +437,27 @@ void AvgPoolingTest(const std::vector<index_t> &shape,

 TEST_F(PoolingOpTest, OPENCLAlignedAvgPooling) {
  AvgPoolingTest<GPU, float>({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::VALID);
+  AvgPoolingTest<GPU, float>({3, 15, 15, 128}, {3, 4}, {1, 2}, Padding::VALID);
  AvgPoolingTest<GPU, float>({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::SAME);
+  AvgPoolingTest<GPU, float>({3, 15, 15, 128}, {3, 4}, {1, 2}, Padding::SAME);
 }

 TEST_F(PoolingOpTest, OPENCLHalfAlignedAvgPooling) {
  AvgPoolingTest<GPU, half>({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::VALID);
+  AvgPoolingTest<GPU, half>({3, 15, 15, 128}, {3, 4}, {1, 2}, Padding::VALID);
  AvgPoolingTest<GPU, half>({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::SAME);
+  AvgPoolingTest<GPU, half>({3, 15, 15, 128}, {3, 4}, {1, 2}, Padding::SAME);
 }

 TEST_F(PoolingOpTest, OPENCLAlignedLargeKernelAvgPooling) {
  AvgPoolingTest<GPU, float>({3, 64, 64, 128}, {16, 16}, {16, 16},
                             Padding::VALID);
+  AvgPoolingTest<GPU, float>({3, 64, 64, 128}, {12, 16}, {12, 8},
+                             Padding::VALID);
  AvgPoolingTest<GPU, float>({3, 64, 64, 128}, {16, 16}, {16, 16},
                             Padding::SAME);
+  AvgPoolingTest<GPU, float>({3, 64, 64, 128}, {8, 16}, {8, 16},
+                             Padding::SAME);
 }

 TEST_F(PoolingOpTest, OPENCLHalfAlignedLargeKernelAvgPooling) {
@@ -622,8 +636,10 @@ void TestQuant(const index_t batch,
 TEST_F(PoolingOpTest, Quant) {
  TestQuant(1, 7, 7, 1024, {7, 7}, {1, 1}, Padding::VALID, PoolingType::AVG);
  TestQuant(1, 3, 3, 2, {3, 3}, {1, 1}, Padding::SAME, PoolingType::AVG);
+  TestQuant(1, 3, 3, 2, {2, 3}, {1, 2}, Padding::SAME, PoolingType::AVG);
  TestQuant(1, 7, 7, 1024, {7, 7}, {1, 1}, Padding::VALID, PoolingType::MAX);
  TestQuant(1, 7, 7, 1024, {7, 7}, {1, 1}, Padding::SAME, PoolingType::MAX);
+  TestQuant(1, 7, 7, 1024, {6, 7}, {4, 5}, Padding::SAME, PoolingType::MAX);
  TestQuant(1, 7, 7, 2048, {7, 7}, {1, 1}, Padding::SAME, PoolingType::AVG);
  TestQuant(3, 15, 15, 128, {4, 4}, {4, 4}, Padding::VALID, PoolingType::AVG);
  TestQuant(3, 15, 15, 128, {4, 4}, {4, 4}, Padding::VALID, PoolingType::MAX);