diff --git a/mace/kernels/opencl/cl/pooling.cl b/mace/kernels/opencl/cl/pooling.cl index 11ff3b89569a31a73437cb8f22b0e009d2ad5769..7b1b4607c6b03ed3f1b98d3dd7e39cec4e2e0a55 100644 --- a/mace/kernels/opencl/cl/pooling.cl +++ b/mace/kernels/opencl/cl/pooling.cl @@ -2,15 +2,16 @@ #define MIN_VALUE -FLT_MAX -inline int calculate_avg_block_size(const int pool_size, +inline int calculate_avg_block_size(const int pool_size_h, + const int pool_size_w, const int pos_h, const int pos_w, const int h_size, const int w_size) { const int h_start = max(0, pos_h); const int w_start = max(0, pos_w); - const int h_end = min(pos_h + pool_size, h_size); - const int w_end = min(pos_w + pool_size, w_size); + const int h_end = min(pos_h + pool_size_h, h_size); + const int w_end = min(pos_w + pool_size_w, w_size); return mul24((h_end - h_start), (w_end - w_start)); } @@ -23,8 +24,10 @@ __kernel void pooling(KERNEL_ERROR_PARAMS __private const int out_height, __private const int pad_top, __private const int pad_left, - __private const int stride, - __private const int pooling_size, + __private const int stride_h, + __private const int stride_w, + __private const int pooling_size_h, + __private const int pooling_size_w, __write_only image2d_t output) { const int out_chan_idx = get_global_id(0); @@ -40,19 +43,19 @@ __kernel void pooling(KERNEL_ERROR_PARAMS const int out_width = global_size_dim1; const int batch_idx = mul24((out_hb_idx / out_height), in_height); - const int in_height_start = mul24((out_hb_idx % out_height), stride) - pad_top; - const int in_width_start = mul24(out_width_idx, stride) - pad_left; + const int in_height_start = mul24((out_hb_idx % out_height), stride_h) - pad_top; + const int in_width_start = mul24(out_width_idx, stride_w) - pad_left; const int in_channel_offset = mul24(out_chan_idx, in_width); #ifdef POOL_AVG DATA_TYPE4 res = 0; - for (int height = 0; height < pooling_size; ++height) { + for (int height = 0; height < pooling_size_h; ++height) { int in_height_idx = in_height_start + height; in_height_idx = select(batch_idx + in_height_idx, -1, (in_height_idx < 0 || in_height_idx >= in_height)); - for (int width = 0; width < pooling_size; ++width) { + for (int width = 0; width < pooling_size_w; ++width) { int in_width_idx = in_width_start + width; in_width_idx = select(in_channel_offset + in_width_idx, -1, @@ -62,19 +65,20 @@ __kernel void pooling(KERNEL_ERROR_PARAMS res = res + in; } } - const int block_size = calculate_avg_block_size(pooling_size, + const int block_size = calculate_avg_block_size(pooling_size_h, + pooling_size_w, in_height_start, in_width_start, in_height, in_width); res /= block_size; #else DATA_TYPE4 res = (DATA_TYPE4)(MIN_VALUE); - for (int height = 0; height < pooling_size; ++height) { + for (int height = 0; height < pooling_size_h; ++height) { int in_height_idx = in_height_start + height; in_height_idx = select(batch_idx + in_height_idx, -1, (in_height_idx < 0 || in_height_idx >= in_height)); if (in_height_idx != -1) { - for (int width = 0; width < pooling_size; ++width) { + for (int width = 0; width < pooling_size_w; ++width) { int in_width_idx = in_width_start + width; in_width_idx = select(in_channel_offset + in_width_idx, -1, diff --git a/mace/kernels/opencl/pooling.cc b/mace/kernels/opencl/pooling.cc index 7111317a8419d0fdf9b2518102495c66bc4d8bdb..18eb6e80f9595ac177db70d49f9ac81b822bcbfc 100644 --- a/mace/kernels/opencl/pooling.cc +++ b/mace/kernels/opencl/pooling.cc @@ -129,7 +129,9 @@ MaceStatus PoolingFunctor::operator()(const Tensor *input, kernel_.setArg(idx++, paddings[0] / 2); kernel_.setArg(idx++, paddings[1] / 2); kernel_.setArg(idx++, strides_[0]); + kernel_.setArg(idx++, strides_[1]); kernel_.setArg(idx++, kernels_[0]); + kernel_.setArg(idx++, kernels_[1]); kernel_.setArg(idx++, *(output->opencl_image())); input_shape_ = input->shape(); diff --git a/mace/ops/pooling_test.cc b/mace/ops/pooling_test.cc index f0ac2572decce5df78832ba273b2a8d71480b50a..72a4fdeef86077ff8633a98a14bca24642cfed0e 100644 --- a/mace/ops/pooling_test.cc +++ b/mace/ops/pooling_test.cc @@ -280,22 +280,28 @@ void MaxPooling3S2(const std::vector &input_shape, TEST_F(PoolingOpTest, OPENCLAlignedMaxPooling3S2) { MaxPooling3S2({3, 64, 32, 32}, {1, 1}, Padding::VALID); MaxPooling3S2({3, 64, 32, 32}, {2, 2}, Padding::VALID); + MaxPooling3S2({3, 64, 32, 32}, {1, 2}, Padding::VALID); MaxPooling3S2({3, 64, 32, 32}, {1, 1}, Padding::SAME); MaxPooling3S2({3, 64, 32, 32}, {2, 2}, Padding::SAME); + MaxPooling3S2({3, 64, 32, 32}, {2, 1}, Padding::SAME); } TEST_F(PoolingOpTest, OPENCLHalfAlignedMaxPooling3S2) { MaxPooling3S2({3, 64, 32, 32}, {1, 1}, Padding::VALID); MaxPooling3S2({3, 64, 32, 32}, {2, 2}, Padding::VALID); + MaxPooling3S2({3, 64, 32, 32}, {1, 2}, Padding::VALID); MaxPooling3S2({3, 64, 32, 32}, {1, 1}, Padding::SAME); MaxPooling3S2({3, 64, 32, 32}, {2, 2}, Padding::SAME); + MaxPooling3S2({3, 64, 32, 32}, {2, 1}, Padding::SAME); } TEST_F(PoolingOpTest, OPENCLUnalignedMaxPooling3S2) { MaxPooling3S2({3, 41, 43, 47}, {1, 1}, Padding::VALID); MaxPooling3S2({3, 41, 43, 47}, {2, 2}, Padding::VALID); + MaxPooling3S2({3, 41, 43, 47}, {1, 2}, Padding::VALID); MaxPooling3S2({3, 41, 43, 47}, {1, 1}, Padding::SAME); MaxPooling3S2({3, 41, 43, 47}, {2, 2}, Padding::SAME); + MaxPooling3S2({3, 41, 43, 47}, {2, 1}, Padding::SAME); } TEST_F(PoolingOpTest, AVG_VALID) { @@ -431,19 +437,27 @@ void AvgPoolingTest(const std::vector &shape, TEST_F(PoolingOpTest, OPENCLAlignedAvgPooling) { AvgPoolingTest({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::VALID); + AvgPoolingTest({3, 15, 15, 128}, {3, 4}, {1, 2}, Padding::VALID); AvgPoolingTest({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::SAME); + AvgPoolingTest({3, 15, 15, 128}, {3, 4}, {1, 2}, Padding::SAME); } TEST_F(PoolingOpTest, OPENCLHalfAlignedAvgPooling) { AvgPoolingTest({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::VALID); + AvgPoolingTest({3, 15, 15, 128}, {3, 4}, {1, 2}, Padding::VALID); AvgPoolingTest({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::SAME); + AvgPoolingTest({3, 15, 15, 128}, {3, 4}, {1, 2}, Padding::SAME); } TEST_F(PoolingOpTest, OPENCLAlignedLargeKernelAvgPooling) { AvgPoolingTest({3, 64, 64, 128}, {16, 16}, {16, 16}, Padding::VALID); + AvgPoolingTest({3, 64, 64, 128}, {12, 16}, {12, 8}, + Padding::VALID); AvgPoolingTest({3, 64, 64, 128}, {16, 16}, {16, 16}, Padding::SAME); + AvgPoolingTest({3, 64, 64, 128}, {8, 16}, {8, 16}, + Padding::SAME); } TEST_F(PoolingOpTest, OPENCLHalfAlignedLargeKernelAvgPooling) { @@ -622,8 +636,10 @@ void TestQuant(const index_t batch, TEST_F(PoolingOpTest, Quant) { TestQuant(1, 7, 7, 1024, {7, 7}, {1, 1}, Padding::VALID, PoolingType::AVG); TestQuant(1, 3, 3, 2, {3, 3}, {1, 1}, Padding::SAME, PoolingType::AVG); + TestQuant(1, 3, 3, 2, {2, 3}, {1, 2}, Padding::SAME, PoolingType::AVG); TestQuant(1, 7, 7, 1024, {7, 7}, {1, 1}, Padding::VALID, PoolingType::MAX); TestQuant(1, 7, 7, 1024, {7, 7}, {1, 1}, Padding::SAME, PoolingType::MAX); + TestQuant(1, 7, 7, 1024, {6, 7}, {4, 5}, Padding::SAME, PoolingType::MAX); TestQuant(1, 7, 7, 2048, {7, 7}, {1, 1}, Padding::SAME, PoolingType::AVG); TestQuant(3, 15, 15, 128, {4, 4}, {4, 4}, Padding::VALID, PoolingType::AVG); TestQuant(3, 15, 15, 128, {4, 4}, {4, 4}, Padding::VALID, PoolingType::MAX);