提交 4c69d837 编写于 作者: Y yejianwu

support different height and width for pooling

上级 27321d33
......@@ -2,15 +2,16 @@
#define MIN_VALUE -FLT_MAX
inline int calculate_avg_block_size(const int pool_size,
inline int calculate_avg_block_size(const int pool_size_h,
const int pool_size_w,
const int pos_h,
const int pos_w,
const int h_size,
const int w_size) {
const int h_start = max(0, pos_h);
const int w_start = max(0, pos_w);
const int h_end = min(pos_h + pool_size, h_size);
const int w_end = min(pos_w + pool_size, w_size);
const int h_end = min(pos_h + pool_size_h, h_size);
const int w_end = min(pos_w + pool_size_w, w_size);
return mul24((h_end - h_start), (w_end - w_start));
}
......@@ -23,8 +24,10 @@ __kernel void pooling(KERNEL_ERROR_PARAMS
__private const int out_height,
__private const int pad_top,
__private const int pad_left,
__private const int stride,
__private const int pooling_size,
__private const int stride_h,
__private const int stride_w,
__private const int pooling_size_h,
__private const int pooling_size_w,
__write_only image2d_t output) {
const int out_chan_idx = get_global_id(0);
......@@ -40,19 +43,19 @@ __kernel void pooling(KERNEL_ERROR_PARAMS
const int out_width = global_size_dim1;
const int batch_idx = mul24((out_hb_idx / out_height), in_height);
const int in_height_start = mul24((out_hb_idx % out_height), stride) - pad_top;
const int in_width_start = mul24(out_width_idx, stride) - pad_left;
const int in_height_start = mul24((out_hb_idx % out_height), stride_h) - pad_top;
const int in_width_start = mul24(out_width_idx, stride_w) - pad_left;
const int in_channel_offset = mul24(out_chan_idx, in_width);
#ifdef POOL_AVG
DATA_TYPE4 res = 0;
for (int height = 0; height < pooling_size; ++height) {
for (int height = 0; height < pooling_size_h; ++height) {
int in_height_idx = in_height_start + height;
in_height_idx = select(batch_idx + in_height_idx,
-1,
(in_height_idx < 0 || in_height_idx >= in_height));
for (int width = 0; width < pooling_size; ++width) {
for (int width = 0; width < pooling_size_w; ++width) {
int in_width_idx = in_width_start + width;
in_width_idx = select(in_channel_offset + in_width_idx,
-1,
......@@ -62,19 +65,20 @@ __kernel void pooling(KERNEL_ERROR_PARAMS
res = res + in;
}
}
const int block_size = calculate_avg_block_size(pooling_size,
const int block_size = calculate_avg_block_size(pooling_size_h,
pooling_size_w,
in_height_start, in_width_start,
in_height, in_width);
res /= block_size;
#else
DATA_TYPE4 res = (DATA_TYPE4)(MIN_VALUE);
for (int height = 0; height < pooling_size; ++height) {
for (int height = 0; height < pooling_size_h; ++height) {
int in_height_idx = in_height_start + height;
in_height_idx = select(batch_idx + in_height_idx,
-1,
(in_height_idx < 0 || in_height_idx >= in_height));
if (in_height_idx != -1) {
for (int width = 0; width < pooling_size; ++width) {
for (int width = 0; width < pooling_size_w; ++width) {
int in_width_idx = in_width_start + width;
in_width_idx = select(in_channel_offset + in_width_idx,
-1,
......
......@@ -129,7 +129,9 @@ MaceStatus PoolingFunctor<DeviceType::GPU, T>::operator()(const Tensor *input,
kernel_.setArg(idx++, paddings[0] / 2);
kernel_.setArg(idx++, paddings[1] / 2);
kernel_.setArg(idx++, strides_[0]);
kernel_.setArg(idx++, strides_[1]);
kernel_.setArg(idx++, kernels_[0]);
kernel_.setArg(idx++, kernels_[1]);
kernel_.setArg(idx++, *(output->opencl_image()));
input_shape_ = input->shape();
......
......@@ -280,22 +280,28 @@ void MaxPooling3S2(const std::vector<index_t> &input_shape,
TEST_F(PoolingOpTest, OPENCLAlignedMaxPooling3S2) {
MaxPooling3S2<GPU, float>({3, 64, 32, 32}, {1, 1}, Padding::VALID);
MaxPooling3S2<GPU, float>({3, 64, 32, 32}, {2, 2}, Padding::VALID);
MaxPooling3S2<GPU, float>({3, 64, 32, 32}, {1, 2}, Padding::VALID);
MaxPooling3S2<GPU, float>({3, 64, 32, 32}, {1, 1}, Padding::SAME);
MaxPooling3S2<GPU, float>({3, 64, 32, 32}, {2, 2}, Padding::SAME);
MaxPooling3S2<GPU, float>({3, 64, 32, 32}, {2, 1}, Padding::SAME);
}
TEST_F(PoolingOpTest, OPENCLHalfAlignedMaxPooling3S2) {
MaxPooling3S2<GPU, half>({3, 64, 32, 32}, {1, 1}, Padding::VALID);
MaxPooling3S2<GPU, half>({3, 64, 32, 32}, {2, 2}, Padding::VALID);
MaxPooling3S2<GPU, half>({3, 64, 32, 32}, {1, 2}, Padding::VALID);
MaxPooling3S2<GPU, half>({3, 64, 32, 32}, {1, 1}, Padding::SAME);
MaxPooling3S2<GPU, half>({3, 64, 32, 32}, {2, 2}, Padding::SAME);
MaxPooling3S2<GPU, half>({3, 64, 32, 32}, {2, 1}, Padding::SAME);
}
TEST_F(PoolingOpTest, OPENCLUnalignedMaxPooling3S2) {
MaxPooling3S2<GPU, half>({3, 41, 43, 47}, {1, 1}, Padding::VALID);
MaxPooling3S2<GPU, half>({3, 41, 43, 47}, {2, 2}, Padding::VALID);
MaxPooling3S2<GPU, half>({3, 41, 43, 47}, {1, 2}, Padding::VALID);
MaxPooling3S2<GPU, half>({3, 41, 43, 47}, {1, 1}, Padding::SAME);
MaxPooling3S2<GPU, half>({3, 41, 43, 47}, {2, 2}, Padding::SAME);
MaxPooling3S2<GPU, half>({3, 41, 43, 47}, {2, 1}, Padding::SAME);
}
TEST_F(PoolingOpTest, AVG_VALID) {
......@@ -431,19 +437,27 @@ void AvgPoolingTest(const std::vector<index_t> &shape,
TEST_F(PoolingOpTest, OPENCLAlignedAvgPooling) {
AvgPoolingTest<GPU, float>({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::VALID);
AvgPoolingTest<GPU, float>({3, 15, 15, 128}, {3, 4}, {1, 2}, Padding::VALID);
AvgPoolingTest<GPU, float>({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::SAME);
AvgPoolingTest<GPU, float>({3, 15, 15, 128}, {3, 4}, {1, 2}, Padding::SAME);
}
TEST_F(PoolingOpTest, OPENCLHalfAlignedAvgPooling) {
AvgPoolingTest<GPU, half>({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::VALID);
AvgPoolingTest<GPU, half>({3, 15, 15, 128}, {3, 4}, {1, 2}, Padding::VALID);
AvgPoolingTest<GPU, half>({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::SAME);
AvgPoolingTest<GPU, half>({3, 15, 15, 128}, {3, 4}, {1, 2}, Padding::SAME);
}
TEST_F(PoolingOpTest, OPENCLAlignedLargeKernelAvgPooling) {
AvgPoolingTest<GPU, float>({3, 64, 64, 128}, {16, 16}, {16, 16},
Padding::VALID);
AvgPoolingTest<GPU, float>({3, 64, 64, 128}, {12, 16}, {12, 8},
Padding::VALID);
AvgPoolingTest<GPU, float>({3, 64, 64, 128}, {16, 16}, {16, 16},
Padding::SAME);
AvgPoolingTest<GPU, float>({3, 64, 64, 128}, {8, 16}, {8, 16},
Padding::SAME);
}
TEST_F(PoolingOpTest, OPENCLHalfAlignedLargeKernelAvgPooling) {
......@@ -622,8 +636,10 @@ void TestQuant(const index_t batch,
TEST_F(PoolingOpTest, Quant) {
TestQuant(1, 7, 7, 1024, {7, 7}, {1, 1}, Padding::VALID, PoolingType::AVG);
TestQuant(1, 3, 3, 2, {3, 3}, {1, 1}, Padding::SAME, PoolingType::AVG);
TestQuant(1, 3, 3, 2, {2, 3}, {1, 2}, Padding::SAME, PoolingType::AVG);
TestQuant(1, 7, 7, 1024, {7, 7}, {1, 1}, Padding::VALID, PoolingType::MAX);
TestQuant(1, 7, 7, 1024, {7, 7}, {1, 1}, Padding::SAME, PoolingType::MAX);
TestQuant(1, 7, 7, 1024, {6, 7}, {4, 5}, Padding::SAME, PoolingType::MAX);
TestQuant(1, 7, 7, 2048, {7, 7}, {1, 1}, Padding::SAME, PoolingType::AVG);
TestQuant(3, 15, 15, 128, {4, 4}, {4, 4}, Padding::VALID, PoolingType::AVG);
TestQuant(3, 15, 15, 128, {4, 4}, {4, 4}, Padding::VALID, PoolingType::MAX);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册