diff --git a/src/operators/kernel/central-arm-func/pool_arm_func.h b/src/operators/kernel/central-arm-func/pool_arm_func.h index b8086b4ecbc2592c3789a7d176eefb02bb02ada5..82c24d0ab4ca93ae23218eb7441f37ef2f68efee 100644 --- a/src/operators/kernel/central-arm-func/pool_arm_func.h +++ b/src/operators/kernel/central-arm-func/pool_arm_func.h @@ -32,6 +32,7 @@ void PoolCompute(const PoolParam &param) { std::vector ksize = param.Ksize(); std::vector strides = param.Strides(); std::vector paddings = param.Paddings(); + const bool exclusive = param.isExclusive(); if (param.isGlobalPooling()) { for (size_t i = 0; i < ksize.size(); ++i) { paddings[i] = 0; @@ -41,17 +42,17 @@ void PoolCompute(const PoolParam &param) { if (ksize[0] == 3 && ksize[0] == ksize[1]) { if (pooling_type == "max" && strides[0] == strides[1]) { if (strides[0] == 1) { - math::Pooling3x3()(*input, paddings, output); + math::Pooling3x3()(*input, paddings, exclusive, output); } else if (strides[0] == 2) { - math::Pooling3x3()(*input, paddings, output); + math::Pooling3x3()(*input, paddings, exclusive, output); } else { math::Pooling()(*input, ksize, strides, paddings, output); } } else if (pooling_type == "avg" && strides[0] == strides[1]) { if (strides[0] == 1) { - math::Pooling3x3()(*input, paddings, output); + math::Pooling3x3()(*input, paddings, exclusive, output); } else if (strides[0] == 2) { - math::Pooling3x3()(*input, paddings, output); + math::Pooling3x3()(*input, paddings, exclusive, output); } else { math::Pooling()(*input, ksize, strides, paddings, output); } diff --git a/src/operators/math/pooling.h b/src/operators/math/pooling.h index 909d289767ca1835d397370b9e075762ccc61d94..70280ad0a0ff24d5df58349b3bf98104ffb6d2ea 100644 --- a/src/operators/math/pooling.h +++ b/src/operators/math/pooling.h @@ -41,6 +41,9 @@ struct PoolingVal { return *this; } inline float Value() { return (count > 0) ? val : 0.f; } + inline float ExclusiveSum(int total) { + return ((count > 0) ? 
val : 0.f) * total; + } }; template <> @@ -54,6 +57,7 @@ struct PoolingVal { return *this; } inline float Value() { return (count > 0) ? val * (1.f / count) : 0.f; } + inline float ExclusiveSum(int total) { return (count > 0) ? val : 0.f; } }; #if defined(__ARM_NEON) || defined(__ARM_NEON__) @@ -172,7 +176,8 @@ struct Pooling2x2 { template struct Pooling3x3 { void operator()(const framework::Tensor &input, - const std::vector &paddings, framework::Tensor *output); + const std::vector &paddings, const bool exclusive, + framework::Tensor *output); }; template diff --git a/src/operators/math/pooling3x3.cpp b/src/operators/math/pooling3x3.cpp index e67404469334aec33d66fe1c0bc51aadbb0ffe93..3303dabb8d778d721e638b63bb6c141cf7446b6e 100644 --- a/src/operators/math/pooling3x3.cpp +++ b/src/operators/math/pooling3x3.cpp @@ -23,19 +23,19 @@ namespace paddle_mobile { namespace operators { namespace math { -#define POOLING3X3_NORMAL_BORDER(start, end) \ - for (int w = start; w < end; ++w) { \ - const int w_in_start = -padding_w + w * Stride; \ - const int w_in_end = w_in_start + 3; \ - const int w_start = w_in_start > 0 ? w_in_start : 0; \ - const int w_end = w_in_end < input_w ? w_in_end : input_w; \ - PoolingVal

val; \ - for (int h_in = h_start; h_in < h_end; ++h_in) { \ - for (int w_in = w_start; w_in < w_end; ++w_in) { \ - val += input[h_in * input_w + w_in]; \ - } \ - } \ - output_ptr[w] = val.Value(); \ +#define POOLING3X3_NORMAL_BORDER(start, end, exclusive) \ + for (int w = start; w < end; ++w) { \ + const int w_in_start = -padding_w + w * Stride; \ + const int w_in_end = w_in_start + 3; \ + const int w_start = w_in_start > 0 ? w_in_start : 0; \ + const int w_end = w_in_end < input_w ? w_in_end : input_w; \ + PoolingVal

val; \ + for (int h_in = h_start; h_in < h_end; ++h_in) { \ + for (int w_in = w_start; w_in < w_end; ++w_in) { \ + val += input[h_in * input_w + w_in]; \ + } \ + } \ + output_ptr[w] = exclusive ? val.Value() : val.ExclusiveSum(9) / 9.f; \ } template @@ -80,7 +80,8 @@ template inline void Pooling3x3NormalRow(const float *input, const int h_output, const int input_h, const int input_w, const int padding_h, const int padding_w, - const int output_w, float *output) { + const int output_w, const bool exclusive, + float *output) { const int h_in_start = -padding_h + h_output * Stride; const int h_in_end = h_in_start + 3; const int h_start = h_in_start > 0 ? h_in_start : 0; @@ -97,13 +98,14 @@ inline void Pooling3x3NormalRow(const float *input, const int h_output, const int valid_w = valid_w_end - valid_w_start; // border left - POOLING3X3_NORMAL_BORDER(0, valid_w_start) + POOLING3X3_NORMAL_BORDER(0, valid_w_start, exclusive) // middle int output_tiles = (valid_w_end - valid_w_start) / 6; int output_tiles_w = output_tiles * 6; Pooling3x3NormalRowLoadInput PoolingCompute; float32x4x2_t x0, x1, x2, y0; - float32x4_t post = vdupq_n_f32(1.f / (3 * (h_end - h_start))); + float32x4_t post = exclusive ? 
vdupq_n_f32(1.f / (3 * (h_end - h_start))) + : vdupq_n_f32(1.f / 9); for (int w = 0; w < output_tiles_w; w += 6) { int output_offset = valid_w_start + w; int input_w_offset = output_offset * Stride - padding_w; @@ -150,13 +152,13 @@ inline void Pooling3x3NormalRow(const float *input, const int h_output, } } // border right - POOLING3X3_NORMAL_BORDER(valid_w_end, output_w) + POOLING3X3_NORMAL_BORDER(valid_w_end, output_w, exclusive) } template struct Pooling3x3 { inline void operator()(const framework::Tensor &input, - const std::vector &paddings, + const std::vector &paddings, const bool exclusive, framework::Tensor *output) { const float *input_data = input.data(); float *output_data = output->mutable_data(); @@ -184,7 +186,7 @@ struct Pooling3x3 { // top for (int h = 0; h < valid_h_start; ++h) { Pooling3x3NormalRow(input_ptr, h, input_h, input_w, padding_h, - padding_w, output_w, output_ptr); + padding_w, output_w, exclusive, output_ptr); } // valid int output_w_tiles = valid_w / 6; @@ -218,7 +220,8 @@ struct Pooling3x3 { output_ptr2[w] = 0.f; output_ptr3[w] = 0.f; } else { - post = vdup_n_f32(1.f / (3 * (3 - padding))); + post = exclusive ? vdup_n_f32(1.f / (3 * (3 - padding))) + : vdup_n_f32(1.f / 9); acc12 = vPoolPre_f32

(row1, row2); acc34 = vPoolPre_f32

(row3, row4); acc0 = vPoolPre_f32

(row0, acc12); @@ -526,7 +529,8 @@ struct Pooling3x3 { *output_ptr2 = 0.f; *output_ptr3 = 0.f; } else { - post = vdup_n_f32(1.f / (3 * (3 - padding))); + post = exclusive ? vdup_n_f32(1.f / (3 * (3 - padding))) + : vdup_n_f32(1.f / 9); acc12 = vPoolPre_f32

(row1, row2); acc34 = vPoolPre_f32

(row3, row4); acc0 = vPoolPre_f32

(row0, acc12); @@ -578,7 +582,8 @@ struct Pooling3x3 { if (padding >= 3) { output_ptr0[w] = 0.f; } else { - post = vdup_n_f32(1.f / (3 * (3 - padding))); + post = exclusive ? vdup_n_f32(1.f / (3 * (3 - padding))) + : vdup_n_f32(1.f / 9); acc0 = vPoolPre_f32

(row0, row1); acc0 = vPoolPre_f32

(acc0, row2); acc0 = vpPoolPre_f32

(acc0, acc0); @@ -718,7 +723,8 @@ struct Pooling3x3 { if (padding >= 3) { *output_ptr0 = 0.f; } else { - post = vdup_n_f32(1.f / (3 * (3 - padding))); + post = exclusive ? vdup_n_f32(1.f / (3 * (3 - padding))) + : vdup_n_f32(1.f / 9); acc0 = vPoolPre_f32

(row0, row1); acc0 = vPoolPre_f32

(acc0, row2); acc0 = vpPoolPre_f32

(acc0, acc0); @@ -735,7 +741,7 @@ struct Pooling3x3 { // pad bottom for (int h = valid_h_end; h < output_h; ++h) { Pooling3x3NormalRow(input_ptr, h, input_h, input_w, padding_h, - padding_w, output_w, output_ptr); + padding_w, output_w, exclusive, output_ptr); } } } @@ -745,7 +751,7 @@ struct Pooling3x3 { template struct Pooling3x3 { inline void operator()(const framework::Tensor &input, - const std::vector &paddings, + const std::vector &paddings, const bool exclusive, framework::Tensor *output) { const float *input_data = input.data(); float *output_data = output->mutable_data(); @@ -784,7 +790,7 @@ struct Pooling3x3 { // top for (int h = 0; h < valid_h_start; ++h) { Pooling3x3NormalRow(input_ptr, h, input_h, input_w, padding_h, - padding_w, output_w, output_ptr); + padding_w, output_w, exclusive, output_ptr); } // valid int output_w_tiles = valid_w / 6; @@ -818,7 +824,8 @@ struct Pooling3x3 { output_ptr1[w] = 0.f; output_ptr2[w] = 0.f; } else { - post = vdup_n_f32(1.f / (3 * (3 - padding))); + post = exclusive ? vdup_n_f32(1.f / (3 * (3 - padding))) + : vdup_n_f32(1.f / 9); acc0 = vPoolPre_f32

(row0, row1); acc1 = vPoolPre_f32

(row2, row3); acc2 = vPoolPre_f32

(row4, row5); @@ -1097,7 +1104,8 @@ struct Pooling3x3 { *output_ptr1 = 0.f; *output_ptr2 = 0.f; } else { - post = vdup_n_f32(1.f / (3 * (3 - padding))); + post = exclusive ? vdup_n_f32(1.f / (3 * (3 - padding))) + : vdup_n_f32(1.f / 9); acc0 = vPoolPre_f32

(row0, row1); acc1 = vPoolPre_f32

(row2, row3); acc2 = vPoolPre_f32

(row4, row5); @@ -1141,7 +1149,8 @@ struct Pooling3x3 { if (padding >= 3) { output_ptr0[w] = 0.f; } else { - post = vdup_n_f32(1.f / (3 * (3 - padding))); + post = exclusive ? vdup_n_f32(1.f / (3 * (3 - padding))) + : vdup_n_f32(1.f / 9); acc0 = vPoolPre_f32

(row0, row1); acc0 = vPoolPre_f32

(acc0, row2); if (padding == 1) { @@ -1271,7 +1280,8 @@ struct Pooling3x3 { if (padding >= 3) { *output_ptr0 = 0.f; } else { - post = vdup_n_f32(1.f / (3 * (3 - padding))); + post = exclusive ? vdup_n_f32(1.f / (3 * (3 - padding))) + : vdup_n_f32(1.f / 9); acc0 = vPoolPre_f32

(row0, row1); acc0 = vPoolPre_f32

(acc0, row2); if (padding == 1) { @@ -1287,7 +1297,7 @@ struct Pooling3x3 { // bottom for (int h = valid_h_end; h < output_h; ++h) { Pooling3x3NormalRow(input_ptr, h, input_h, input_w, padding_h, - padding_w, output_w, output_ptr); + padding_w, output_w, exclusive, output_ptr); } } } diff --git a/src/operators/op_param.h b/src/operators/op_param.h index 20b9ce4ddbb90944b0a2d0a5aed1d3c4ef759772..751c7b87ff54a111e6a761b2cf38260976891b8a 100644 --- a/src/operators/op_param.h +++ b/src/operators/op_param.h @@ -909,6 +909,7 @@ class PoolParam : public OpParam { paddings_ = GetAttr>("paddings", attrs); ceil_mode_ = GetAttr("ceil_mode", attrs); global_pooling_ = GetAttr("global_pooling", attrs); + exclusive_ = GetAttr("exclusive", attrs); } const GType *Input() const { return input_; } @@ -927,6 +928,8 @@ class PoolParam : public OpParam { bool isGlobalPooling() const { return global_pooling_; } + bool isExclusive() const { return exclusive_; } + private: GType *input_; GType *output_; @@ -936,6 +939,7 @@ class PoolParam : public OpParam { vector paddings_; bool ceil_mode_; bool global_pooling_ = false; + bool exclusive_ = true; #ifdef PADDLE_MOBILE_FPGA private: diff --git a/test/operators/test_pool_op.cpp b/test/operators/test_pool_op.cpp index 7c7f54c9d0db2725629d32977e4be5b273ab61bc..4b9bc3ad523b5086681ac837e7734468b90eb927 100644 --- a/test/operators/test_pool_op.cpp +++ b/test/operators/test_pool_op.cpp @@ -62,6 +62,7 @@ int TestPoolOp(int in_channels, int in_height, int in_width) { attrs["ceil_mode"].Set(true); // attrs["ceil_mode"].Set(false); attrs["global_pooling"].Set(false); + attrs["exclusive"].Set(true); auto *op = new operators::PoolOp("pool2d", inputs, outputs, attrs, scope.get());