From 4bdb6171797c86b58783900a997a2f0454c1eaa5 Mon Sep 17 00:00:00 2001
From: HappyAngel
Date: Fri, 22 Nov 2019 13:56:31 +0800
Subject: [PATCH] update pooling 2-padding to 4-padding (#2410)

* fix pooling bug and speed

* fix build error

* delete VLOG in pool, test=develop

* add openmp, test=develop

* fix basic_pooling compute error in lite/kernels/arm/pool_compute_test,
  test=develop

* update pooling 2-pad to 4-pad, test=develop

* fix 2-pad to 4-pad in operators/pool_op.h; AttachKernel will set param,
  so the 2-pad to 4-pad funcs should be put in AttachKernel. test=develop

* put 2-pad to 4-pad in AttachImpl, test=develop

* according to reviews, fix some format errors. test=develop

* fix format error, add (). test=develop

* change paddings type to support dynamic modification, test=develop

* update padding type in other devices, test=develop

* fix x8d build error on shared_ptr, test=develop

* fix format in operators/pool_op.cc, test=develop
---
 lite/backends/arm/math/pooling.cc        |  18 ++-
 lite/backends/fpga/KD/pes/pooling_pe.hpp |  10 +-
 lite/backends/x86/math/pooling.cc        |   8 +-
 lite/kernels/arm/pool_compute.cc         |  22 +--
 lite/kernels/arm/pool_compute_test.cc    | 176 +++++++++++++----------
 lite/kernels/cuda/pool_compute.cu        |   5 +-
 lite/kernels/cuda/pool_compute_test.cc   |  30 ++--
 lite/kernels/npu/bridges/pool_op.cc      |   7 +-
 lite/kernels/npu/bridges/pool_op_test.cc |   5 +-
 lite/kernels/opencl/pool_compute.cc      |  14 +-
 lite/kernels/opencl/pool_compute_test.cc |   4 +-
 lite/kernels/x86/pool_compute.h          |   5 +-
 lite/kernels/x86/pool_compute_test.cc    |   3 +-
 lite/kernels/xpu/bridges/pool_op_test.cc |   5 +-
 lite/operators/op_params.h               |   8 +-
 lite/operators/pool_op.cc                |  71 +++++++--
 lite/operators/pool_op.h                 |  22 ++-
 lite/tests/math/pool_compute_test.cc     |  90 +++++++-----
 18 files changed, 329 insertions(+), 174 deletions(-)

diff --git a/lite/backends/arm/math/pooling.cc b/lite/backends/arm/math/pooling.cc
index a857e9830c..8524d7376f 100644
--- a/lite/backends/arm/math/pooling.cc
+++ b/lite/backends/arm/math/pooling.cc
@@ -46,7 +46,7 @@ void pooling_basic(const float* din,
   int stride_h = strides[0];
   int stride_w = strides[1];
   int pad_h = paddings[0];
-  int pad_w = paddings[1];
+  int pad_w = paddings[2];
   int size_channel_in = win * hin;
   int size_channel_out = wout * hout;
   if (global_pooling) {
@@ -125,18 +125,22 @@ void pooling_basic(const float* din,
                 int bh = kernel_h;
                 int bw = kernel_w;
                 if (ew == win) {
-                  bw = sw + kernel_w >= win + pad_w ? win + pad_w
-                                                    : sw + kernel_w;
+                  bw = (sw + kernel_w) >= (win + paddings[3])
+                           ? (win + paddings[3])
+                           : (sw + kernel_w);
                   bw -= sw;
-                  if (sw - pad_w < 0 && sw + kernel_w > win + pad_w) {
+                  if ((sw - pad_w) < 0 &&
+                      (sw + kernel_w) > (win + paddings[3])) {
                     bw += pad_w;
                   }
                 }
                 if (eh == hin) {
-                  bh = sh + kernel_h >= hin + pad_h ? hin + pad_h
-                                                    : sh + kernel_h;
+                  bh = (sh + kernel_h) >= (hin + paddings[1])
+                           ?
(hin + paddings[1]) + : (sh + kernel_h); bh -= sh; - if (sh - pad_h < 0 && sh + kernel_h > hin + pad_h) { + if ((sh - pad_h) < 0 && + (sh + kernel_h) > (hin + paddings[1])) { bh += pad_h; } } diff --git a/lite/backends/fpga/KD/pes/pooling_pe.hpp b/lite/backends/fpga/KD/pes/pooling_pe.hpp index fd3be1f463..5bb4f5285a 100644 --- a/lite/backends/fpga/KD/pes/pooling_pe.hpp +++ b/lite/backends/fpga/KD/pes/pooling_pe.hpp @@ -45,13 +45,14 @@ class PoolingPE : public PE { PoolingArgs args = {0}; args.mode = param_.type; + auto paddings = *param_.paddings; args.kernel_reciprocal = fp32_2_fp16(1.0f / (k_width * k_height)); args.image.address = input->data(); args.image.channels = input->shape().channel(); args.image.height = input->shape().height(); args.image.width = input->shape().width(); - args.image.pad_height = param_.paddings[0]; - args.image.pad_width = param_.paddings[1]; + args.image.pad_height = paddings[0]; + args.image.pad_width = paddings[2]; args.image.scale_address = input->scale(); args.output.address = output->mutableData(); args.output.scale_address = output->scale(); @@ -76,12 +77,13 @@ class PoolingPE : public PE { float* image_addr = float_input.mutableData(FP32, input->shape()); float_input.copyFrom(input); float16* data_out = output->data(); + auto paddings = *param_.paddings; int image_height = input->shape().height(); int image_width = input->shape().width(); int image_channels = input->shape().channel(); - int image_pad_h = param_.paddings[0]; - int image_pad_w = param_.paddings[1]; + int image_pad_h = paddings[0]; + int image_pad_w = paddings[2]; int kernel_height = param_.kernelSize[1]; int kernel_width = param_.kernelSize[0]; int kernel_step_h = param_.strides[0]; diff --git a/lite/backends/x86/math/pooling.cc b/lite/backends/x86/math/pooling.cc index 9da239f9c6..ab6c1edb48 100644 --- a/lite/backends/x86/math/pooling.cc +++ b/lite/backends/x86/math/pooling.cc @@ -49,7 +49,7 @@ class Pool2dFunctor { const int stride_height = strides[0]; const int stride_width = strides[1]; const int padding_height = paddings[0]; - const int padding_width = paddings[1]; + const int padding_width = paddings[2]; const int input_stride = input_height * input_width; const int output_stride = output_height * output_width; @@ -130,7 +130,7 @@ class Pool2dGradFunctor { const int stride_height = strides[0]; const int stride_width = strides[1]; const int padding_height = paddings[0]; - const int padding_width = paddings[1]; + const int padding_width = paddings[2]; const int input_stride = input_height * input_width; const int output_stride = output_height * output_width; @@ -213,7 +213,7 @@ class MaxPool2dGradFunctor { const int stride_height = strides[0]; const int stride_width = strides[1]; const int padding_height = paddings[0]; - const int padding_width = paddings[1]; + const int padding_width = paddings[2]; const int input_stride = input_height * input_width; const int output_stride = output_height * output_width; @@ -629,7 +629,7 @@ class MaxPool2dWithIndexFunctor { const int stride_height = strides[0]; const int stride_width = strides[1]; const int padding_height = paddings[0]; - const int padding_width = paddings[1]; + const int padding_width = paddings[2]; const int input_stride = input_height * input_width; const int output_stride = output_height * output_width; diff --git a/lite/kernels/arm/pool_compute.cc b/lite/kernels/arm/pool_compute.cc index 9f02a462a5..c9f0fed478 100644 --- a/lite/kernels/arm/pool_compute.cc +++ b/lite/kernels/arm/pool_compute.cc @@ -38,7 +38,7 @@ void 
PoolCompute::Run() { std::vector& ksize = param.ksize; std::vector& strides = param.strides; - std::vector& paddings = param.paddings; + std::vector& paddings = *param.paddings; std::string& pooling_type = param.pooling_type; bool global_pooling = param.global_pooling; @@ -48,12 +48,15 @@ void PoolCompute::Run() { bool use_quantizer = param.use_quantizer; std::string& data_format = param.data_format; - bool kps_equal = (ksize[0] == ksize[1]) && (strides[0] == strides[1]) && - (paddings[0] == paddings[1]); + bool pads_equal = + (paddings[0] == paddings[1]) && (paddings[2] == paddings[3]); + bool kps_equal = (ksize[0] == ksize[1]) && (strides[0] == strides[1]) && + (paddings[0] == paddings[2]); if (global_pooling) { for (size_t i = 0; i < ksize.size(); ++i) { - paddings[i] = 0; + paddings[2 * i] = 0; + paddings[2 * i + 1] = 0; ksize[i] = static_cast(in_dims[i + 2]); } if (pooling_type == "max") { @@ -80,7 +83,8 @@ void PoolCompute::Run() { return; } } else { - if (ksize[0] == 2 && strides[0] == 2 && paddings[0] == 0 && kps_equal) { + if (ksize[0] == 2 && strides[0] == 2 && paddings[0] == 0 && pads_equal && + kps_equal) { if (pooling_type == "max") { lite::arm::math::pooling2x2s2_max(din, dout, @@ -106,7 +110,7 @@ void PoolCompute::Run() { return; } } else if (ksize[0] == 3 && strides[0] == 1 && paddings[0] == 1 && - kps_equal) { + pads_equal && kps_equal) { if (pooling_type == "max") { lite::arm::math::pooling3x3s1p1_max(din, dout, @@ -132,7 +136,7 @@ void PoolCompute::Run() { return; } } else if (ksize[0] == 3 && strides[0] == 1 && paddings[0] == 0 && - kps_equal) { + pads_equal && kps_equal) { if (pooling_type == "max") { lite::arm::math::pooling3x3s1p0_max(din, dout, @@ -158,7 +162,7 @@ void PoolCompute::Run() { return; } } else if (ksize[0] == 3 && strides[0] == 2 && paddings[0] == 0 && - kps_equal) { + pads_equal && kps_equal) { if (pooling_type == "max") { lite::arm::math::pooling3x3s2p0_max(din, dout, @@ -184,7 +188,7 @@ void PoolCompute::Run() { return; } } else if (ksize[0] == 3 && strides[0] == 2 && paddings[0] == 1 && - kps_equal) { + pads_equal && kps_equal) { if (pooling_type == "max") { lite::arm::math::pooling3x3s2p1_max(din, dout, diff --git a/lite/kernels/arm/pool_compute_test.cc b/lite/kernels/arm/pool_compute_test.cc index 79e5332172..7ed8a142dd 100644 --- a/lite/kernels/arm/pool_compute_test.cc +++ b/lite/kernels/arm/pool_compute_test.cc @@ -15,6 +15,7 @@ #include "lite/kernels/arm/pool_compute.h" #include #include +#include #include #include #include "lite/backends/arm/math/funcs.h" @@ -25,14 +26,21 @@ namespace lite { namespace kernels { namespace arm { -int PoolOutputSize( - int input_size, int filter_size, int padding, int stride, bool ceil_mode) { +int PoolOutputSize(int input_size, + int filter_size, + int pad_left, + int pad_right, + int stride, + bool ceil_mode) { int output_size; if (!ceil_mode) { - output_size = (input_size - filter_size + 2 * padding) / stride + 1; + output_size = + (input_size - filter_size + pad_left + pad_right) / stride + 1; } else { output_size = - (input_size - filter_size + 2 * padding + stride - 1) / stride + 1; + (input_size - filter_size + pad_left + pad_right + stride - 1) / + stride + + 1; } return output_size; } @@ -40,10 +48,12 @@ int PoolOutputSize( std::vector compute_output_shape(operators::PoolParam* param_) { const auto x_dims = param_->x->dims(); std::vector& ksize = param_->ksize; + auto paddings = *param_->paddings; if (param_->global_pooling) { ksize.resize(static_cast(x_dims.size()) - 2); for (size_t i = 0; i < 
ksize.size(); ++i) { - param_->paddings[i] = 0; + paddings[2 * i] = 0; + paddings[2 * i + 1] = 0; ksize[i] = static_cast(x_dims[i + 2]); } } @@ -56,7 +66,8 @@ std::vector compute_output_shape(operators::PoolParam* param_) { for (size_t i = 0; i < param_->ksize.size(); ++i) { output_shape.push_back(PoolOutputSize(x_dims[i + 2], param_->ksize[i], - param_->paddings[i], + paddings[2 * i], + paddings[2 * i + 1], param_->strides[i], param_->ceil_mode)); } @@ -73,7 +84,7 @@ void pool_compute_ref(const operators::PoolParam& param) { std::vector ksize = param.ksize; std::vector strides = param.strides; - std::vector paddings = param.paddings; + std::vector paddings = *param.paddings; std::string pooling_type = param.pooling_type; bool global_pooling = param.global_pooling; @@ -99,7 +110,7 @@ void pool_compute_ref(const operators::PoolParam& param) { int stride_h = strides[0]; int stride_w = strides[1]; int pad_h = paddings[0]; - int pad_w = paddings[1]; + int pad_w = paddings[2]; int size_channel_in = win * hin; int size_channel_out = wout * hout; if (global_pooling) { @@ -178,18 +189,22 @@ void pool_compute_ref(const operators::PoolParam& param) { int bh = kernel_h; int bw = kernel_w; if (ew == win) { - bw = sw + kernel_w >= win + pad_w ? win + pad_w - : sw + kernel_w; + bw = (sw + kernel_w) >= (win + paddings[3]) + ? (win + paddings[3]) + : (sw + kernel_w); bw -= sw; - if (sw - pad_w < 0 && sw + kernel_w > win + pad_w) { + if ((sw - pad_w) < 0 && + (sw + kernel_w) > (win + paddings[3])) { bw += pad_w; } } if (eh == hin) { - bh = sh + kernel_h >= hin + pad_h ? hin + pad_h - : sh + kernel_h; + bh = (sh + kernel_h) >= (hin + paddings[1]) + ? (hin + paddings[1]) + : (sh + kernel_h); bh -= sh; - if (sh - pad_h < 0 && sh + kernel_h > hin + pad_h) { + if ((sh - pad_h) < 0 && + (sh + kernel_h) > (hin + paddings[1])) { bh += pad_h; } } @@ -225,75 +240,92 @@ TEST(pool_arm, compute) { for (auto exclusive : {true, false}) { for (auto ksize : {2, 3}) { for (auto stride : {1, 2}) { - for (auto pad : {0, 1}) { - for (auto n : {1, 2}) { - for (auto c : {1, 3}) { + for (auto pad_left : {0, 1}) { + for (auto pad_right : {0, 1}) { + for (auto pad_top : {0, 1}) { + for (auto pad_bottom : {0, 1}) { + for (auto n : {1, 2}) { + for (auto c : {1, 3}) { #if 1 - for (auto h : {2, 3, 4, 11}) { - for (auto w : {2, 3, 4, 11}) { + for (auto h : {2, 3, 4, 11}) { + for (auto w : {2, 3, 4, 11}) { #else - for (int h = 2; h < 25; h++) { - for (int w = 2; w < 25; w++) { + for (int h = 2; h < 25; h++) { + for (int w = 2; w < 25; w++) { #endif - VLOG(3) << "n:" << n << " c:" << c << " h:" << h - << " w:" << w << " ksize:" << ksize - << " stride:" << stride << " pad:" << pad - << " exclusive:" << exclusive - << " global_pooling:" << global_pooling - << " ceil_mode: " << ceil_mode - << " pooling_type:" << pooling_type; + VLOG(3) << "n:" << n << " c:" << c << " h:" << h + << " w:" << w << " ksize:" << ksize + << " stride:" << stride + << " pad_left:" << pad_left + << " pad_right:" << pad_right + << " pad_top:" << pad_top + << " pad_bottom:" << pad_bottom + << " exclusive:" << exclusive + << " global_pooling:" << global_pooling + << " ceil_mode: " << ceil_mode + << " pooling_type:" << pooling_type; - // init x, output - x.Resize(DDim(std::vector({n, c, h, w}))); - auto* x_data = x.mutable_data(); - for (int i = 0; i < x.dims().production(); ++i) { - float sign = i % 3 == 0 ? 
-0.03 : 0.05f; - x_data[i] = sign * (i % 128); - } + // init x, output + x.Resize( + DDim(std::vector({n, c, h, w}))); + auto* x_data = x.mutable_data(); + for (int i = 0; i < x.dims().production(); ++i) { + float sign = i % 3 == 0 ? -0.03 : 0.05f; + x_data[i] = sign * (i % 128); + } - // fill param - param.x = &x; - param.output = &output; - param.pooling_type = pooling_type; - if (global_pooling) { - param.ksize = {h, w}; - } else { - param.ksize = {ksize, ksize}; - } - param.global_pooling = global_pooling; - param.strides = {stride, stride}; - param.paddings = {pad, pad}; - param.exclusive = exclusive; - param.ceil_mode = ceil_mode; - param.adaptive = false; - param.use_quantizer = false; + // fill param + param.x = &x; + param.output = &output; + param.pooling_type = pooling_type; + if (global_pooling) { + param.ksize = {h, w}; + } else { + param.ksize = {ksize, ksize}; + } + param.global_pooling = global_pooling; + param.strides = {stride, stride}; + std::vector paddings = { + pad_top, pad_bottom, pad_left, pad_right}; + param.exclusive = exclusive; + param.paddings = + std::make_shared>(paddings); + param.ceil_mode = ceil_mode; + param.adaptive = false; + param.use_quantizer = false; - const std::vector& output_shape = - compute_output_shape(¶m); - output.Resize(DDim(output_shape)); - output_ref.Resize(DDim(output_shape)); + const std::vector& output_shape = + compute_output_shape(¶m); + output.Resize(DDim(output_shape)); + output_ref.Resize(DDim(output_shape)); - auto* output_data = output.mutable_data(); - auto* output_ref_data = - output_ref.mutable_data(); - for (int i = 0; i < output.dims().production(); ++i) { - output_data[i] = -2; - output_ref_data[i] = -2; - } + auto* output_data = output.mutable_data(); + auto* output_ref_data = + output_ref.mutable_data(); + for (int i = 0; i < output.dims().production(); + ++i) { + output_data[i] = -2; + output_ref_data[i] = -2; + } - // compute - pool.SetParam(param); - pool.Run(); + // compute + pool.SetParam(param); + pool.Run(); - // compute ref - param.output = &output_ref; - pool_compute_ref(param); + // compute ref + param.output = &output_ref; + pool_compute_ref(param); - // compare - for (int i = 0; i < output.dims().production(); i++) { - EXPECT_NEAR(output_data[i], output_ref_data[i], 1e-4); + // compare + for (int i = 0; i < output.dims().production(); + i++) { + EXPECT_NEAR( + output_data[i], output_ref_data[i], 1e-4); + } + VLOG(3) << "compare pass"; + } + } } - VLOG(3) << "compare pass"; } } } diff --git a/lite/kernels/cuda/pool_compute.cu b/lite/kernels/cuda/pool_compute.cu index a2483a2c75..456a2ce911 100644 --- a/lite/kernels/cuda/pool_compute.cu +++ b/lite/kernels/cuda/pool_compute.cu @@ -256,6 +256,7 @@ void PoolCompute::Run() { bool adaptive = param.adaptive; auto x_dims = param.x->dims(); auto out_dims = param.output->dims(); + auto paddings = *param.paddings; const int in_h = x_dims[2]; const int in_w = x_dims[3]; const int out_h = out_dims[2]; @@ -266,8 +267,8 @@ void PoolCompute::Run() { const int win_w = param.ksize[1]; const int stride_h = param.strides[0]; const int stride_w = param.strides[1]; - const int pad_h = param.paddings[0]; - const int pad_w = param.paddings[1]; + const int pad_h = paddings[0]; + const int pad_w = paddings[2]; const int total_threads = out_dims.production(); const int threads = 512; const int blocks = (total_threads + threads - 1) / threads; diff --git a/lite/kernels/cuda/pool_compute_test.cc b/lite/kernels/cuda/pool_compute_test.cc index fe6ff92c0c..308905c1d0 100644 --- 
a/lite/kernels/cuda/pool_compute_test.cc +++ b/lite/kernels/cuda/pool_compute_test.cc @@ -27,14 +27,21 @@ namespace cuda { using Tensor = lite::Tensor; using DDim = lite::DDim; -static int PoolOutputSize( - int input_size, int filter_size, int padding, int stride, bool ceil_mode) { +static int PoolOutputSize(int input_size, + int filter_size, + int pad_left, + int pad_right, + int stride, + bool ceil_mode) { int output_size; if (!ceil_mode) { - output_size = (input_size - filter_size + 2 * padding) / stride + 1; + output_size = + (input_size - filter_size + pad_left + pad_right) / stride + 1; } else { output_size = - (input_size - filter_size + 2 * padding + stride - 1) / stride + 1; + (input_size - filter_size + pad_left + pad_right + stride - 1) / + stride + + 1; } return output_size; } @@ -44,8 +51,10 @@ static std::vector compute_output_shape(operators::PoolParam* param_) { std::vector& ksize = param_->ksize; if (param_->global_pooling) { ksize.resize(static_cast(x_dims.size()) - 2); + auto paddings = *param_->paddings; for (size_t i = 0; i < ksize.size(); ++i) { - param_->paddings[i] = 0; + paddings[2 * i] = 0; + paddings[2 * i + 1] = 0; ksize[i] = static_cast(x_dims[i + 2]); } } @@ -58,7 +67,8 @@ static std::vector compute_output_shape(operators::PoolParam* param_) { for (size_t i = 0; i < param_->ksize.size(); ++i) { output_shape.push_back(PoolOutputSize(x_dims[i + 2], param_->ksize[i], - param_->paddings[i], + paddings[2 * i], + paddings[2 * i + 1], param_->strides[i], param_->ceil_mode)); } @@ -75,7 +85,7 @@ static void pool_compute_ref(const operators::PoolParam& param) { std::vector ksize = param.ksize; std::vector strides = param.strides; - std::vector paddings = param.paddings; + std::vector paddings = *param.paddings; std::string pooling_type = param.pooling_type; bool global_pooling = param.global_pooling; @@ -99,7 +109,7 @@ static void pool_compute_ref(const operators::PoolParam& param) { int stride_h = strides[0]; int stride_w = strides[1]; int pad_h = paddings[0]; - int pad_w = paddings[1]; + int pad_w = paddings[2]; if (global_pooling == true) { for (int n = 0; n < in_n; ++n) { @@ -226,7 +236,9 @@ TEST(pool_cuda, compute) { } param.global_pooling = global_pooling; param.strides = {stride, stride}; - param.paddings = {pad, pad}; + std::vector paddings = {pad, pad, pad, pad}; + param.paddings = + std::make_shared>(paddings); param.exclusive = exclusive; param.ceil_mode = ceil_mode; param.adaptive = false; diff --git a/lite/kernels/npu/bridges/pool_op.cc b/lite/kernels/npu/bridges/pool_op.cc index 5915b7a8aa..87fe705705 100644 --- a/lite/kernels/npu/bridges/pool_op.cc +++ b/lite/kernels/npu/bridges/pool_op.cc @@ -48,8 +48,13 @@ node_map_type PoolConverter(const std::shared_ptr pool_op, auto npu_window = ge::AttrValue::LIST_INT(ksize.begin(), ksize.end()); auto padding = op_info->GetAttr>("paddings"); + bool pads_equal = (padding[0] == padding[1]) && (padding[2] == padding[3]); + if (!pads_equal) { + LOG(FATAL) + << "padding requires pad_left == pad_right, pad_top == pad_bottom"; + } auto npu_pad = - ge::AttrValue::LIST_INT{padding[0], padding[0], padding[1], padding[1]}; + ge::AttrValue::LIST_INT{padding[0], padding[1], padding[2], padding[3]}; auto strides = op_info->GetAttr>("strides"); auto npu_stride = ge::AttrValue::LIST_INT(strides.begin(), strides.end()); int npu_ceil_mode = 0; diff --git a/lite/kernels/npu/bridges/pool_op_test.cc b/lite/kernels/npu/bridges/pool_op_test.cc index d4543a6ae1..298e065547 100644 --- a/lite/kernels/npu/bridges/pool_op_test.cc +++ 
b/lite/kernels/npu/bridges/pool_op_test.cc @@ -61,7 +61,7 @@ void pool_ref(const std::shared_ptr op) { int stride_h = strides[0]; int stride_w = strides[1]; int pad_h = paddings[0]; - int pad_w = paddings[1]; + int pad_w = paddings[2]; if (global_pooling == true) { for (int n = 0; n < in_n; ++n) { @@ -163,7 +163,8 @@ void test_pool(int bs, opdesc.SetAttr("global_pooling", global_pooling); opdesc.SetAttr("exclusive", exclusive); opdesc.SetAttr("strides", std::vector({stride, stride})); - opdesc.SetAttr("paddings", std::vector({padding, padding})); + opdesc.SetAttr("paddings", + std::vector({padding, padding, padding, padding})); // create and convert op to NPU model, then run it on NPU auto op = CreateOp(opdesc, &scope); diff --git a/lite/kernels/opencl/pool_compute.cc b/lite/kernels/opencl/pool_compute.cc index dc2e851595..d275b312d6 100644 --- a/lite/kernels/opencl/pool_compute.cc +++ b/lite/kernels/opencl/pool_compute.cc @@ -44,16 +44,22 @@ class PoolCompute const auto& out_dims = param.output->dims(); const std::string pooling_type = param.pooling_type; const bool global_pooling = param.global_pooling; - std::vector paddings = param.paddings; + std::vector paddings = *param.paddings; std::vector strides = param.strides; std::vector ksize = param.ksize; if (global_pooling) { for (size_t i = 0; i < ksize.size(); ++i) { - paddings[i] = 0; + paddings[2 * i] = 0; + paddings[2 * i + 1] = 0; ksize[i] = static_cast(in_dims[i + 2]); } } - + bool pads_equal = + (paddings[0] == paddings[1]) && (paddings[2] == paddings[3]); + if (!pads_equal) { + LOG(FATAL) + << "padding requires pad_left == pad_right, pad_top == pad_bottom"; + } auto& context = ctx_->As(); CHECK(context.cl_context() != nullptr); auto* input_buf = param.x->data(); @@ -89,7 +95,7 @@ class PoolCompute CL_CHECK_FATAL(status); status = kernel.setArg(++arg_idx, static_cast(paddings[0])); CL_CHECK_FATAL(status); - status = kernel.setArg(++arg_idx, static_cast(paddings[1])); + status = kernel.setArg(++arg_idx, static_cast(paddings[2])); CL_CHECK_FATAL(status); status = kernel.setArg(++arg_idx, *output_buf); CL_CHECK_FATAL(status); diff --git a/lite/kernels/opencl/pool_compute_test.cc b/lite/kernels/opencl/pool_compute_test.cc index 53f64e9505..25f0e72634 100644 --- a/lite/kernels/opencl/pool_compute_test.cc +++ b/lite/kernels/opencl/pool_compute_test.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include +#include #include #include "lite/backends/opencl/target_wrapper.h" #include "lite/core/op_registry.h" @@ -88,9 +89,10 @@ TEST(pool2d, compute) { param.output = &out; param.global_pooling = true; param.pooling_type = "avg"; - param.paddings = std::vector{0, 0}; + std::vector paddings = {0, 0, 0, 0}; param.strides = std::vector{1, 1}; param.ksize = std::vector{7, 7}; + param.paddings = std::make_shared>(paddings); std::unique_ptr context(new KernelContext); context->As().InitOnce(); diff --git a/lite/kernels/x86/pool_compute.h b/lite/kernels/x86/pool_compute.h index 57bcddcec9..0dccb245b1 100644 --- a/lite/kernels/x86/pool_compute.h +++ b/lite/kernels/x86/pool_compute.h @@ -35,7 +35,6 @@ class PoolCompute : public KernelLite { auto& param = *param_.get_mutable(); if (param.global_pooling) { for (size_t i = 0; i < param.ksize.size(); ++i) { - param.paddings[i] = 0; param.ksize[i] = static_cast(param.x->dims()[i + 2]); } } @@ -52,7 +51,7 @@ class PoolCompute : public KernelLite { param.x, param.ksize, param.strides, - param.paddings, + *param.paddings, pool_process, true, false, @@ -68,7 +67,7 @@ class PoolCompute : public KernelLite { param.x, param.ksize, param.strides, - param.paddings, + *param.paddings, pool_process, param.exclusive, param.adaptive, diff --git a/lite/kernels/x86/pool_compute_test.cc b/lite/kernels/x86/pool_compute_test.cc index 87b75a0760..4ea727cedd 100644 --- a/lite/kernels/x86/pool_compute_test.cc +++ b/lite/kernels/x86/pool_compute_test.cc @@ -60,7 +60,8 @@ TEST(pool2d_x86, run_test) { param.x = &x; param.output = &out; param.strides = {2, 2}; - param.paddings = {0, 0}; + std::vector paddings = {0, 0, 0, 0}; + param.paddings = std::make_shared>(paddings); param.ksize = {2, 2}; param.pooling_type = "max"; std::unique_ptr ctx(new KernelContext); diff --git a/lite/kernels/xpu/bridges/pool_op_test.cc b/lite/kernels/xpu/bridges/pool_op_test.cc index ed5f922d59..7efc6b464c 100644 --- a/lite/kernels/xpu/bridges/pool_op_test.cc +++ b/lite/kernels/xpu/bridges/pool_op_test.cc @@ -60,7 +60,7 @@ void pool_ref(const std::shared_ptr op) { int stride_h = strides[0]; int stride_w = strides[1]; int pad_h = paddings[0]; - int pad_w = paddings[1]; + int pad_w = paddings[2]; if (global_pooling == true) { for (int n = 0; n < in_n; ++n) { @@ -162,7 +162,8 @@ void test_pool(int bs, opdesc.SetAttr("global_pooling", global_pooling); opdesc.SetAttr("exclusive", exclusive); opdesc.SetAttr("strides", std::vector({stride, stride})); - opdesc.SetAttr("paddings", std::vector({padding, padding})); + opdesc.SetAttr("paddings", + std::vector({padding, padding, padding, padding})); opdesc.SetAttr("ceil_mode", ceil_mode); // create and convert op to XPU model, then run it on XPU diff --git a/lite/operators/op_params.h b/lite/operators/op_params.h index 035b3e18e8..d455743c4d 100644 --- a/lite/operators/op_params.h +++ b/lite/operators/op_params.h @@ -13,6 +13,7 @@ // limitations under the License. 
 #pragma once
+#include <memory>
 #include <string>
 #include <utility>
 #include <vector>
@@ -302,7 +303,12 @@ struct PoolParam {
   bool global_pooling{
       false};  // if true, kernel size and paddings will be ignored
   std::vector<int> strides{1, 1};
-  std::vector<int> paddings{0, 0};
+  /* The type of paddings changed
+   * from std::vector<int> to std::shared_ptr<std::vector<int>>
+   * to support modifying the padding dynamically, keeping the
+   * kernel param and the operator param updated in sync.
+   */
+  std::shared_ptr<std::vector<int>> paddings;
   bool exclusive{true};
   bool adaptive{false};
   bool ceil_mode{false};
diff --git a/lite/operators/pool_op.cc b/lite/operators/pool_op.cc
index 1ebbc059b7..7f2d2ccd9a 100644
--- a/lite/operators/pool_op.cc
+++ b/lite/operators/pool_op.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "lite/operators/pool_op.h"
+#include <algorithm>
 #include "lite/core/op_registry.h"
 
 namespace paddle {
@@ -26,7 +27,7 @@ bool PoolOpLite::CheckShape() const {
   const auto& x_dims = param_.x->dims();
   const auto& ksize = param_.ksize;
   const auto& strides = param_.strides;
-  const auto& paddings = param_.paddings;
+  const auto& paddings = *param_.paddings;
 
   // "Pooling input should be 4-D or 5-D tensor."
   CHECK_OR_FALSE(x_dims.size() == 4 || x_dims.size() == 5);
@@ -34,20 +35,60 @@ bool PoolOpLite::CheckShape() const {
   CHECK_OR_FALSE(x_dims.size() - ksize.size() == 2U);
   // Strides size and pooling size should be the same.
   CHECK_OR_FALSE(ksize.size() == strides.size());
-  // Paddings size and pooling size should be the same.
-  CHECK_OR_FALSE(ksize.size() == paddings.size());
+  // Paddings size must be 4.
+  CHECK_OR_FALSE(paddings.size() == 4L);
 
   return true;
 }
 
-int PoolOutputSize(
-    int input_size, int filter_size, int padding, int stride, bool ceil_mode) {
+inline void UpdatePadding(std::vector<int>* paddings,
+                          const bool global_pooling,
+                          const bool adaptive,
+                          const std::string padding_algorithm,
+                          const lite::DDim data_dims,
+                          const std::vector<int>& strides,
+                          const std::vector<int>& ksize) {
+  // when padding_algorithm is "VALID" or "SAME"
+  if (padding_algorithm == "SAME") {
+    for (int i = 0; i < strides.size(); ++i) {
+      int out_size = (data_dims[i + 2] + strides[i] - 1) / strides[i];
+      int pad_sum =
+          std::max((out_size - 1) * strides[i] + ksize[i] - data_dims[i + 2],
+                   (int64_t)0);
+      int pad_0 = pad_sum / 2;
+      int pad_1 = pad_sum - pad_0;
+      *(paddings->begin() + i * 2) = pad_0;
+      *(paddings->begin() + i * 2 + 1) = pad_1;
+    }
+  } else if (padding_algorithm == "VALID") {
+    for (auto it = paddings->begin(); it != paddings->end(); it++) {
+      *it = 0;
+    }
+  }
+
+  // if global_pooling == true or adaptive == true, padding will be ignored
+  if (global_pooling || adaptive) {
+    for (auto it = paddings->begin(); it != paddings->end(); it++) {
+      *it = 0;
+    }
+  }
+}
+
+int PoolOutputSize(int input_size,
+                   int filter_size,
+                   int pad_left,
+                   int pad_right,
+                   int stride,
+                   bool ceil_mode) {
   int output_size;
   if (!ceil_mode) {
-    output_size = (input_size - filter_size + 2 * padding) / stride + 1;
+    output_size =
+        (input_size - filter_size + pad_left + pad_right) / stride + 1;
   } else {
     output_size =
-        (input_size - filter_size + 2 * padding + stride - 1) / stride + 1;
+        (input_size - filter_size + pad_left + pad_right + stride - 1) /
+            stride +
+        1;
   }
   return output_size;
 }
@@ -55,14 +96,21 @@ int PoolOutputSize(
 bool PoolOpLite::InferShape() const {
   const auto x_dims = param_.x->dims();
   std::vector<int>& ksize = param_.ksize;
+  // dynamically update the 4-pad
+  UpdatePadding(param_.paddings.get(),
+                param_.global_pooling,
+                param_.adaptive,
+                padding_algorithm_,
+                x_dims,
+                param_.strides,
+                ksize);
   if (param_.global_pooling) {
     ksize.resize(static_cast<int>(x_dims.size()) - 2);
     for (size_t i = 0; i < ksize.size(); ++i) {
-      param_.paddings[i] = 0;
       ksize[i] = static_cast<int>(x_dims[i + 2]);
     }
   }
-
+  auto paddings = *param_.paddings;
   std::vector<int64_t> output_shape({x_dims[0], x_dims[1]});
   if (param_.adaptive) {
     output_shape.insert(
@@ -71,15 +119,14 @@ bool PoolOpLite::InferShape() const {
     for (size_t i = 0; i < param_.ksize.size(); ++i) {
       output_shape.push_back(PoolOutputSize(x_dims[i + 2],
                                             param_.ksize[i],
-                                            param_.paddings[i],
+                                            paddings[2 * i],
+                                            paddings[2 * i + 1],
                                             param_.strides[i],
                                             param_.ceil_mode));
     }
   }
   param_.output->Resize(lite::DDim(output_shape));
-  // ctx->SetOutputDim("Out", framework::make_ddim(output_shape));
-  // ctx->ShareLoD("X", "Out");
   return true;
 }
diff --git a/lite/operators/pool_op.h b/lite/operators/pool_op.h
index aecec4c619..aefb58ed81 100644
--- a/lite/operators/pool_op.h
+++ b/lite/operators/pool_op.h
@@ -14,6 +14,7 @@
 
 #pragma once
+#include <memory>
 #include <string>
 #include <vector>
 #include "lite/core/kernel.h"
@@ -51,7 +52,7 @@ class PoolOpLite : public OpLite {
     param_.ksize = op_desc.GetAttr<std::vector<int>>("ksize");
     param_.global_pooling = op_desc.GetAttr<bool>("global_pooling");
     param_.strides = op_desc.GetAttr<std::vector<int>>("strides");
-    param_.paddings = op_desc.GetAttr<std::vector<int>>("paddings");
+    auto paddings = op_desc.GetAttr<std::vector<int>>("paddings");
 
     if (op_desc.HasAttr("exclusive")) {
       param_.exclusive = op_desc.GetAttr<bool>("exclusive");
@@ -65,7 +66,23 @@ class PoolOpLite : public OpLite {
     if (op_desc.HasAttr("use_quantizer")) {
       param_.use_quantizer = op_desc.GetAttr<bool>("use_quantizer");
     }
-    // param_.data_format = op_desc.GetAttr<std::string>("data_format");
+    if (op_desc.HasAttr("padding_algorithm")) {
+      padding_algorithm_ = op_desc.GetAttr<std::string>("padding_algorithm");
+    }
+    // 2-pad to 4-pad
+    if (paddings.size() == 2L) {
+      for (size_t i = 0; i < 2L; ++i) {
+        int copy_pad = *(paddings.begin() + 2 * i);
+        paddings.insert(paddings.begin() + 2 * i + 1, copy_pad);
+      }
+    } else {
+      if (paddings.size() != 4L) {
+        LOG(FATAL)
+            << "Paddings size should be the same as or twice the input size.";
+      }
+    }
+    param_.paddings = std::make_shared<std::vector<int>>(paddings);
+
     return true;
   }
@@ -75,6 +92,7 @@
 
  private:
   mutable PoolParam param_;
+  std::string padding_algorithm_{""};
 };
 
 }  // namespace operators
diff --git a/lite/tests/math/pool_compute_test.cc b/lite/tests/math/pool_compute_test.cc
index 9f4a943594..2d6a0be628 100644
--- a/lite/tests/math/pool_compute_test.cc
+++ b/lite/tests/math/pool_compute_test.cc
@@ -69,8 +69,7 @@ DDim compute_out_dim(const DDim& dim_in,
   auto kernel_w = param.ksize[1];
   auto h = dim_in[2];
   auto w = dim_in[3];
-  int pad_h = param.paddings[0];
-  int pad_w = param.paddings[1];
+  auto paddings = *param.paddings;
   int stride_h = param.strides[0];
   int stride_w = param.strides[1];
   bool ceil_mode = param.ceil_mode;
@@ -79,11 +78,15 @@ DDim compute_out_dim(const DDim& dim_in,
   int wout = 1;
   if (!flag_global) {
     if (!ceil_mode) {
-      hout = (h - kernel_h + 2 * pad_h) / stride_h + 1;
-      wout = (w - kernel_w + 2 * pad_w) / stride_w + 1;
+      hout = (h - kernel_h + paddings[0] + paddings[1]) / stride_h + 1;
+      wout = (w - kernel_w + paddings[2] + paddings[3]) / stride_w + 1;
     } else {
-      hout = (h - kernel_h + 2 * pad_h + stride_h - 1) / stride_h + 1;
-      wout = (w - kernel_w + 2 * pad_w + stride_w - 1) / stride_w + 1;
+      hout =
+          (h - kernel_h + paddings[0] + paddings[1] + stride_h - 1) / stride_h +
+          1;
+      wout =
+          (w - kernel_w + paddings[2] + paddings[3] + stride_w - 1) / stride_w +
+          1;
     }
   }
   dim_out[2] = hout;
@@ -116,7 +119,7 @@ void pooling_basic(const float* din,
   int stride_h =
strides[0]; int stride_w = strides[1]; int pad_h = paddings[0]; - int pad_w = paddings[1]; + int pad_w = paddings[2]; int size_channel_in = win * hin; int size_channel_out = wout * hout; if (global_pooling) { @@ -195,18 +198,22 @@ void pooling_basic(const float* din, int bh = kernel_h; int bw = kernel_w; if (ew == win) { - bw = sw + kernel_w >= win + pad_w ? win + pad_w - : sw + kernel_w; + bw = (sw + kernel_w) >= (win + paddings[3]) + ? (win + paddings[3]) + : (sw + kernel_w); bw -= sw; - if (sw - pad_w < 0 && sw + kernel_w > win + pad_w) { + if ((sw - pad_w) < 0 && + (sw + kernel_w) > (win + paddings[3])) { bw += pad_w; } } if (eh == hin) { - bh = sh + kernel_h >= hin + pad_h ? hin + pad_h - : sh + kernel_h; + bh = (sh + kernel_h) >= (hin + paddings[1]) + ? (hin + paddings[1]) + : (sh + kernel_h); bh -= sh; - if (sh - pad_h < 0 && sh + kernel_h > hin + pad_h) { + if ((sh - pad_h) < 0 && + (sh + kernel_h) > (hin + paddings[1])) { bh += pad_h; } } @@ -243,7 +250,7 @@ void test_pool_fp32(const std::vector& input_dims, param.ksize = ksize; param.strides = strides; - param.paddings = pads; + param.paddings = std::make_shared>(pads); param.ceil_mode = ceil_mode; param.global_pooling = flag_global; param.pooling_type = pooling_type; @@ -399,31 +406,38 @@ TEST(TestPoolRand, test_pool_rand) { for (auto& kw : {1, 2, 3}) { for (auto& kh : {1, 2, 3}) { for (auto& stride : {1, 2}) { - for (auto& pad : {0, 1, 2}) { - for (auto& flag_global : {false, true}) { - for (auto& exclusive : {false, true}) { - for (auto& ceil_mode : {false, true}) { - for (auto& pooling_type : {"max", "avg"}) { - bool adaptive = false; - bool use_quantizer = false; - std::vector dims; - for (auto& batch : {1, 2}) { - for (auto& h : {1, 2, 3, 4, 11, 19, 32, 28}) { - dims.push_back(DDim({batch, cin, h, h})); + for (auto& pad_top : {0, 1, 2}) { + for (auto& pad_bottom : {0, 1, 2}) { + for (auto& pad_left : {0, 1, 2}) { + for (auto& pad_right : {0, 1, 2}) { + for (auto& flag_global : {false, true}) { + for (auto& exclusive : {false, true}) { + for (auto& ceil_mode : {false, true}) { + for (auto& pooling_type : {"max", "avg"}) { + bool adaptive = false; + bool use_quantizer = false; + std::vector dims; + for (auto& batch : {1, 2}) { + for (auto& h : {1, 2, 3, 4, 11, 19, 32, 28}) { + dims.push_back(DDim({batch, cin, h, h})); + } + } + test_pool_fp32( + dims, + {kh, kw}, + {stride, stride}, + {pad_top, pad_bottom, pad_left, pad_right}, + ceil_mode, + flag_global, + exclusive, + adaptive, + use_quantizer, + pooling_type, + {1, 2, 4}, + {FLAGS_power_mode}); + } } } - test_pool_fp32(dims, - {kh, kw}, - {stride, stride}, - {pad, pad}, - ceil_mode, - flag_global, - exclusive, - adaptive, - use_quantizer, - pooling_type, - {1, 2, 4}, - {FLAGS_power_mode}); } } } @@ -443,7 +457,7 @@ TEST(TesPoolCustom, test_pool_fp32_custom_size) { {DDim({FLAGS_batch, FLAGS_in_channel, FLAGS_in_height, FLAGS_in_width})}, {FLAGS_kernel_h, FLAGS_kernel_w}, {FLAGS_stride_h, FLAGS_stride_w}, - {FLAGS_pad_h, FLAGS_pad_w}, + {FLAGS_pad_h, FLAGS_pad_h, FLAGS_pad_w, FLAGS_pad_w}, FLAGS_ceil_mode, FLAGS_flag_global, FLAGS_exclusive, -- GitLab
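
The core of the patch is the 2-pad to 4-pad expansion in PoolOpLite::AttachImpl (lite/operators/pool_op.h above). Below is a minimal standalone sketch of that expansion; expand_paddings is an illustrative name, not a function added by the patch, but the loop body is the same as in AttachImpl.

#include <iostream>
#include <stdexcept>
#include <vector>

// {pad_h, pad_w} -> {pad_top, pad_bottom, pad_left, pad_right}:
// each symmetric pad is duplicated in place, exactly as the loop in
// AttachImpl does.
std::vector<int> expand_paddings(std::vector<int> paddings) {
  if (paddings.size() == 2L) {
    for (size_t i = 0; i < 2L; ++i) {
      int copy_pad = *(paddings.begin() + 2 * i);
      paddings.insert(paddings.begin() + 2 * i + 1, copy_pad);
    }
  } else if (paddings.size() != 4L) {
    throw std::invalid_argument("paddings must have 2 or 4 elements");
  }
  return paddings;
}

int main() {
  for (int p : expand_paddings({1, 2})) {
    std::cout << p << " ";  // prints: 1 1 2 2 (top bottom left right)
  }
  std::cout << "\n";
  return 0;
}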
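The new UpdatePadding in lite/operators/pool_op.cc resolves padding_algorithm every time shapes are inferred, which is why the paddings must stay mutable after AttachImpl. The sketch below mirrors that rule with plain ints in place of lite::DDim and int64_t; update_padding and data_hw are illustrative names. For "SAME" it pads just enough that the whole input is covered, giving the trailing side the odd pixel.

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

void update_padding(std::vector<int>* paddings,  // {t, b, l, r}
                    bool global_pooling, bool adaptive,
                    const std::string& padding_algorithm,
                    const std::vector<int>& data_hw,  // {h, w}
                    const std::vector<int>& strides,
                    const std::vector<int>& ksize) {
  if (padding_algorithm == "SAME") {
    for (size_t i = 0; i < strides.size(); ++i) {
      // output size when the input is walked once per stride
      int out_size = (data_hw[i] + strides[i] - 1) / strides[i];
      int pad_sum =
          std::max((out_size - 1) * strides[i] + ksize[i] - data_hw[i], 0);
      (*paddings)[2 * i] = pad_sum / 2;  // leading side gets the smaller half
      (*paddings)[2 * i + 1] = pad_sum - pad_sum / 2;
    }
  } else if (padding_algorithm == "VALID") {
    std::fill(paddings->begin(), paddings->end(), 0);
  }
  // padding is ignored for global or adaptive pooling
  if (global_pooling || adaptive) {
    std::fill(paddings->begin(), paddings->end(), 0);
  }
}

int main() {
  // SAME padding for a 5x5 input, 3x3 window, stride 2:
  // out_size = 3, pad_sum = max(2 * 2 + 3 - 5, 0) = 2 -> {1, 1} per axis.
  std::vector<int> pads{0, 0, 0, 0};
  update_padding(&pads, false, false, "SAME", {5, 5}, {2, 2}, {3, 3});
  for (int p : pads) std::cout << p << " ";  // prints: 1 1 1 1
  std::cout << "\n";
  return 0;
}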
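PoolOutputSize now takes the two sides of a padding pair separately instead of one symmetric value, so the old 2 * padding term becomes pad_left + pad_right (or pad_top + pad_bottom). A sketch of the generalized formula (pool_output_size is an illustrative name) with its ceil_mode variant, which rounds the division up so a trailing partial window still produces an output element:

#include <iostream>

int pool_output_size(int input_size, int filter_size, int pad_left,
                     int pad_right, int stride, bool ceil_mode) {
  int numerator = input_size - filter_size + pad_left + pad_right;
  // ceil_mode rounds the division up instead of truncating
  if (ceil_mode) numerator += stride - 1;
  return numerator / stride + 1;
}

int main() {
  // 11-wide input, 3-wide window, stride 2, asymmetric pads {1, 0}:
  std::cout << pool_output_size(11, 3, 1, 0, 2, false) << "\n";  // prints 5
  std::cout << pool_output_size(11, 3, 1, 0, 2, true) << "\n";   // prints 6
  return 0;
}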
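Finally, the reason paddings became std::shared_ptr<std::vector<int>> (see the op_params.h comment above): the operator and the kernel each hold their own copy of the param struct, and sharing one vector lets a padding update made during InferShape be observed by the kernel at Run time. A toy demonstration under that assumption, with Param standing in for PoolParam:

#include <iostream>
#include <memory>
#include <vector>

struct Param {
  std::shared_ptr<std::vector<int>> paddings;
};

int main() {
  Param op_param;
  op_param.paddings =
      std::make_shared<std::vector<int>>(std::vector<int>{0, 0, 0, 0});
  Param kernel_param = op_param;  // copied when the kernel attaches
  (*op_param.paddings)[0] = 1;    // operator updates pad_top later
  // the kernel's copy sees the update because the vector is shared
  std::cout << (*kernel_param.paddings)[0] << "\n";  // prints 1
  return 0;
}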