提交 4bdb6171 编写于 作者: H HappyAngel 提交者: GitHub

update pooling 2-padding to 4-padding (#2410)

* fix pooling bug and speed

* fix build error

* delete VLOG in pool, test=develop

* add openmp, test=develop

* fix lite/kernels/arm/pool_compute_test basic_pooling compute error bug, test=develop

* update pooling 2-pad to 4-pad, test=develop

* fix 2-pad to 4-pad in operators/pool_op.h: AttachKernel sets the param, so the 2-pad to 4-pad conversion should be put in AttachKernel. test=develop

* put 2-pad to 4-pad in AttachImpl, test=develop

* according to reviews, fix some format error. test=develop

* fix format error, add (). test=develop

* change paddings type to support dynamically modify, test=develop

* update padding type in other devices, test=develop

* fix x86 build error on shared_ptr, test=develop

* fix format in operators pool_op.cc, test=develop
上级 b5fe3840
...@@ -46,7 +46,7 @@ void pooling_basic(const float* din, ...@@ -46,7 +46,7 @@ void pooling_basic(const float* din,
int stride_h = strides[0]; int stride_h = strides[0];
int stride_w = strides[1]; int stride_w = strides[1];
int pad_h = paddings[0]; int pad_h = paddings[0];
int pad_w = paddings[1]; int pad_w = paddings[2];
int size_channel_in = win * hin; int size_channel_in = win * hin;
int size_channel_out = wout * hout; int size_channel_out = wout * hout;
if (global_pooling) { if (global_pooling) {
...@@ -125,18 +125,22 @@ void pooling_basic(const float* din, ...@@ -125,18 +125,22 @@ void pooling_basic(const float* din,
int bh = kernel_h; int bh = kernel_h;
int bw = kernel_w; int bw = kernel_w;
if (ew == win) { if (ew == win) {
bw = sw + kernel_w >= win + pad_w ? win + pad_w bw = (sw + kernel_w) >= (win + paddings[3])
: sw + kernel_w; ? (win + paddings[3])
: (sw + kernel_w);
bw -= sw; bw -= sw;
if (sw - pad_w < 0 && sw + kernel_w > win + pad_w) { if ((sw - pad_w) < 0 &&
(sw + kernel_w) > (win + paddings[3])) {
bw += pad_w; bw += pad_w;
} }
} }
if (eh == hin) { if (eh == hin) {
bh = sh + kernel_h >= hin + pad_h ? hin + pad_h bh = (sh + kernel_h) >= (hin + paddings[1])
: sh + kernel_h; ? (hin + paddings[1])
: (sh + kernel_h);
bh -= sh; bh -= sh;
if (sh - pad_h < 0 && sh + kernel_h > hin + pad_h) { if ((sh - pad_h) < 0 &&
(sh + kernel_h) > (hin + paddings[1])) {
bh += pad_h; bh += pad_h;
} }
} }
......
...@@ -45,13 +45,14 @@ class PoolingPE : public PE { ...@@ -45,13 +45,14 @@ class PoolingPE : public PE {
PoolingArgs args = {0}; PoolingArgs args = {0};
args.mode = param_.type; args.mode = param_.type;
auto paddings = *param_.paddings;
args.kernel_reciprocal = fp32_2_fp16(1.0f / (k_width * k_height)); args.kernel_reciprocal = fp32_2_fp16(1.0f / (k_width * k_height));
args.image.address = input->data<float16>(); args.image.address = input->data<float16>();
args.image.channels = input->shape().channel(); args.image.channels = input->shape().channel();
args.image.height = input->shape().height(); args.image.height = input->shape().height();
args.image.width = input->shape().width(); args.image.width = input->shape().width();
args.image.pad_height = param_.paddings[0]; args.image.pad_height = paddings[0];
args.image.pad_width = param_.paddings[1]; args.image.pad_width = paddings[2];
args.image.scale_address = input->scale(); args.image.scale_address = input->scale();
args.output.address = output->mutableData<float16>(); args.output.address = output->mutableData<float16>();
args.output.scale_address = output->scale(); args.output.scale_address = output->scale();
...@@ -76,12 +77,13 @@ class PoolingPE : public PE { ...@@ -76,12 +77,13 @@ class PoolingPE : public PE {
float* image_addr = float_input.mutableData<float>(FP32, input->shape()); float* image_addr = float_input.mutableData<float>(FP32, input->shape());
float_input.copyFrom(input); float_input.copyFrom(input);
float16* data_out = output->data<float16>(); float16* data_out = output->data<float16>();
auto paddings = *param_.paddings;
int image_height = input->shape().height(); int image_height = input->shape().height();
int image_width = input->shape().width(); int image_width = input->shape().width();
int image_channels = input->shape().channel(); int image_channels = input->shape().channel();
int image_pad_h = param_.paddings[0]; int image_pad_h = paddings[0];
int image_pad_w = param_.paddings[1]; int image_pad_w = paddings[2];
int kernel_height = param_.kernelSize[1]; int kernel_height = param_.kernelSize[1];
int kernel_width = param_.kernelSize[0]; int kernel_width = param_.kernelSize[0];
int kernel_step_h = param_.strides[0]; int kernel_step_h = param_.strides[0];
......
...@@ -49,7 +49,7 @@ class Pool2dFunctor<lite::TargetType::kX86, PoolProcess, T> { ...@@ -49,7 +49,7 @@ class Pool2dFunctor<lite::TargetType::kX86, PoolProcess, T> {
const int stride_height = strides[0]; const int stride_height = strides[0];
const int stride_width = strides[1]; const int stride_width = strides[1];
const int padding_height = paddings[0]; const int padding_height = paddings[0];
const int padding_width = paddings[1]; const int padding_width = paddings[2];
const int input_stride = input_height * input_width; const int input_stride = input_height * input_width;
const int output_stride = output_height * output_width; const int output_stride = output_height * output_width;
...@@ -130,7 +130,7 @@ class Pool2dGradFunctor<lite::TargetType::kX86, PoolProcess, T> { ...@@ -130,7 +130,7 @@ class Pool2dGradFunctor<lite::TargetType::kX86, PoolProcess, T> {
const int stride_height = strides[0]; const int stride_height = strides[0];
const int stride_width = strides[1]; const int stride_width = strides[1];
const int padding_height = paddings[0]; const int padding_height = paddings[0];
const int padding_width = paddings[1]; const int padding_width = paddings[2];
const int input_stride = input_height * input_width; const int input_stride = input_height * input_width;
const int output_stride = output_height * output_width; const int output_stride = output_height * output_width;
...@@ -213,7 +213,7 @@ class MaxPool2dGradFunctor<lite::TargetType::kX86, T> { ...@@ -213,7 +213,7 @@ class MaxPool2dGradFunctor<lite::TargetType::kX86, T> {
const int stride_height = strides[0]; const int stride_height = strides[0];
const int stride_width = strides[1]; const int stride_width = strides[1];
const int padding_height = paddings[0]; const int padding_height = paddings[0];
const int padding_width = paddings[1]; const int padding_width = paddings[2];
const int input_stride = input_height * input_width; const int input_stride = input_height * input_width;
const int output_stride = output_height * output_width; const int output_stride = output_height * output_width;
...@@ -629,7 +629,7 @@ class MaxPool2dWithIndexFunctor<lite::TargetType::kX86, T1, T2> { ...@@ -629,7 +629,7 @@ class MaxPool2dWithIndexFunctor<lite::TargetType::kX86, T1, T2> {
const int stride_height = strides[0]; const int stride_height = strides[0];
const int stride_width = strides[1]; const int stride_width = strides[1];
const int padding_height = paddings[0]; const int padding_height = paddings[0];
const int padding_width = paddings[1]; const int padding_width = paddings[2];
const int input_stride = input_height * input_width; const int input_stride = input_height * input_width;
const int output_stride = output_height * output_width; const int output_stride = output_height * output_width;
......
...@@ -38,7 +38,7 @@ void PoolCompute::Run() { ...@@ -38,7 +38,7 @@ void PoolCompute::Run() {
std::vector<int>& ksize = param.ksize; std::vector<int>& ksize = param.ksize;
std::vector<int>& strides = param.strides; std::vector<int>& strides = param.strides;
std::vector<int>& paddings = param.paddings; std::vector<int>& paddings = *param.paddings;
std::string& pooling_type = param.pooling_type; std::string& pooling_type = param.pooling_type;
bool global_pooling = param.global_pooling; bool global_pooling = param.global_pooling;
...@@ -48,12 +48,15 @@ void PoolCompute::Run() { ...@@ -48,12 +48,15 @@ void PoolCompute::Run() {
bool use_quantizer = param.use_quantizer; bool use_quantizer = param.use_quantizer;
std::string& data_format = param.data_format; std::string& data_format = param.data_format;
bool kps_equal = (ksize[0] == ksize[1]) && (strides[0] == strides[1]) && bool pads_equal =
(paddings[0] == paddings[1]); (paddings[0] == paddings[1]) && (paddings[2] == paddings[3]);
bool kps_equal = (ksize[0] == ksize[1]) && (strides[0] == strides[1]) &&
(paddings[0] == paddings[2]);
if (global_pooling) { if (global_pooling) {
for (size_t i = 0; i < ksize.size(); ++i) { for (size_t i = 0; i < ksize.size(); ++i) {
paddings[i] = 0; paddings[2 * i] = 0;
paddings[2 * i + 1] = 0;
ksize[i] = static_cast<int>(in_dims[i + 2]); ksize[i] = static_cast<int>(in_dims[i + 2]);
} }
if (pooling_type == "max") { if (pooling_type == "max") {
...@@ -80,7 +83,8 @@ void PoolCompute::Run() { ...@@ -80,7 +83,8 @@ void PoolCompute::Run() {
return; return;
} }
} else { } else {
if (ksize[0] == 2 && strides[0] == 2 && paddings[0] == 0 && kps_equal) { if (ksize[0] == 2 && strides[0] == 2 && paddings[0] == 0 && pads_equal &&
kps_equal) {
if (pooling_type == "max") { if (pooling_type == "max") {
lite::arm::math::pooling2x2s2_max(din, lite::arm::math::pooling2x2s2_max(din,
dout, dout,
...@@ -106,7 +110,7 @@ void PoolCompute::Run() { ...@@ -106,7 +110,7 @@ void PoolCompute::Run() {
return; return;
} }
} else if (ksize[0] == 3 && strides[0] == 1 && paddings[0] == 1 && } else if (ksize[0] == 3 && strides[0] == 1 && paddings[0] == 1 &&
kps_equal) { pads_equal && kps_equal) {
if (pooling_type == "max") { if (pooling_type == "max") {
lite::arm::math::pooling3x3s1p1_max(din, lite::arm::math::pooling3x3s1p1_max(din,
dout, dout,
...@@ -132,7 +136,7 @@ void PoolCompute::Run() { ...@@ -132,7 +136,7 @@ void PoolCompute::Run() {
return; return;
} }
} else if (ksize[0] == 3 && strides[0] == 1 && paddings[0] == 0 && } else if (ksize[0] == 3 && strides[0] == 1 && paddings[0] == 0 &&
kps_equal) { pads_equal && kps_equal) {
if (pooling_type == "max") { if (pooling_type == "max") {
lite::arm::math::pooling3x3s1p0_max(din, lite::arm::math::pooling3x3s1p0_max(din,
dout, dout,
...@@ -158,7 +162,7 @@ void PoolCompute::Run() { ...@@ -158,7 +162,7 @@ void PoolCompute::Run() {
return; return;
} }
} else if (ksize[0] == 3 && strides[0] == 2 && paddings[0] == 0 && } else if (ksize[0] == 3 && strides[0] == 2 && paddings[0] == 0 &&
kps_equal) { pads_equal && kps_equal) {
if (pooling_type == "max") { if (pooling_type == "max") {
lite::arm::math::pooling3x3s2p0_max(din, lite::arm::math::pooling3x3s2p0_max(din,
dout, dout,
...@@ -184,7 +188,7 @@ void PoolCompute::Run() { ...@@ -184,7 +188,7 @@ void PoolCompute::Run() {
return; return;
} }
} else if (ksize[0] == 3 && strides[0] == 2 && paddings[0] == 1 && } else if (ksize[0] == 3 && strides[0] == 2 && paddings[0] == 1 &&
kps_equal) { pads_equal && kps_equal) {
if (pooling_type == "max") { if (pooling_type == "max") {
lite::arm::math::pooling3x3s2p1_max(din, lite::arm::math::pooling3x3s2p1_max(din,
dout, dout,
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include "lite/kernels/arm/pool_compute.h" #include "lite/kernels/arm/pool_compute.h"
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <limits> #include <limits>
#include <memory>
#include <string> #include <string>
#include <vector> #include <vector>
#include "lite/backends/arm/math/funcs.h" #include "lite/backends/arm/math/funcs.h"
...@@ -25,14 +26,21 @@ namespace lite { ...@@ -25,14 +26,21 @@ namespace lite {
namespace kernels { namespace kernels {
namespace arm { namespace arm {
int PoolOutputSize( int PoolOutputSize(int input_size,
int input_size, int filter_size, int padding, int stride, bool ceil_mode) { int filter_size,
int pad_left,
int pad_right,
int stride,
bool ceil_mode) {
int output_size; int output_size;
if (!ceil_mode) { if (!ceil_mode) {
output_size = (input_size - filter_size + 2 * padding) / stride + 1; output_size =
(input_size - filter_size + pad_left + pad_right) / stride + 1;
} else { } else {
output_size = output_size =
(input_size - filter_size + 2 * padding + stride - 1) / stride + 1; (input_size - filter_size + pad_left + pad_right + stride - 1) /
stride +
1;
} }
return output_size; return output_size;
} }
...@@ -40,10 +48,12 @@ int PoolOutputSize( ...@@ -40,10 +48,12 @@ int PoolOutputSize(
std::vector<int64_t> compute_output_shape(operators::PoolParam* param_) { std::vector<int64_t> compute_output_shape(operators::PoolParam* param_) {
const auto x_dims = param_->x->dims(); const auto x_dims = param_->x->dims();
std::vector<int>& ksize = param_->ksize; std::vector<int>& ksize = param_->ksize;
auto paddings = *param_->paddings;
if (param_->global_pooling) { if (param_->global_pooling) {
ksize.resize(static_cast<size_t>(x_dims.size()) - 2); ksize.resize(static_cast<size_t>(x_dims.size()) - 2);
for (size_t i = 0; i < ksize.size(); ++i) { for (size_t i = 0; i < ksize.size(); ++i) {
param_->paddings[i] = 0; paddings[2 * i] = 0;
paddings[2 * i + 1] = 0;
ksize[i] = static_cast<int>(x_dims[i + 2]); ksize[i] = static_cast<int>(x_dims[i + 2]);
} }
} }
...@@ -56,7 +66,8 @@ std::vector<int64_t> compute_output_shape(operators::PoolParam* param_) { ...@@ -56,7 +66,8 @@ std::vector<int64_t> compute_output_shape(operators::PoolParam* param_) {
for (size_t i = 0; i < param_->ksize.size(); ++i) { for (size_t i = 0; i < param_->ksize.size(); ++i) {
output_shape.push_back(PoolOutputSize(x_dims[i + 2], output_shape.push_back(PoolOutputSize(x_dims[i + 2],
param_->ksize[i], param_->ksize[i],
param_->paddings[i], paddings[2 * i],
paddings[2 * i + 1],
param_->strides[i], param_->strides[i],
param_->ceil_mode)); param_->ceil_mode));
} }
...@@ -73,7 +84,7 @@ void pool_compute_ref(const operators::PoolParam& param) { ...@@ -73,7 +84,7 @@ void pool_compute_ref(const operators::PoolParam& param) {
std::vector<int> ksize = param.ksize; std::vector<int> ksize = param.ksize;
std::vector<int> strides = param.strides; std::vector<int> strides = param.strides;
std::vector<int> paddings = param.paddings; std::vector<int> paddings = *param.paddings;
std::string pooling_type = param.pooling_type; std::string pooling_type = param.pooling_type;
bool global_pooling = param.global_pooling; bool global_pooling = param.global_pooling;
...@@ -99,7 +110,7 @@ void pool_compute_ref(const operators::PoolParam& param) { ...@@ -99,7 +110,7 @@ void pool_compute_ref(const operators::PoolParam& param) {
int stride_h = strides[0]; int stride_h = strides[0];
int stride_w = strides[1]; int stride_w = strides[1];
int pad_h = paddings[0]; int pad_h = paddings[0];
int pad_w = paddings[1]; int pad_w = paddings[2];
int size_channel_in = win * hin; int size_channel_in = win * hin;
int size_channel_out = wout * hout; int size_channel_out = wout * hout;
if (global_pooling) { if (global_pooling) {
...@@ -178,18 +189,22 @@ void pool_compute_ref(const operators::PoolParam& param) { ...@@ -178,18 +189,22 @@ void pool_compute_ref(const operators::PoolParam& param) {
int bh = kernel_h; int bh = kernel_h;
int bw = kernel_w; int bw = kernel_w;
if (ew == win) { if (ew == win) {
bw = sw + kernel_w >= win + pad_w ? win + pad_w bw = (sw + kernel_w) >= (win + paddings[3])
: sw + kernel_w; ? (win + paddings[3])
: (sw + kernel_w);
bw -= sw; bw -= sw;
if (sw - pad_w < 0 && sw + kernel_w > win + pad_w) { if ((sw - pad_w) < 0 &&
(sw + kernel_w) > (win + paddings[3])) {
bw += pad_w; bw += pad_w;
} }
} }
if (eh == hin) { if (eh == hin) {
bh = sh + kernel_h >= hin + pad_h ? hin + pad_h bh = (sh + kernel_h) >= (hin + paddings[1])
: sh + kernel_h; ? (hin + paddings[1])
: (sh + kernel_h);
bh -= sh; bh -= sh;
if (sh - pad_h < 0 && sh + kernel_h > hin + pad_h) { if ((sh - pad_h) < 0 &&
(sh + kernel_h) > (hin + paddings[1])) {
bh += pad_h; bh += pad_h;
} }
} }
...@@ -225,75 +240,92 @@ TEST(pool_arm, compute) { ...@@ -225,75 +240,92 @@ TEST(pool_arm, compute) {
for (auto exclusive : {true, false}) { for (auto exclusive : {true, false}) {
for (auto ksize : {2, 3}) { for (auto ksize : {2, 3}) {
for (auto stride : {1, 2}) { for (auto stride : {1, 2}) {
for (auto pad : {0, 1}) { for (auto pad_left : {0, 1}) {
for (auto n : {1, 2}) { for (auto pad_right : {0, 1}) {
for (auto c : {1, 3}) { for (auto pad_top : {0, 1}) {
for (auto pad_bottom : {0, 1}) {
for (auto n : {1, 2}) {
for (auto c : {1, 3}) {
#if 1 #if 1
for (auto h : {2, 3, 4, 11}) { for (auto h : {2, 3, 4, 11}) {
for (auto w : {2, 3, 4, 11}) { for (auto w : {2, 3, 4, 11}) {
#else #else
for (int h = 2; h < 25; h++) { for (int h = 2; h < 25; h++) {
for (int w = 2; w < 25; w++) { for (int w = 2; w < 25; w++) {
#endif #endif
VLOG(3) << "n:" << n << " c:" << c << " h:" << h VLOG(3) << "n:" << n << " c:" << c << " h:" << h
<< " w:" << w << " ksize:" << ksize << " w:" << w << " ksize:" << ksize
<< " stride:" << stride << " pad:" << pad << " stride:" << stride
<< " exclusive:" << exclusive << " pad_left:" << pad_left
<< " global_pooling:" << global_pooling << " pad_right:" << pad_right
<< " ceil_mode: " << ceil_mode << " pad_top:" << pad_top
<< " pooling_type:" << pooling_type; << " pad_bottom:" << pad_bottom
<< " exclusive:" << exclusive
<< " global_pooling:" << global_pooling
<< " ceil_mode: " << ceil_mode
<< " pooling_type:" << pooling_type;
// init x, output // init x, output
x.Resize(DDim(std::vector<int64_t>({n, c, h, w}))); x.Resize(
auto* x_data = x.mutable_data<float>(); DDim(std::vector<int64_t>({n, c, h, w})));
for (int i = 0; i < x.dims().production(); ++i) { auto* x_data = x.mutable_data<float>();
float sign = i % 3 == 0 ? -0.03 : 0.05f; for (int i = 0; i < x.dims().production(); ++i) {
x_data[i] = sign * (i % 128); float sign = i % 3 == 0 ? -0.03 : 0.05f;
} x_data[i] = sign * (i % 128);
}
// fill param // fill param
param.x = &x; param.x = &x;
param.output = &output; param.output = &output;
param.pooling_type = pooling_type; param.pooling_type = pooling_type;
if (global_pooling) { if (global_pooling) {
param.ksize = {h, w}; param.ksize = {h, w};
} else { } else {
param.ksize = {ksize, ksize}; param.ksize = {ksize, ksize};
} }
param.global_pooling = global_pooling; param.global_pooling = global_pooling;
param.strides = {stride, stride}; param.strides = {stride, stride};
param.paddings = {pad, pad}; std::vector<int> paddings = {
param.exclusive = exclusive; pad_top, pad_bottom, pad_left, pad_right};
param.ceil_mode = ceil_mode; param.exclusive = exclusive;
param.adaptive = false; param.paddings =
param.use_quantizer = false; std::make_shared<std::vector<int>>(paddings);
param.ceil_mode = ceil_mode;
param.adaptive = false;
param.use_quantizer = false;
const std::vector<int64_t>& output_shape = const std::vector<int64_t>& output_shape =
compute_output_shape(&param); compute_output_shape(&param);
output.Resize(DDim(output_shape)); output.Resize(DDim(output_shape));
output_ref.Resize(DDim(output_shape)); output_ref.Resize(DDim(output_shape));
auto* output_data = output.mutable_data<float>(); auto* output_data = output.mutable_data<float>();
auto* output_ref_data = auto* output_ref_data =
output_ref.mutable_data<float>(); output_ref.mutable_data<float>();
for (int i = 0; i < output.dims().production(); ++i) { for (int i = 0; i < output.dims().production();
output_data[i] = -2; ++i) {
output_ref_data[i] = -2; output_data[i] = -2;
} output_ref_data[i] = -2;
}
// compute // compute
pool.SetParam(param); pool.SetParam(param);
pool.Run(); pool.Run();
// compute ref // compute ref
param.output = &output_ref; param.output = &output_ref;
pool_compute_ref(param); pool_compute_ref(param);
// compare // compare
for (int i = 0; i < output.dims().production(); i++) { for (int i = 0; i < output.dims().production();
EXPECT_NEAR(output_data[i], output_ref_data[i], 1e-4); i++) {
EXPECT_NEAR(
output_data[i], output_ref_data[i], 1e-4);
}
VLOG(3) << "compare pass";
}
}
} }
VLOG(3) << "compare pass";
} }
} }
} }
......
...@@ -256,6 +256,7 @@ void PoolCompute::Run() { ...@@ -256,6 +256,7 @@ void PoolCompute::Run() {
bool adaptive = param.adaptive; bool adaptive = param.adaptive;
auto x_dims = param.x->dims(); auto x_dims = param.x->dims();
auto out_dims = param.output->dims(); auto out_dims = param.output->dims();
auto paddings = *param.paddings;
const int in_h = x_dims[2]; const int in_h = x_dims[2];
const int in_w = x_dims[3]; const int in_w = x_dims[3];
const int out_h = out_dims[2]; const int out_h = out_dims[2];
...@@ -266,8 +267,8 @@ void PoolCompute::Run() { ...@@ -266,8 +267,8 @@ void PoolCompute::Run() {
const int win_w = param.ksize[1]; const int win_w = param.ksize[1];
const int stride_h = param.strides[0]; const int stride_h = param.strides[0];
const int stride_w = param.strides[1]; const int stride_w = param.strides[1];
const int pad_h = param.paddings[0]; const int pad_h = paddings[0];
const int pad_w = param.paddings[1]; const int pad_w = paddings[2];
const int total_threads = out_dims.production(); const int total_threads = out_dims.production();
const int threads = 512; const int threads = 512;
const int blocks = (total_threads + threads - 1) / threads; const int blocks = (total_threads + threads - 1) / threads;
......
...@@ -27,14 +27,21 @@ namespace cuda { ...@@ -27,14 +27,21 @@ namespace cuda {
using Tensor = lite::Tensor; using Tensor = lite::Tensor;
using DDim = lite::DDim; using DDim = lite::DDim;
static int PoolOutputSize( static int PoolOutputSize(int input_size,
int input_size, int filter_size, int padding, int stride, bool ceil_mode) { int filter_size,
int pad_left,
int pad_right,
int stride,
bool ceil_mode) {
int output_size; int output_size;
if (!ceil_mode) { if (!ceil_mode) {
output_size = (input_size - filter_size + 2 * padding) / stride + 1; output_size =
(input_size - filter_size + pad_left + pad_right) / stride + 1;
} else { } else {
output_size = output_size =
(input_size - filter_size + 2 * padding + stride - 1) / stride + 1; (input_size - filter_size + pad_left + pad_right + stride - 1) /
stride +
1;
} }
return output_size; return output_size;
} }
...@@ -44,8 +51,10 @@ static std::vector<int64_t> compute_output_shape(operators::PoolParam* param_) { ...@@ -44,8 +51,10 @@ static std::vector<int64_t> compute_output_shape(operators::PoolParam* param_) {
std::vector<int>& ksize = param_->ksize; std::vector<int>& ksize = param_->ksize;
if (param_->global_pooling) { if (param_->global_pooling) {
ksize.resize(static_cast<size_t>(x_dims.size()) - 2); ksize.resize(static_cast<size_t>(x_dims.size()) - 2);
auto paddings = *param_->paddings;
for (size_t i = 0; i < ksize.size(); ++i) { for (size_t i = 0; i < ksize.size(); ++i) {
param_->paddings[i] = 0; paddings[2 * i] = 0;
paddings[2 * i + 1] = 0;
ksize[i] = static_cast<int>(x_dims[i + 2]); ksize[i] = static_cast<int>(x_dims[i + 2]);
} }
} }
...@@ -58,7 +67,8 @@ static std::vector<int64_t> compute_output_shape(operators::PoolParam* param_) { ...@@ -58,7 +67,8 @@ static std::vector<int64_t> compute_output_shape(operators::PoolParam* param_) {
for (size_t i = 0; i < param_->ksize.size(); ++i) { for (size_t i = 0; i < param_->ksize.size(); ++i) {
output_shape.push_back(PoolOutputSize(x_dims[i + 2], output_shape.push_back(PoolOutputSize(x_dims[i + 2],
param_->ksize[i], param_->ksize[i],
param_->paddings[i], paddings[2 * i],
paddings[2 * i + 1],
param_->strides[i], param_->strides[i],
param_->ceil_mode)); param_->ceil_mode));
} }
...@@ -75,7 +85,7 @@ static void pool_compute_ref(const operators::PoolParam& param) { ...@@ -75,7 +85,7 @@ static void pool_compute_ref(const operators::PoolParam& param) {
std::vector<int> ksize = param.ksize; std::vector<int> ksize = param.ksize;
std::vector<int> strides = param.strides; std::vector<int> strides = param.strides;
std::vector<int> paddings = param.paddings; std::vector<int> paddings = *param.paddings;
std::string pooling_type = param.pooling_type; std::string pooling_type = param.pooling_type;
bool global_pooling = param.global_pooling; bool global_pooling = param.global_pooling;
...@@ -99,7 +109,7 @@ static void pool_compute_ref(const operators::PoolParam& param) { ...@@ -99,7 +109,7 @@ static void pool_compute_ref(const operators::PoolParam& param) {
int stride_h = strides[0]; int stride_h = strides[0];
int stride_w = strides[1]; int stride_w = strides[1];
int pad_h = paddings[0]; int pad_h = paddings[0];
int pad_w = paddings[1]; int pad_w = paddings[2];
if (global_pooling == true) { if (global_pooling == true) {
for (int n = 0; n < in_n; ++n) { for (int n = 0; n < in_n; ++n) {
...@@ -226,7 +236,9 @@ TEST(pool_cuda, compute) { ...@@ -226,7 +236,9 @@ TEST(pool_cuda, compute) {
} }
param.global_pooling = global_pooling; param.global_pooling = global_pooling;
param.strides = {stride, stride}; param.strides = {stride, stride};
param.paddings = {pad, pad}; std::vector<int> paddings = {pad, pad, pad, pad};
param.paddings =
std::make_shared<std::vector<int>>(paddings);
param.exclusive = exclusive; param.exclusive = exclusive;
param.ceil_mode = ceil_mode; param.ceil_mode = ceil_mode;
param.adaptive = false; param.adaptive = false;
......
...@@ -48,8 +48,13 @@ node_map_type PoolConverter(const std::shared_ptr<lite::OpLite> pool_op, ...@@ -48,8 +48,13 @@ node_map_type PoolConverter(const std::shared_ptr<lite::OpLite> pool_op,
auto npu_window = ge::AttrValue::LIST_INT(ksize.begin(), ksize.end()); auto npu_window = ge::AttrValue::LIST_INT(ksize.begin(), ksize.end());
auto padding = op_info->GetAttr<std::vector<int>>("paddings"); auto padding = op_info->GetAttr<std::vector<int>>("paddings");
bool pads_equal = (padding[0] == padding[1]) && (padding[2] == padding[3]);
if (!pads_equal) {
LOG(FATAL)
<< "padding requires pad_left == pad_right, pad_top == pad_bottom";
}
auto npu_pad = auto npu_pad =
ge::AttrValue::LIST_INT{padding[0], padding[0], padding[1], padding[1]}; ge::AttrValue::LIST_INT{padding[0], padding[1], padding[2], padding[3]};
auto strides = op_info->GetAttr<std::vector<int>>("strides"); auto strides = op_info->GetAttr<std::vector<int>>("strides");
auto npu_stride = ge::AttrValue::LIST_INT(strides.begin(), strides.end()); auto npu_stride = ge::AttrValue::LIST_INT(strides.begin(), strides.end());
int npu_ceil_mode = 0; int npu_ceil_mode = 0;
......
...@@ -61,7 +61,7 @@ void pool_ref(const std::shared_ptr<operators::PoolOpLite> op) { ...@@ -61,7 +61,7 @@ void pool_ref(const std::shared_ptr<operators::PoolOpLite> op) {
int stride_h = strides[0]; int stride_h = strides[0];
int stride_w = strides[1]; int stride_w = strides[1];
int pad_h = paddings[0]; int pad_h = paddings[0];
int pad_w = paddings[1]; int pad_w = paddings[2];
if (global_pooling == true) { if (global_pooling == true) {
for (int n = 0; n < in_n; ++n) { for (int n = 0; n < in_n; ++n) {
...@@ -163,7 +163,8 @@ void test_pool(int bs, ...@@ -163,7 +163,8 @@ void test_pool(int bs,
opdesc.SetAttr("global_pooling", global_pooling); opdesc.SetAttr("global_pooling", global_pooling);
opdesc.SetAttr("exclusive", exclusive); opdesc.SetAttr("exclusive", exclusive);
opdesc.SetAttr("strides", std::vector<int>({stride, stride})); opdesc.SetAttr("strides", std::vector<int>({stride, stride}));
opdesc.SetAttr("paddings", std::vector<int>({padding, padding})); opdesc.SetAttr("paddings",
std::vector<int>({padding, padding, padding, padding}));
// create and convert op to NPU model, then run it on NPU // create and convert op to NPU model, then run it on NPU
auto op = CreateOp<operators::PoolOpLite>(opdesc, &scope); auto op = CreateOp<operators::PoolOpLite>(opdesc, &scope);
......
...@@ -44,16 +44,22 @@ class PoolCompute ...@@ -44,16 +44,22 @@ class PoolCompute
const auto& out_dims = param.output->dims(); const auto& out_dims = param.output->dims();
const std::string pooling_type = param.pooling_type; const std::string pooling_type = param.pooling_type;
const bool global_pooling = param.global_pooling; const bool global_pooling = param.global_pooling;
std::vector<int> paddings = param.paddings; std::vector<int> paddings = *param.paddings;
std::vector<int> strides = param.strides; std::vector<int> strides = param.strides;
std::vector<int> ksize = param.ksize; std::vector<int> ksize = param.ksize;
if (global_pooling) { if (global_pooling) {
for (size_t i = 0; i < ksize.size(); ++i) { for (size_t i = 0; i < ksize.size(); ++i) {
paddings[i] = 0; paddings[2 * i] = 0;
paddings[2 * i + 1] = 0;
ksize[i] = static_cast<int>(in_dims[i + 2]); ksize[i] = static_cast<int>(in_dims[i + 2]);
} }
} }
bool pads_equal =
(paddings[0] == paddings[1]) && (paddings[2] == paddings[3]);
if (!pads_equal) {
LOG(FATAL)
<< "padding requires pad_left == pad_right, pad_top == pad_bottom";
}
auto& context = ctx_->As<OpenCLContext>(); auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr); CHECK(context.cl_context() != nullptr);
auto* input_buf = param.x->data<float, cl::Buffer>(); auto* input_buf = param.x->data<float, cl::Buffer>();
...@@ -89,7 +95,7 @@ class PoolCompute ...@@ -89,7 +95,7 @@ class PoolCompute
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
status = kernel.setArg(++arg_idx, static_cast<const int>(paddings[0])); status = kernel.setArg(++arg_idx, static_cast<const int>(paddings[0]));
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
status = kernel.setArg(++arg_idx, static_cast<const int>(paddings[1])); status = kernel.setArg(++arg_idx, static_cast<const int>(paddings[2]));
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
status = kernel.setArg(++arg_idx, *output_buf); status = kernel.setArg(++arg_idx, *output_buf);
CL_CHECK_FATAL(status); CL_CHECK_FATAL(status);
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <memory>
#include <random> #include <random>
#include "lite/backends/opencl/target_wrapper.h" #include "lite/backends/opencl/target_wrapper.h"
#include "lite/core/op_registry.h" #include "lite/core/op_registry.h"
...@@ -88,9 +89,10 @@ TEST(pool2d, compute) { ...@@ -88,9 +89,10 @@ TEST(pool2d, compute) {
param.output = &out; param.output = &out;
param.global_pooling = true; param.global_pooling = true;
param.pooling_type = "avg"; param.pooling_type = "avg";
param.paddings = std::vector<int>{0, 0}; std::vector<int> paddings = {0, 0, 0, 0};
param.strides = std::vector<int>{1, 1}; param.strides = std::vector<int>{1, 1};
param.ksize = std::vector<int>{7, 7}; param.ksize = std::vector<int>{7, 7};
param.paddings = std::make_shared<std::vector<int>>(paddings);
std::unique_ptr<KernelContext> context(new KernelContext); std::unique_ptr<KernelContext> context(new KernelContext);
context->As<OpenCLContext>().InitOnce(); context->As<OpenCLContext>().InitOnce();
......
...@@ -35,7 +35,6 @@ class PoolCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -35,7 +35,6 @@ class PoolCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
auto& param = *param_.get_mutable<param_t>(); auto& param = *param_.get_mutable<param_t>();
if (param.global_pooling) { if (param.global_pooling) {
for (size_t i = 0; i < param.ksize.size(); ++i) { for (size_t i = 0; i < param.ksize.size(); ++i) {
param.paddings[i] = 0;
param.ksize[i] = static_cast<int>(param.x->dims()[i + 2]); param.ksize[i] = static_cast<int>(param.x->dims()[i + 2]);
} }
} }
...@@ -52,7 +51,7 @@ class PoolCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -52,7 +51,7 @@ class PoolCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
param.x, param.x,
param.ksize, param.ksize,
param.strides, param.strides,
param.paddings, *param.paddings,
pool_process, pool_process,
true, true,
false, false,
...@@ -68,7 +67,7 @@ class PoolCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -68,7 +67,7 @@ class PoolCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
param.x, param.x,
param.ksize, param.ksize,
param.strides, param.strides,
param.paddings, *param.paddings,
pool_process, pool_process,
param.exclusive, param.exclusive,
param.adaptive, param.adaptive,
......
...@@ -60,7 +60,8 @@ TEST(pool2d_x86, run_test) { ...@@ -60,7 +60,8 @@ TEST(pool2d_x86, run_test) {
param.x = &x; param.x = &x;
param.output = &out; param.output = &out;
param.strides = {2, 2}; param.strides = {2, 2};
param.paddings = {0, 0}; std::vector<int> paddings = {0, 0, 0, 0};
param.paddings = std::make_shared<std::vector<int>>(paddings);
param.ksize = {2, 2}; param.ksize = {2, 2};
param.pooling_type = "max"; param.pooling_type = "max";
std::unique_ptr<KernelContext> ctx(new KernelContext); std::unique_ptr<KernelContext> ctx(new KernelContext);
......
...@@ -60,7 +60,7 @@ void pool_ref(const std::shared_ptr<operators::PoolOpLite> op) { ...@@ -60,7 +60,7 @@ void pool_ref(const std::shared_ptr<operators::PoolOpLite> op) {
int stride_h = strides[0]; int stride_h = strides[0];
int stride_w = strides[1]; int stride_w = strides[1];
int pad_h = paddings[0]; int pad_h = paddings[0];
int pad_w = paddings[1]; int pad_w = paddings[2];
if (global_pooling == true) { if (global_pooling == true) {
for (int n = 0; n < in_n; ++n) { for (int n = 0; n < in_n; ++n) {
...@@ -162,7 +162,8 @@ void test_pool(int bs, ...@@ -162,7 +162,8 @@ void test_pool(int bs,
opdesc.SetAttr("global_pooling", global_pooling); opdesc.SetAttr("global_pooling", global_pooling);
opdesc.SetAttr("exclusive", exclusive); opdesc.SetAttr("exclusive", exclusive);
opdesc.SetAttr("strides", std::vector<int>({stride, stride})); opdesc.SetAttr("strides", std::vector<int>({stride, stride}));
opdesc.SetAttr("paddings", std::vector<int>({padding, padding})); opdesc.SetAttr("paddings",
std::vector<int>({padding, padding, padding, padding}));
opdesc.SetAttr("ceil_mode", ceil_mode); opdesc.SetAttr("ceil_mode", ceil_mode);
// create and convert op to XPU model, then run it on XPU // create and convert op to XPU model, then run it on XPU
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#pragma once #pragma once
#include <memory>
#include <string> #include <string>
#include <utility> #include <utility>
#include <vector> #include <vector>
...@@ -302,7 +303,12 @@ struct PoolParam { ...@@ -302,7 +303,12 @@ struct PoolParam {
bool global_pooling{ bool global_pooling{
false}; // if true, kernel size and paddings will be ignored false}; // if true, kernel size and paddings will be ignored
std::vector<int> strides{1, 1}; std::vector<int> strides{1, 1};
std::vector<int> paddings{0, 0}; /* paddings type change
* from std::vector<int> to std::shared_ptr<std::vector<int>>
* to support dynamically modifying the padding,
* so the kernel param and the operator param are updated synchronously
*/
std::shared_ptr<std::vector<int>> paddings;
bool exclusive{true}; bool exclusive{true};
bool adaptive{false}; bool adaptive{false};
bool ceil_mode{false}; bool ceil_mode{false};
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "lite/operators/pool_op.h" #include "lite/operators/pool_op.h"
#include <algorithm>
#include "lite/core/op_registry.h" #include "lite/core/op_registry.h"
namespace paddle { namespace paddle {
...@@ -26,7 +27,7 @@ bool PoolOpLite::CheckShape() const { ...@@ -26,7 +27,7 @@ bool PoolOpLite::CheckShape() const {
const auto& x_dims = param_.x->dims(); const auto& x_dims = param_.x->dims();
const auto& ksize = param_.ksize; const auto& ksize = param_.ksize;
const auto& strides = param_.strides; const auto& strides = param_.strides;
const auto& paddings = param_.paddings; const auto& paddings = *param_.paddings;
// "Pooling input should be 4-D or 5-D tensor." // "Pooling input should be 4-D or 5-D tensor."
CHECK_OR_FALSE(x_dims.size() == 4 || x_dims.size() == 5); CHECK_OR_FALSE(x_dims.size() == 4 || x_dims.size() == 5);
...@@ -34,20 +35,60 @@ bool PoolOpLite::CheckShape() const { ...@@ -34,20 +35,60 @@ bool PoolOpLite::CheckShape() const {
CHECK_OR_FALSE(x_dims.size() - ksize.size() == 2U); CHECK_OR_FALSE(x_dims.size() - ksize.size() == 2U);
// Strides size and pooling size should be the same. // Strides size and pooling size should be the same.
CHECK_OR_FALSE(ksize.size() == strides.size()); CHECK_OR_FALSE(ksize.size() == strides.size());
// Paddings size and pooling size should be the same. // Paddings size must be 4.
CHECK_OR_FALSE(ksize.size() == paddings.size()); CHECK_OR_FALSE(paddings.size() == 4L);
return true; return true;
} }
int PoolOutputSize( inline void UpdatePadding(std::vector<int>* paddings,
int input_size, int filter_size, int padding, int stride, bool ceil_mode) { const bool global_pooling,
const bool adaptive,
const std::string padding_algorithm,
const lite::DDim data_dims,
const std::vector<int>& strides,
const std::vector<int>& ksize) {
// when padding_algorithm is "VALID" or "SAME"
if (padding_algorithm == "SAME") {
for (int i = 0; i < strides.size(); ++i) {
int out_size = (data_dims[i + 2] + strides[i] - 1) / strides[i];
int pad_sum =
std::max((out_size - 1) * strides[i] + ksize[i] - data_dims[i + 2],
(int64_t)0);
int pad_0 = pad_sum / 2;
int pad_1 = pad_sum - pad_0;
*(paddings->begin() + i * 2) = pad_0;
*(paddings->begin() + i * 2 + 1) = pad_1;
}
} else if (padding_algorithm == "VALID") {
for (auto it = paddings->begin(); it != paddings->end(); it++) {
*it = 0;
}
}
// if global_pooling == true or adaptive == true, padding is ignored
if (global_pooling || adaptive) {
for (auto it = paddings->begin(); it != paddings->end(); it++) {
*it = 0;
}
}
}
int PoolOutputSize(int input_size,
int filter_size,
int pad_left,
int pad_right,
int stride,
bool ceil_mode) {
int output_size; int output_size;
if (!ceil_mode) { if (!ceil_mode) {
output_size = (input_size - filter_size + 2 * padding) / stride + 1; output_size =
(input_size - filter_size + pad_left + pad_right) / stride + 1;
} else { } else {
output_size = output_size =
(input_size - filter_size + 2 * padding + stride - 1) / stride + 1; (input_size - filter_size + pad_left + pad_right + stride - 1) /
stride +
1;
} }
return output_size; return output_size;
} }
...@@ -55,14 +96,21 @@ int PoolOutputSize( ...@@ -55,14 +96,21 @@ int PoolOutputSize(
bool PoolOpLite::InferShape() const { bool PoolOpLite::InferShape() const {
const auto x_dims = param_.x->dims(); const auto x_dims = param_.x->dims();
std::vector<int>& ksize = param_.ksize; std::vector<int>& ksize = param_.ksize;
// dynamic update 4-pad
UpdatePadding(param_.paddings.get(),
param_.global_pooling,
param_.adaptive,
padding_algorithm_,
x_dims,
param_.strides,
ksize);
if (param_.global_pooling) { if (param_.global_pooling) {
ksize.resize(static_cast<size_t>(x_dims.size()) - 2); ksize.resize(static_cast<size_t>(x_dims.size()) - 2);
for (size_t i = 0; i < ksize.size(); ++i) { for (size_t i = 0; i < ksize.size(); ++i) {
param_.paddings[i] = 0;
ksize[i] = static_cast<int>(x_dims[i + 2]); ksize[i] = static_cast<int>(x_dims[i + 2]);
} }
} }
auto paddings = *param_.paddings;
std::vector<int64_t> output_shape({x_dims[0], x_dims[1]}); std::vector<int64_t> output_shape({x_dims[0], x_dims[1]});
if (param_.adaptive) { if (param_.adaptive) {
output_shape.insert( output_shape.insert(
...@@ -71,15 +119,14 @@ bool PoolOpLite::InferShape() const { ...@@ -71,15 +119,14 @@ bool PoolOpLite::InferShape() const {
for (size_t i = 0; i < param_.ksize.size(); ++i) { for (size_t i = 0; i < param_.ksize.size(); ++i) {
output_shape.push_back(PoolOutputSize(x_dims[i + 2], output_shape.push_back(PoolOutputSize(x_dims[i + 2],
param_.ksize[i], param_.ksize[i],
param_.paddings[i], paddings[2 * i],
paddings[2 * i + 1],
param_.strides[i], param_.strides[i],
param_.ceil_mode)); param_.ceil_mode));
} }
} }
param_.output->Resize(lite::DDim(output_shape)); param_.output->Resize(lite::DDim(output_shape));
// ctx->SetOutputDim("Out", framework::make_ddim(output_shape));
// ctx->ShareLoD("X", "Out");
return true; return true;
} }
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#pragma once #pragma once
#include <memory>
#include <string> #include <string>
#include <vector> #include <vector>
#include "lite/core/kernel.h" #include "lite/core/kernel.h"
...@@ -51,7 +52,7 @@ class PoolOpLite : public OpLite { ...@@ -51,7 +52,7 @@ class PoolOpLite : public OpLite {
param_.ksize = op_desc.GetAttr<std::vector<int>>("ksize"); param_.ksize = op_desc.GetAttr<std::vector<int>>("ksize");
param_.global_pooling = op_desc.GetAttr<bool>("global_pooling"); param_.global_pooling = op_desc.GetAttr<bool>("global_pooling");
param_.strides = op_desc.GetAttr<std::vector<int>>("strides"); param_.strides = op_desc.GetAttr<std::vector<int>>("strides");
param_.paddings = op_desc.GetAttr<std::vector<int>>("paddings"); auto paddings = op_desc.GetAttr<std::vector<int>>("paddings");
if (op_desc.HasAttr("exclusive")) { if (op_desc.HasAttr("exclusive")) {
param_.exclusive = op_desc.GetAttr<bool>("exclusive"); param_.exclusive = op_desc.GetAttr<bool>("exclusive");
...@@ -65,7 +66,23 @@ class PoolOpLite : public OpLite { ...@@ -65,7 +66,23 @@ class PoolOpLite : public OpLite {
if (op_desc.HasAttr("use_quantizer")) { if (op_desc.HasAttr("use_quantizer")) {
param_.use_quantizer = op_desc.GetAttr<bool>("use_quantizer"); param_.use_quantizer = op_desc.GetAttr<bool>("use_quantizer");
} }
// param_.data_format = op_desc.GetAttr<bool>("data_format"); if (op_desc.HasAttr("padding_algorithm")) {
padding_algorithm_ = op_desc.GetAttr<std::string>("padding_algorithm");
}
// 2-pad to 4-pad
if (paddings.size() == 2L) {
for (size_t i = 0; i < 2L; ++i) {
int copy_pad = *(paddings.begin() + 2 * i);
paddings.insert(paddings.begin() + 2 * i + 1, copy_pad);
}
} else {
if (paddings.size() != 4L) {
LOG(FATAL)
<< "Paddings size should be the same or twice as the inputs size.";
}
}
param_.paddings = std::make_shared<std::vector<int>>(paddings);
return true; return true;
} }
...@@ -75,6 +92,7 @@ class PoolOpLite : public OpLite { ...@@ -75,6 +92,7 @@ class PoolOpLite : public OpLite {
private: private:
mutable PoolParam param_; mutable PoolParam param_;
std::string padding_algorithm_{""};
}; };
} // namespace operators } // namespace operators
......
...@@ -69,8 +69,7 @@ DDim compute_out_dim(const DDim& dim_in, ...@@ -69,8 +69,7 @@ DDim compute_out_dim(const DDim& dim_in,
auto kernel_w = param.ksize[1]; auto kernel_w = param.ksize[1];
auto h = dim_in[2]; auto h = dim_in[2];
auto w = dim_in[3]; auto w = dim_in[3];
int pad_h = param.paddings[0]; auto paddings = *param.paddings;
int pad_w = param.paddings[1];
int stride_h = param.strides[0]; int stride_h = param.strides[0];
int stride_w = param.strides[1]; int stride_w = param.strides[1];
bool ceil_mode = param.ceil_mode; bool ceil_mode = param.ceil_mode;
...@@ -79,11 +78,15 @@ DDim compute_out_dim(const DDim& dim_in, ...@@ -79,11 +78,15 @@ DDim compute_out_dim(const DDim& dim_in,
int wout = 1; int wout = 1;
if (!flag_global) { if (!flag_global) {
if (!ceil_mode) { if (!ceil_mode) {
hout = (h - kernel_h + 2 * pad_h) / stride_h + 1; hout = (h - kernel_h + paddings[0] + paddings[1]) / stride_h + 1;
wout = (w - kernel_w + 2 * pad_w) / stride_w + 1; wout = (w - kernel_w + paddings[2] + paddings[3]) / stride_w + 1;
} else { } else {
hout = (h - kernel_h + 2 * pad_h + stride_h - 1) / stride_h + 1; hout =
wout = (w - kernel_w + 2 * pad_w + stride_w - 1) / stride_w + 1; (h - kernel_h + paddings[0] + paddings[1] + stride_h - 1) / stride_h +
1;
wout =
(w - kernel_w + paddings[2] + paddings[3] + stride_w - 1) / stride_w +
1;
} }
} }
dim_out[2] = hout; dim_out[2] = hout;
...@@ -116,7 +119,7 @@ void pooling_basic(const float* din, ...@@ -116,7 +119,7 @@ void pooling_basic(const float* din,
int stride_h = strides[0]; int stride_h = strides[0];
int stride_w = strides[1]; int stride_w = strides[1];
int pad_h = paddings[0]; int pad_h = paddings[0];
int pad_w = paddings[1]; int pad_w = paddings[2];
int size_channel_in = win * hin; int size_channel_in = win * hin;
int size_channel_out = wout * hout; int size_channel_out = wout * hout;
if (global_pooling) { if (global_pooling) {
...@@ -195,18 +198,22 @@ void pooling_basic(const float* din, ...@@ -195,18 +198,22 @@ void pooling_basic(const float* din,
int bh = kernel_h; int bh = kernel_h;
int bw = kernel_w; int bw = kernel_w;
if (ew == win) { if (ew == win) {
bw = sw + kernel_w >= win + pad_w ? win + pad_w bw = (sw + kernel_w) >= (win + paddings[3])
: sw + kernel_w; ? (win + paddings[3])
: (sw + kernel_w);
bw -= sw; bw -= sw;
if (sw - pad_w < 0 && sw + kernel_w > win + pad_w) { if ((sw - pad_w) < 0 &&
(sw + kernel_w) > (win + paddings[3])) {
bw += pad_w; bw += pad_w;
} }
} }
if (eh == hin) { if (eh == hin) {
bh = sh + kernel_h >= hin + pad_h ? hin + pad_h bh = (sh + kernel_h) >= (hin + paddings[1])
: sh + kernel_h; ? (hin + paddings[1])
: (sh + kernel_h);
bh -= sh; bh -= sh;
if (sh - pad_h < 0 && sh + kernel_h > hin + pad_h) { if ((sh - pad_h) < 0 &&
(sh + kernel_h) > (hin + paddings[1])) {
bh += pad_h; bh += pad_h;
} }
} }
...@@ -243,7 +250,7 @@ void test_pool_fp32(const std::vector<DDim>& input_dims, ...@@ -243,7 +250,7 @@ void test_pool_fp32(const std::vector<DDim>& input_dims,
param.ksize = ksize; param.ksize = ksize;
param.strides = strides; param.strides = strides;
param.paddings = pads; param.paddings = std::make_shared<std::vector<int>>(pads);
param.ceil_mode = ceil_mode; param.ceil_mode = ceil_mode;
param.global_pooling = flag_global; param.global_pooling = flag_global;
param.pooling_type = pooling_type; param.pooling_type = pooling_type;
...@@ -399,31 +406,38 @@ TEST(TestPoolRand, test_pool_rand) { ...@@ -399,31 +406,38 @@ TEST(TestPoolRand, test_pool_rand) {
for (auto& kw : {1, 2, 3}) { for (auto& kw : {1, 2, 3}) {
for (auto& kh : {1, 2, 3}) { for (auto& kh : {1, 2, 3}) {
for (auto& stride : {1, 2}) { for (auto& stride : {1, 2}) {
for (auto& pad : {0, 1, 2}) { for (auto& pad_top : {0, 1, 2}) {
for (auto& flag_global : {false, true}) { for (auto& pad_bottom : {0, 1, 2}) {
for (auto& exclusive : {false, true}) { for (auto& pad_left : {0, 1, 2}) {
for (auto& ceil_mode : {false, true}) { for (auto& pad_right : {0, 1, 2}) {
for (auto& pooling_type : {"max", "avg"}) { for (auto& flag_global : {false, true}) {
bool adaptive = false; for (auto& exclusive : {false, true}) {
bool use_quantizer = false; for (auto& ceil_mode : {false, true}) {
std::vector<DDim> dims; for (auto& pooling_type : {"max", "avg"}) {
for (auto& batch : {1, 2}) { bool adaptive = false;
for (auto& h : {1, 2, 3, 4, 11, 19, 32, 28}) { bool use_quantizer = false;
dims.push_back(DDim({batch, cin, h, h})); std::vector<DDim> dims;
for (auto& batch : {1, 2}) {
for (auto& h : {1, 2, 3, 4, 11, 19, 32, 28}) {
dims.push_back(DDim({batch, cin, h, h}));
}
}
test_pool_fp32(
dims,
{kh, kw},
{stride, stride},
{pad_top, pad_bottom, pad_left, pad_right},
ceil_mode,
flag_global,
exclusive,
adaptive,
use_quantizer,
pooling_type,
{1, 2, 4},
{FLAGS_power_mode});
}
} }
} }
test_pool_fp32(dims,
{kh, kw},
{stride, stride},
{pad, pad},
ceil_mode,
flag_global,
exclusive,
adaptive,
use_quantizer,
pooling_type,
{1, 2, 4},
{FLAGS_power_mode});
} }
} }
} }
...@@ -443,7 +457,7 @@ TEST(TesPoolCustom, test_pool_fp32_custom_size) { ...@@ -443,7 +457,7 @@ TEST(TesPoolCustom, test_pool_fp32_custom_size) {
{DDim({FLAGS_batch, FLAGS_in_channel, FLAGS_in_height, FLAGS_in_width})}, {DDim({FLAGS_batch, FLAGS_in_channel, FLAGS_in_height, FLAGS_in_width})},
{FLAGS_kernel_h, FLAGS_kernel_w}, {FLAGS_kernel_h, FLAGS_kernel_w},
{FLAGS_stride_h, FLAGS_stride_w}, {FLAGS_stride_h, FLAGS_stride_w},
{FLAGS_pad_h, FLAGS_pad_w}, {FLAGS_pad_h, FLAGS_pad_h, FLAGS_pad_w, FLAGS_pad_w},
FLAGS_ceil_mode, FLAGS_ceil_mode,
FLAGS_flag_global, FLAGS_flag_global,
FLAGS_exclusive, FLAGS_exclusive,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册