未验证 提交 a7f7d49b 编写于 作者: H HappyAngel 提交者: GitHub

update pooling 2-padding to 4-padding (#2410)

* fix pooling bug and speed

* fix build error

* delete VLOG in pool, test=develop

* add openmp, test=develop

* fix lite/kernels/arm/pool_compute_test basic_pooling compute error bug, test=develop

* update pooling 2-pad to 4-pad, test=develop

* fix 2-pad to 4-pad in operators/pool_op.h: AttachKernel sets the param, so the 2-pad to 4-pad conversion should be put in AttachKernel. test=develop

* put 2-pad to 4-pad in AttachImpl, test=develop

* according to reviews, fix some format error. test=develop

* fix format error, add (). test=develop

* change paddings type to support dynamic modification, test=develop

* update padding type in other devices, test=develop

* fix x86 build error on shared_ptr, test=develop

* fix format in operators pool_op.cc, test=develop
上级 ee7ba3ab
......@@ -46,7 +46,7 @@ void pooling_basic(const float* din,
int stride_h = strides[0];
int stride_w = strides[1];
int pad_h = paddings[0];
int pad_w = paddings[1];
int pad_w = paddings[2];
int size_channel_in = win * hin;
int size_channel_out = wout * hout;
if (global_pooling) {
......@@ -125,18 +125,22 @@ void pooling_basic(const float* din,
int bh = kernel_h;
int bw = kernel_w;
if (ew == win) {
bw = sw + kernel_w >= win + pad_w ? win + pad_w
: sw + kernel_w;
bw = (sw + kernel_w) >= (win + paddings[3])
? (win + paddings[3])
: (sw + kernel_w);
bw -= sw;
if (sw - pad_w < 0 && sw + kernel_w > win + pad_w) {
if ((sw - pad_w) < 0 &&
(sw + kernel_w) > (win + paddings[3])) {
bw += pad_w;
}
}
if (eh == hin) {
bh = sh + kernel_h >= hin + pad_h ? hin + pad_h
: sh + kernel_h;
bh = (sh + kernel_h) >= (hin + paddings[1])
? (hin + paddings[1])
: (sh + kernel_h);
bh -= sh;
if (sh - pad_h < 0 && sh + kernel_h > hin + pad_h) {
if ((sh - pad_h) < 0 &&
(sh + kernel_h) > (hin + paddings[1])) {
bh += pad_h;
}
}
......
......@@ -45,13 +45,14 @@ class PoolingPE : public PE {
PoolingArgs args = {0};
args.mode = param_.type;
auto paddings = *param_.paddings;
args.kernel_reciprocal = fp32_2_fp16(1.0f / (k_width * k_height));
args.image.address = input->data<float16>();
args.image.channels = input->shape().channel();
args.image.height = input->shape().height();
args.image.width = input->shape().width();
args.image.pad_height = param_.paddings[0];
args.image.pad_width = param_.paddings[1];
args.image.pad_height = paddings[0];
args.image.pad_width = paddings[2];
args.image.scale_address = input->scale();
args.output.address = output->mutableData<float16>();
args.output.scale_address = output->scale();
......@@ -76,12 +77,13 @@ class PoolingPE : public PE {
float* image_addr = float_input.mutableData<float>(FP32, input->shape());
float_input.copyFrom(input);
float16* data_out = output->data<float16>();
auto paddings = *param_.paddings;
int image_height = input->shape().height();
int image_width = input->shape().width();
int image_channels = input->shape().channel();
int image_pad_h = param_.paddings[0];
int image_pad_w = param_.paddings[1];
int image_pad_h = paddings[0];
int image_pad_w = paddings[2];
int kernel_height = param_.kernelSize[1];
int kernel_width = param_.kernelSize[0];
int kernel_step_h = param_.strides[0];
......
......@@ -49,7 +49,7 @@ class Pool2dFunctor<lite::TargetType::kX86, PoolProcess, T> {
const int stride_height = strides[0];
const int stride_width = strides[1];
const int padding_height = paddings[0];
const int padding_width = paddings[1];
const int padding_width = paddings[2];
const int input_stride = input_height * input_width;
const int output_stride = output_height * output_width;
......@@ -130,7 +130,7 @@ class Pool2dGradFunctor<lite::TargetType::kX86, PoolProcess, T> {
const int stride_height = strides[0];
const int stride_width = strides[1];
const int padding_height = paddings[0];
const int padding_width = paddings[1];
const int padding_width = paddings[2];
const int input_stride = input_height * input_width;
const int output_stride = output_height * output_width;
......@@ -213,7 +213,7 @@ class MaxPool2dGradFunctor<lite::TargetType::kX86, T> {
const int stride_height = strides[0];
const int stride_width = strides[1];
const int padding_height = paddings[0];
const int padding_width = paddings[1];
const int padding_width = paddings[2];
const int input_stride = input_height * input_width;
const int output_stride = output_height * output_width;
......@@ -629,7 +629,7 @@ class MaxPool2dWithIndexFunctor<lite::TargetType::kX86, T1, T2> {
const int stride_height = strides[0];
const int stride_width = strides[1];
const int padding_height = paddings[0];
const int padding_width = paddings[1];
const int padding_width = paddings[2];
const int input_stride = input_height * input_width;
const int output_stride = output_height * output_width;
......
......@@ -38,7 +38,7 @@ void PoolCompute::Run() {
std::vector<int>& ksize = param.ksize;
std::vector<int>& strides = param.strides;
std::vector<int>& paddings = param.paddings;
std::vector<int>& paddings = *param.paddings;
std::string& pooling_type = param.pooling_type;
bool global_pooling = param.global_pooling;
......@@ -48,12 +48,15 @@ void PoolCompute::Run() {
bool use_quantizer = param.use_quantizer;
std::string& data_format = param.data_format;
bool kps_equal = (ksize[0] == ksize[1]) && (strides[0] == strides[1]) &&
(paddings[0] == paddings[1]);
bool pads_equal =
(paddings[0] == paddings[1]) && (paddings[2] == paddings[3]);
bool kps_equal = (ksize[0] == ksize[1]) && (strides[0] == strides[1]) &&
(paddings[0] == paddings[2]);
if (global_pooling) {
for (size_t i = 0; i < ksize.size(); ++i) {
paddings[i] = 0;
paddings[2 * i] = 0;
paddings[2 * i + 1] = 0;
ksize[i] = static_cast<int>(in_dims[i + 2]);
}
if (pooling_type == "max") {
......@@ -80,7 +83,8 @@ void PoolCompute::Run() {
return;
}
} else {
if (ksize[0] == 2 && strides[0] == 2 && paddings[0] == 0 && kps_equal) {
if (ksize[0] == 2 && strides[0] == 2 && paddings[0] == 0 && pads_equal &&
kps_equal) {
if (pooling_type == "max") {
lite::arm::math::pooling2x2s2_max(din,
dout,
......@@ -106,7 +110,7 @@ void PoolCompute::Run() {
return;
}
} else if (ksize[0] == 3 && strides[0] == 1 && paddings[0] == 1 &&
kps_equal) {
pads_equal && kps_equal) {
if (pooling_type == "max") {
lite::arm::math::pooling3x3s1p1_max(din,
dout,
......@@ -132,7 +136,7 @@ void PoolCompute::Run() {
return;
}
} else if (ksize[0] == 3 && strides[0] == 1 && paddings[0] == 0 &&
kps_equal) {
pads_equal && kps_equal) {
if (pooling_type == "max") {
lite::arm::math::pooling3x3s1p0_max(din,
dout,
......@@ -158,7 +162,7 @@ void PoolCompute::Run() {
return;
}
} else if (ksize[0] == 3 && strides[0] == 2 && paddings[0] == 0 &&
kps_equal) {
pads_equal && kps_equal) {
if (pooling_type == "max") {
lite::arm::math::pooling3x3s2p0_max(din,
dout,
......@@ -184,7 +188,7 @@ void PoolCompute::Run() {
return;
}
} else if (ksize[0] == 3 && strides[0] == 2 && paddings[0] == 1 &&
kps_equal) {
pads_equal && kps_equal) {
if (pooling_type == "max") {
lite::arm::math::pooling3x3s2p1_max(din,
dout,
......
......@@ -15,6 +15,7 @@
#include "lite/kernels/arm/pool_compute.h"
#include <gtest/gtest.h>
#include <limits>
#include <memory>
#include <string>
#include <vector>
#include "lite/backends/arm/math/funcs.h"
......@@ -25,14 +26,21 @@ namespace lite {
namespace kernels {
namespace arm {
int PoolOutputSize(
int input_size, int filter_size, int padding, int stride, bool ceil_mode) {
int PoolOutputSize(int input_size,
int filter_size,
int pad_left,
int pad_right,
int stride,
bool ceil_mode) {
int output_size;
if (!ceil_mode) {
output_size = (input_size - filter_size + 2 * padding) / stride + 1;
output_size =
(input_size - filter_size + pad_left + pad_right) / stride + 1;
} else {
output_size =
(input_size - filter_size + 2 * padding + stride - 1) / stride + 1;
(input_size - filter_size + pad_left + pad_right + stride - 1) /
stride +
1;
}
return output_size;
}
......@@ -40,10 +48,12 @@ int PoolOutputSize(
std::vector<int64_t> compute_output_shape(operators::PoolParam* param_) {
const auto x_dims = param_->x->dims();
std::vector<int>& ksize = param_->ksize;
auto paddings = *param_->paddings;
if (param_->global_pooling) {
ksize.resize(static_cast<size_t>(x_dims.size()) - 2);
for (size_t i = 0; i < ksize.size(); ++i) {
param_->paddings[i] = 0;
paddings[2 * i] = 0;
paddings[2 * i + 1] = 0;
ksize[i] = static_cast<int>(x_dims[i + 2]);
}
}
......@@ -56,7 +66,8 @@ std::vector<int64_t> compute_output_shape(operators::PoolParam* param_) {
for (size_t i = 0; i < param_->ksize.size(); ++i) {
output_shape.push_back(PoolOutputSize(x_dims[i + 2],
param_->ksize[i],
param_->paddings[i],
paddings[2 * i],
paddings[2 * i + 1],
param_->strides[i],
param_->ceil_mode));
}
......@@ -73,7 +84,7 @@ void pool_compute_ref(const operators::PoolParam& param) {
std::vector<int> ksize = param.ksize;
std::vector<int> strides = param.strides;
std::vector<int> paddings = param.paddings;
std::vector<int> paddings = *param.paddings;
std::string pooling_type = param.pooling_type;
bool global_pooling = param.global_pooling;
......@@ -99,7 +110,7 @@ void pool_compute_ref(const operators::PoolParam& param) {
int stride_h = strides[0];
int stride_w = strides[1];
int pad_h = paddings[0];
int pad_w = paddings[1];
int pad_w = paddings[2];
int size_channel_in = win * hin;
int size_channel_out = wout * hout;
if (global_pooling) {
......@@ -178,18 +189,22 @@ void pool_compute_ref(const operators::PoolParam& param) {
int bh = kernel_h;
int bw = kernel_w;
if (ew == win) {
bw = sw + kernel_w >= win + pad_w ? win + pad_w
: sw + kernel_w;
bw = (sw + kernel_w) >= (win + paddings[3])
? (win + paddings[3])
: (sw + kernel_w);
bw -= sw;
if (sw - pad_w < 0 && sw + kernel_w > win + pad_w) {
if ((sw - pad_w) < 0 &&
(sw + kernel_w) > (win + paddings[3])) {
bw += pad_w;
}
}
if (eh == hin) {
bh = sh + kernel_h >= hin + pad_h ? hin + pad_h
: sh + kernel_h;
bh = (sh + kernel_h) >= (hin + paddings[1])
? (hin + paddings[1])
: (sh + kernel_h);
bh -= sh;
if (sh - pad_h < 0 && sh + kernel_h > hin + pad_h) {
if ((sh - pad_h) < 0 &&
(sh + kernel_h) > (hin + paddings[1])) {
bh += pad_h;
}
}
......@@ -225,75 +240,92 @@ TEST(pool_arm, compute) {
for (auto exclusive : {true, false}) {
for (auto ksize : {2, 3}) {
for (auto stride : {1, 2}) {
for (auto pad : {0, 1}) {
for (auto n : {1, 2}) {
for (auto c : {1, 3}) {
for (auto pad_left : {0, 1}) {
for (auto pad_right : {0, 1}) {
for (auto pad_top : {0, 1}) {
for (auto pad_bottom : {0, 1}) {
for (auto n : {1, 2}) {
for (auto c : {1, 3}) {
#if 1
for (auto h : {2, 3, 4, 11}) {
for (auto w : {2, 3, 4, 11}) {
for (auto h : {2, 3, 4, 11}) {
for (auto w : {2, 3, 4, 11}) {
#else
for (int h = 2; h < 25; h++) {
for (int w = 2; w < 25; w++) {
for (int h = 2; h < 25; h++) {
for (int w = 2; w < 25; w++) {
#endif
VLOG(3) << "n:" << n << " c:" << c << " h:" << h
<< " w:" << w << " ksize:" << ksize
<< " stride:" << stride << " pad:" << pad
<< " exclusive:" << exclusive
<< " global_pooling:" << global_pooling
<< " ceil_mode: " << ceil_mode
<< " pooling_type:" << pooling_type;
VLOG(3) << "n:" << n << " c:" << c << " h:" << h
<< " w:" << w << " ksize:" << ksize
<< " stride:" << stride
<< " pad_left:" << pad_left
<< " pad_right:" << pad_right
<< " pad_top:" << pad_top
<< " pad_bottom:" << pad_bottom
<< " exclusive:" << exclusive
<< " global_pooling:" << global_pooling
<< " ceil_mode: " << ceil_mode
<< " pooling_type:" << pooling_type;
// init x, output
x.Resize(DDim(std::vector<int64_t>({n, c, h, w})));
auto* x_data = x.mutable_data<float>();
for (int i = 0; i < x.dims().production(); ++i) {
float sign = i % 3 == 0 ? -0.03 : 0.05f;
x_data[i] = sign * (i % 128);
}
// init x, output
x.Resize(
DDim(std::vector<int64_t>({n, c, h, w})));
auto* x_data = x.mutable_data<float>();
for (int i = 0; i < x.dims().production(); ++i) {
float sign = i % 3 == 0 ? -0.03 : 0.05f;
x_data[i] = sign * (i % 128);
}
// fill param
param.x = &x;
param.output = &output;
param.pooling_type = pooling_type;
if (global_pooling) {
param.ksize = {h, w};
} else {
param.ksize = {ksize, ksize};
}
param.global_pooling = global_pooling;
param.strides = {stride, stride};
param.paddings = {pad, pad};
param.exclusive = exclusive;
param.ceil_mode = ceil_mode;
param.adaptive = false;
param.use_quantizer = false;
// fill param
param.x = &x;
param.output = &output;
param.pooling_type = pooling_type;
if (global_pooling) {
param.ksize = {h, w};
} else {
param.ksize = {ksize, ksize};
}
param.global_pooling = global_pooling;
param.strides = {stride, stride};
std::vector<int> paddings = {
pad_top, pad_bottom, pad_left, pad_right};
param.exclusive = exclusive;
param.paddings =
std::make_shared<std::vector<int>>(paddings);
param.ceil_mode = ceil_mode;
param.adaptive = false;
param.use_quantizer = false;
const std::vector<int64_t>& output_shape =
compute_output_shape(&param);
output.Resize(DDim(output_shape));
output_ref.Resize(DDim(output_shape));
const std::vector<int64_t>& output_shape =
compute_output_shape(&param);
output.Resize(DDim(output_shape));
output_ref.Resize(DDim(output_shape));
auto* output_data = output.mutable_data<float>();
auto* output_ref_data =
output_ref.mutable_data<float>();
for (int i = 0; i < output.dims().production(); ++i) {
output_data[i] = -2;
output_ref_data[i] = -2;
}
auto* output_data = output.mutable_data<float>();
auto* output_ref_data =
output_ref.mutable_data<float>();
for (int i = 0; i < output.dims().production();
++i) {
output_data[i] = -2;
output_ref_data[i] = -2;
}
// compute
pool.SetParam(param);
pool.Run();
// compute
pool.SetParam(param);
pool.Run();
// compute ref
param.output = &output_ref;
pool_compute_ref(param);
// compute ref
param.output = &output_ref;
pool_compute_ref(param);
// compare
for (int i = 0; i < output.dims().production(); i++) {
EXPECT_NEAR(output_data[i], output_ref_data[i], 1e-4);
// compare
for (int i = 0; i < output.dims().production();
i++) {
EXPECT_NEAR(
output_data[i], output_ref_data[i], 1e-4);
}
VLOG(3) << "compare pass";
}
}
}
VLOG(3) << "compare pass";
}
}
}
......
......@@ -256,6 +256,7 @@ void PoolCompute::Run() {
bool adaptive = param.adaptive;
auto x_dims = param.x->dims();
auto out_dims = param.output->dims();
auto paddings = *param.paddings;
const int in_h = x_dims[2];
const int in_w = x_dims[3];
const int out_h = out_dims[2];
......@@ -266,8 +267,8 @@ void PoolCompute::Run() {
const int win_w = param.ksize[1];
const int stride_h = param.strides[0];
const int stride_w = param.strides[1];
const int pad_h = param.paddings[0];
const int pad_w = param.paddings[1];
const int pad_h = paddings[0];
const int pad_w = paddings[2];
const int total_threads = out_dims.production();
const int threads = 512;
const int blocks = (total_threads + threads - 1) / threads;
......
......@@ -27,14 +27,21 @@ namespace cuda {
using Tensor = lite::Tensor;
using DDim = lite::DDim;
static int PoolOutputSize(
int input_size, int filter_size, int padding, int stride, bool ceil_mode) {
static int PoolOutputSize(int input_size,
int filter_size,
int pad_left,
int pad_right,
int stride,
bool ceil_mode) {
int output_size;
if (!ceil_mode) {
output_size = (input_size - filter_size + 2 * padding) / stride + 1;
output_size =
(input_size - filter_size + pad_left + pad_right) / stride + 1;
} else {
output_size =
(input_size - filter_size + 2 * padding + stride - 1) / stride + 1;
(input_size - filter_size + pad_left + pad_right + stride - 1) /
stride +
1;
}
return output_size;
}
......@@ -44,8 +51,10 @@ static std::vector<int64_t> compute_output_shape(operators::PoolParam* param_) {
std::vector<int>& ksize = param_->ksize;
if (param_->global_pooling) {
ksize.resize(static_cast<size_t>(x_dims.size()) - 2);
auto paddings = *param_->paddings;
for (size_t i = 0; i < ksize.size(); ++i) {
param_->paddings[i] = 0;
paddings[2 * i] = 0;
paddings[2 * i + 1] = 0;
ksize[i] = static_cast<int>(x_dims[i + 2]);
}
}
......@@ -58,7 +67,8 @@ static std::vector<int64_t> compute_output_shape(operators::PoolParam* param_) {
for (size_t i = 0; i < param_->ksize.size(); ++i) {
output_shape.push_back(PoolOutputSize(x_dims[i + 2],
param_->ksize[i],
param_->paddings[i],
paddings[2 * i],
paddings[2 * i + 1],
param_->strides[i],
param_->ceil_mode));
}
......@@ -75,7 +85,7 @@ static void pool_compute_ref(const operators::PoolParam& param) {
std::vector<int> ksize = param.ksize;
std::vector<int> strides = param.strides;
std::vector<int> paddings = param.paddings;
std::vector<int> paddings = *param.paddings;
std::string pooling_type = param.pooling_type;
bool global_pooling = param.global_pooling;
......@@ -99,7 +109,7 @@ static void pool_compute_ref(const operators::PoolParam& param) {
int stride_h = strides[0];
int stride_w = strides[1];
int pad_h = paddings[0];
int pad_w = paddings[1];
int pad_w = paddings[2];
if (global_pooling == true) {
for (int n = 0; n < in_n; ++n) {
......@@ -226,7 +236,9 @@ TEST(pool_cuda, compute) {
}
param.global_pooling = global_pooling;
param.strides = {stride, stride};
param.paddings = {pad, pad};
std::vector<int> paddings = {pad, pad, pad, pad};
param.paddings =
std::make_shared<std::vector<int>>(paddings);
param.exclusive = exclusive;
param.ceil_mode = ceil_mode;
param.adaptive = false;
......
......@@ -48,8 +48,13 @@ node_map_type PoolConverter(const std::shared_ptr<lite::OpLite> pool_op,
auto npu_window = ge::AttrValue::LIST_INT(ksize.begin(), ksize.end());
auto padding = op_info->GetAttr<std::vector<int>>("paddings");
bool pads_equal = (padding[0] == padding[1]) && (padding[2] == padding[3]);
if (!pads_equal) {
LOG(FATAL)
<< "padding requires pad_left == pad_right, pad_top == pad_bottom";
}
auto npu_pad =
ge::AttrValue::LIST_INT{padding[0], padding[0], padding[1], padding[1]};
ge::AttrValue::LIST_INT{padding[0], padding[1], padding[2], padding[3]};
auto strides = op_info->GetAttr<std::vector<int>>("strides");
auto npu_stride = ge::AttrValue::LIST_INT(strides.begin(), strides.end());
int npu_ceil_mode = 0;
......
......@@ -61,7 +61,7 @@ void pool_ref(const std::shared_ptr<operators::PoolOpLite> op) {
int stride_h = strides[0];
int stride_w = strides[1];
int pad_h = paddings[0];
int pad_w = paddings[1];
int pad_w = paddings[2];
if (global_pooling == true) {
for (int n = 0; n < in_n; ++n) {
......@@ -163,7 +163,8 @@ void test_pool(int bs,
opdesc.SetAttr("global_pooling", global_pooling);
opdesc.SetAttr("exclusive", exclusive);
opdesc.SetAttr("strides", std::vector<int>({stride, stride}));
opdesc.SetAttr("paddings", std::vector<int>({padding, padding}));
opdesc.SetAttr("paddings",
std::vector<int>({padding, padding, padding, padding}));
// create and convert op to NPU model, then run it on NPU
auto op = CreateOp<operators::PoolOpLite>(opdesc, &scope);
......
......@@ -44,16 +44,22 @@ class PoolCompute
const auto& out_dims = param.output->dims();
const std::string pooling_type = param.pooling_type;
const bool global_pooling = param.global_pooling;
std::vector<int> paddings = param.paddings;
std::vector<int> paddings = *param.paddings;
std::vector<int> strides = param.strides;
std::vector<int> ksize = param.ksize;
if (global_pooling) {
for (size_t i = 0; i < ksize.size(); ++i) {
paddings[i] = 0;
paddings[2 * i] = 0;
paddings[2 * i + 1] = 0;
ksize[i] = static_cast<int>(in_dims[i + 2]);
}
}
bool pads_equal =
(paddings[0] == paddings[1]) && (paddings[2] == paddings[3]);
if (!pads_equal) {
LOG(FATAL)
<< "padding requires pad_left == pad_right, pad_top == pad_bottom";
}
auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr);
auto* input_buf = param.x->data<float, cl::Buffer>();
......@@ -89,7 +95,7 @@ class PoolCompute
CL_CHECK_FATAL(status);
status = kernel.setArg(++arg_idx, static_cast<const int>(paddings[0]));
CL_CHECK_FATAL(status);
status = kernel.setArg(++arg_idx, static_cast<const int>(paddings[1]));
status = kernel.setArg(++arg_idx, static_cast<const int>(paddings[2]));
CL_CHECK_FATAL(status);
status = kernel.setArg(++arg_idx, *output_buf);
CL_CHECK_FATAL(status);
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include <gtest/gtest.h>
#include <memory>
#include <random>
#include "lite/backends/opencl/target_wrapper.h"
#include "lite/core/op_registry.h"
......@@ -88,9 +89,10 @@ TEST(pool2d, compute) {
param.output = &out;
param.global_pooling = true;
param.pooling_type = "avg";
param.paddings = std::vector<int>{0, 0};
std::vector<int> paddings = {0, 0, 0, 0};
param.strides = std::vector<int>{1, 1};
param.ksize = std::vector<int>{7, 7};
param.paddings = std::make_shared<std::vector<int>>(paddings);
std::unique_ptr<KernelContext> context(new KernelContext);
context->As<OpenCLContext>().InitOnce();
......
......@@ -35,7 +35,6 @@ class PoolCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
auto& param = *param_.get_mutable<param_t>();
if (param.global_pooling) {
for (size_t i = 0; i < param.ksize.size(); ++i) {
param.paddings[i] = 0;
param.ksize[i] = static_cast<int>(param.x->dims()[i + 2]);
}
}
......@@ -52,7 +51,7 @@ class PoolCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
param.x,
param.ksize,
param.strides,
param.paddings,
*param.paddings,
pool_process,
true,
false,
......@@ -68,7 +67,7 @@ class PoolCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
param.x,
param.ksize,
param.strides,
param.paddings,
*param.paddings,
pool_process,
param.exclusive,
param.adaptive,
......
......@@ -60,7 +60,8 @@ TEST(pool2d_x86, run_test) {
param.x = &x;
param.output = &out;
param.strides = {2, 2};
param.paddings = {0, 0};
std::vector<int> paddings = {0, 0, 0, 0};
param.paddings = std::make_shared<std::vector<int>>(paddings);
param.ksize = {2, 2};
param.pooling_type = "max";
std::unique_ptr<KernelContext> ctx(new KernelContext);
......
......@@ -60,7 +60,7 @@ void pool_ref(const std::shared_ptr<operators::PoolOpLite> op) {
int stride_h = strides[0];
int stride_w = strides[1];
int pad_h = paddings[0];
int pad_w = paddings[1];
int pad_w = paddings[2];
if (global_pooling == true) {
for (int n = 0; n < in_n; ++n) {
......@@ -162,7 +162,8 @@ void test_pool(int bs,
opdesc.SetAttr("global_pooling", global_pooling);
opdesc.SetAttr("exclusive", exclusive);
opdesc.SetAttr("strides", std::vector<int>({stride, stride}));
opdesc.SetAttr("paddings", std::vector<int>({padding, padding}));
opdesc.SetAttr("paddings",
std::vector<int>({padding, padding, padding, padding}));
opdesc.SetAttr("ceil_mode", ceil_mode);
// create and convert op to XPU model, then run it on XPU
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include <utility>
#include <vector>
......@@ -302,7 +303,12 @@ struct PoolParam {
bool global_pooling{
false}; // if true, kernel size and paddings will be ignored
std::vector<int> strides{1, 1};
std::vector<int> paddings{0, 0};
/* The type of paddings was changed
 * from std::vector<int> to std::shared_ptr<std::vector<int>>
 * to support modifying the padding dynamically,
 * so that the kernel param and the operator param are updated synchronously.
 */
std::shared_ptr<std::vector<int>> paddings;
bool exclusive{true};
bool adaptive{false};
bool ceil_mode{false};
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include "lite/operators/pool_op.h"
#include <algorithm>
#include "lite/core/op_registry.h"
namespace paddle {
......@@ -26,7 +27,7 @@ bool PoolOpLite::CheckShape() const {
const auto& x_dims = param_.x->dims();
const auto& ksize = param_.ksize;
const auto& strides = param_.strides;
const auto& paddings = param_.paddings;
const auto& paddings = *param_.paddings;
// "Pooling input should be 4-D or 5-D tensor."
CHECK_OR_FALSE(x_dims.size() == 4 || x_dims.size() == 5);
......@@ -34,20 +35,60 @@ bool PoolOpLite::CheckShape() const {
CHECK_OR_FALSE(x_dims.size() - ksize.size() == 2U);
// Strides size and pooling size should be the same.
CHECK_OR_FALSE(ksize.size() == strides.size());
// Paddings size and pooling size should be the same.
CHECK_OR_FALSE(ksize.size() == paddings.size());
// Paddings size must be 4.
CHECK_OR_FALSE(paddings.size() == 4L);
return true;
}
int PoolOutputSize(
int input_size, int filter_size, int padding, int stride, bool ceil_mode) {
inline void UpdatePadding(std::vector<int>* paddings,
const bool global_pooling,
const bool adaptive,
const std::string padding_algorithm,
const lite::DDim data_dims,
const std::vector<int>& strides,
const std::vector<int>& ksize) {
// when padding_algorithm is "VALID" or "SAME"
if (padding_algorithm == "SAME") {
for (int i = 0; i < strides.size(); ++i) {
int out_size = (data_dims[i + 2] + strides[i] - 1) / strides[i];
int pad_sum =
std::max((out_size - 1) * strides[i] + ksize[i] - data_dims[i + 2],
(int64_t)0);
int pad_0 = pad_sum / 2;
int pad_1 = pad_sum - pad_0;
*(paddings->begin() + i * 2) = pad_0;
*(paddings->begin() + i * 2 + 1) = pad_1;
}
} else if (padding_algorithm == "VALID") {
for (auto it = paddings->begin(); it != paddings->end(); it++) {
*it = 0;
}
}
// if global_pooling == true or adaptive == true, padding will be ignore
if (global_pooling || adaptive) {
for (auto it = paddings->begin(); it != paddings->end(); it++) {
*it = 0;
}
}
}
int PoolOutputSize(int input_size,
int filter_size,
int pad_left,
int pad_right,
int stride,
bool ceil_mode) {
int output_size;
if (!ceil_mode) {
output_size = (input_size - filter_size + 2 * padding) / stride + 1;
output_size =
(input_size - filter_size + pad_left + pad_right) / stride + 1;
} else {
output_size =
(input_size - filter_size + 2 * padding + stride - 1) / stride + 1;
(input_size - filter_size + pad_left + pad_right + stride - 1) /
stride +
1;
}
return output_size;
}
......@@ -55,14 +96,21 @@ int PoolOutputSize(
bool PoolOpLite::InferShape() const {
const auto x_dims = param_.x->dims();
std::vector<int>& ksize = param_.ksize;
// dynamic update 4-pad
UpdatePadding(param_.paddings.get(),
param_.global_pooling,
param_.adaptive,
padding_algorithm_,
x_dims,
param_.strides,
ksize);
if (param_.global_pooling) {
ksize.resize(static_cast<size_t>(x_dims.size()) - 2);
for (size_t i = 0; i < ksize.size(); ++i) {
param_.paddings[i] = 0;
ksize[i] = static_cast<int>(x_dims[i + 2]);
}
}
auto paddings = *param_.paddings;
std::vector<int64_t> output_shape({x_dims[0], x_dims[1]});
if (param_.adaptive) {
output_shape.insert(
......@@ -71,15 +119,14 @@ bool PoolOpLite::InferShape() const {
for (size_t i = 0; i < param_.ksize.size(); ++i) {
output_shape.push_back(PoolOutputSize(x_dims[i + 2],
param_.ksize[i],
param_.paddings[i],
paddings[2 * i],
paddings[2 * i + 1],
param_.strides[i],
param_.ceil_mode));
}
}
param_.output->Resize(lite::DDim(output_shape));
// ctx->SetOutputDim("Out", framework::make_ddim(output_shape));
// ctx->ShareLoD("X", "Out");
return true;
}
......
......@@ -14,6 +14,7 @@
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "lite/core/kernel.h"
......@@ -51,7 +52,7 @@ class PoolOpLite : public OpLite {
param_.ksize = op_desc.GetAttr<std::vector<int>>("ksize");
param_.global_pooling = op_desc.GetAttr<bool>("global_pooling");
param_.strides = op_desc.GetAttr<std::vector<int>>("strides");
param_.paddings = op_desc.GetAttr<std::vector<int>>("paddings");
auto paddings = op_desc.GetAttr<std::vector<int>>("paddings");
if (op_desc.HasAttr("exclusive")) {
param_.exclusive = op_desc.GetAttr<bool>("exclusive");
......@@ -65,7 +66,23 @@ class PoolOpLite : public OpLite {
if (op_desc.HasAttr("use_quantizer")) {
param_.use_quantizer = op_desc.GetAttr<bool>("use_quantizer");
}
// param_.data_format = op_desc.GetAttr<bool>("data_format");
if (op_desc.HasAttr("padding_algorithm")) {
padding_algorithm_ = op_desc.GetAttr<std::string>("padding_algorithm");
}
// 2-pad to 4-pad
if (paddings.size() == 2L) {
for (size_t i = 0; i < 2L; ++i) {
int copy_pad = *(paddings.begin() + 2 * i);
paddings.insert(paddings.begin() + 2 * i + 1, copy_pad);
}
} else {
if (paddings.size() != 4L) {
LOG(FATAL)
<< "Paddings size should be the same or twice as the inputs size.";
}
}
param_.paddings = std::make_shared<std::vector<int>>(paddings);
return true;
}
......@@ -75,6 +92,7 @@ class PoolOpLite : public OpLite {
private:
mutable PoolParam param_;
std::string padding_algorithm_{""};
};
} // namespace operators
......
......@@ -69,8 +69,7 @@ DDim compute_out_dim(const DDim& dim_in,
auto kernel_w = param.ksize[1];
auto h = dim_in[2];
auto w = dim_in[3];
int pad_h = param.paddings[0];
int pad_w = param.paddings[1];
auto paddings = *param.paddings;
int stride_h = param.strides[0];
int stride_w = param.strides[1];
bool ceil_mode = param.ceil_mode;
......@@ -79,11 +78,15 @@ DDim compute_out_dim(const DDim& dim_in,
int wout = 1;
if (!flag_global) {
if (!ceil_mode) {
hout = (h - kernel_h + 2 * pad_h) / stride_h + 1;
wout = (w - kernel_w + 2 * pad_w) / stride_w + 1;
hout = (h - kernel_h + paddings[0] + paddings[1]) / stride_h + 1;
wout = (w - kernel_w + paddings[2] + paddings[3]) / stride_w + 1;
} else {
hout = (h - kernel_h + 2 * pad_h + stride_h - 1) / stride_h + 1;
wout = (w - kernel_w + 2 * pad_w + stride_w - 1) / stride_w + 1;
hout =
(h - kernel_h + paddings[0] + paddings[1] + stride_h - 1) / stride_h +
1;
wout =
(w - kernel_w + paddings[2] + paddings[3] + stride_w - 1) / stride_w +
1;
}
}
dim_out[2] = hout;
......@@ -116,7 +119,7 @@ void pooling_basic(const float* din,
int stride_h = strides[0];
int stride_w = strides[1];
int pad_h = paddings[0];
int pad_w = paddings[1];
int pad_w = paddings[2];
int size_channel_in = win * hin;
int size_channel_out = wout * hout;
if (global_pooling) {
......@@ -195,18 +198,22 @@ void pooling_basic(const float* din,
int bh = kernel_h;
int bw = kernel_w;
if (ew == win) {
bw = sw + kernel_w >= win + pad_w ? win + pad_w
: sw + kernel_w;
bw = (sw + kernel_w) >= (win + paddings[3])
? (win + paddings[3])
: (sw + kernel_w);
bw -= sw;
if (sw - pad_w < 0 && sw + kernel_w > win + pad_w) {
if ((sw - pad_w) < 0 &&
(sw + kernel_w) > (win + paddings[3])) {
bw += pad_w;
}
}
if (eh == hin) {
bh = sh + kernel_h >= hin + pad_h ? hin + pad_h
: sh + kernel_h;
bh = (sh + kernel_h) >= (hin + paddings[1])
? (hin + paddings[1])
: (sh + kernel_h);
bh -= sh;
if (sh - pad_h < 0 && sh + kernel_h > hin + pad_h) {
if ((sh - pad_h) < 0 &&
(sh + kernel_h) > (hin + paddings[1])) {
bh += pad_h;
}
}
......@@ -243,7 +250,7 @@ void test_pool_fp32(const std::vector<DDim>& input_dims,
param.ksize = ksize;
param.strides = strides;
param.paddings = pads;
param.paddings = std::make_shared<std::vector<int>>(pads);
param.ceil_mode = ceil_mode;
param.global_pooling = flag_global;
param.pooling_type = pooling_type;
......@@ -399,31 +406,38 @@ TEST(TestPoolRand, test_pool_rand) {
for (auto& kw : {1, 2, 3}) {
for (auto& kh : {1, 2, 3}) {
for (auto& stride : {1, 2}) {
for (auto& pad : {0, 1, 2}) {
for (auto& flag_global : {false, true}) {
for (auto& exclusive : {false, true}) {
for (auto& ceil_mode : {false, true}) {
for (auto& pooling_type : {"max", "avg"}) {
bool adaptive = false;
bool use_quantizer = false;
std::vector<DDim> dims;
for (auto& batch : {1, 2}) {
for (auto& h : {1, 2, 3, 4, 11, 19, 32, 28}) {
dims.push_back(DDim({batch, cin, h, h}));
for (auto& pad_top : {0, 1, 2}) {
for (auto& pad_bottom : {0, 1, 2}) {
for (auto& pad_left : {0, 1, 2}) {
for (auto& pad_right : {0, 1, 2}) {
for (auto& flag_global : {false, true}) {
for (auto& exclusive : {false, true}) {
for (auto& ceil_mode : {false, true}) {
for (auto& pooling_type : {"max", "avg"}) {
bool adaptive = false;
bool use_quantizer = false;
std::vector<DDim> dims;
for (auto& batch : {1, 2}) {
for (auto& h : {1, 2, 3, 4, 11, 19, 32, 28}) {
dims.push_back(DDim({batch, cin, h, h}));
}
}
test_pool_fp32(
dims,
{kh, kw},
{stride, stride},
{pad_top, pad_bottom, pad_left, pad_right},
ceil_mode,
flag_global,
exclusive,
adaptive,
use_quantizer,
pooling_type,
{1, 2, 4},
{FLAGS_power_mode});
}
}
}
test_pool_fp32(dims,
{kh, kw},
{stride, stride},
{pad, pad},
ceil_mode,
flag_global,
exclusive,
adaptive,
use_quantizer,
pooling_type,
{1, 2, 4},
{FLAGS_power_mode});
}
}
}
......@@ -443,7 +457,7 @@ TEST(TesPoolCustom, test_pool_fp32_custom_size) {
{DDim({FLAGS_batch, FLAGS_in_channel, FLAGS_in_height, FLAGS_in_width})},
{FLAGS_kernel_h, FLAGS_kernel_w},
{FLAGS_stride_h, FLAGS_stride_w},
{FLAGS_pad_h, FLAGS_pad_w},
{FLAGS_pad_h, FLAGS_pad_h, FLAGS_pad_w, FLAGS_pad_w},
FLAGS_ceil_mode,
FLAGS_flag_global,
FLAGS_exclusive,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册