update pooling 2-pad to 4-pad, test=develop

0516e18d · chenjiaoAngel · c9d607e1 · 0516e18d · 0516e18d · 0516e18d
16 changed files
--- a/lite/backends/arm/math/pooling.cc
+++ b/lite/backends/arm/math/pooling.cc
@@ -46,7 +46,7 @@ void pooling_basic(const float* din,
  int stride_h = strides[0];
  int stride_w = strides[1];
  int pad_h = paddings[0];
-  int pad_w = paddings[1];
+  int pad_w = paddings[2];
  int size_channel_in = win * hin;
  int size_channel_out = wout * hout;
  if (global_pooling) {
@@ -125,18 +125,18 @@ void pooling_basic(const float* din,
                int bh = kernel_h;
                int bw = kernel_w;
                if (ew == win) {
-                  bw = sw + kernel_w >= win + pad_w ? win + pad_w
+                  bw = sw + kernel_w >= win + paddings[3] ? win + paddings[3]
-                                                    : sw + kernel_w;
+                                                          : sw + kernel_w;
                  bw -= sw;
-                  if (sw - pad_w < 0 && sw + kernel_w > win + pad_w) {
+                  if (sw - pad_w < 0 && sw + kernel_w > win + paddings[3]) {
                    bw += pad_w;
                  }
                }
                if (eh == hin) {
-                  bh = sh + kernel_h >= hin + pad_h ? hin + pad_h
+                  bh = sh + kernel_h >= hin + paddings[1] ? hin + paddings[1]
-                                                    : sh + kernel_h;
+                                                          : sh + kernel_h;
                  bh -= sh;
-                  if (sh - pad_h < 0 && sh + kernel_h > hin + pad_h) {
+                  if (sh - pad_h < 0 && sh + kernel_h > hin + paddings[1]) {
                    bh += pad_h;
                  }
                }

--- a/lite/backends/fpga/KD/pes/pooling_pe.hpp
+++ b/lite/backends/fpga/KD/pes/pooling_pe.hpp
@@ -51,7 +51,7 @@ class PoolingPE : public PE {
    args.image.height = input->shape().height();
    args.image.width = input->shape().width();
    args.image.pad_height = param_.paddings[0];
-    args.image.pad_width = param_.paddings[1];
+    args.image.pad_width = param_.paddings[2];
    args.image.scale_address = input->scale();
    args.output.address = output->mutableData<float16>();
    args.output.scale_address = output->scale();
@@ -81,7 +81,7 @@ class PoolingPE : public PE {
    int image_width = input->shape().width();
    int image_channels = input->shape().channel();
    int image_pad_h = param_.paddings[0];
-    int image_pad_w = param_.paddings[1];
+    int image_pad_w = param_.paddings[2];
    int kernel_height = param_.kernelSize[1];
    int kernel_width = param_.kernelSize[0];
    int kernel_step_h = param_.strides[0];

--- a/lite/backends/x86/math/pooling.cc
+++ b/lite/backends/x86/math/pooling.cc
@@ -49,7 +49,7 @@ class Pool2dFunctor<lite::TargetType::kX86, PoolProcess, T> {
    const int stride_height = strides[0];
    const int stride_width = strides[1];
    const int padding_height = paddings[0];
-    const int padding_width = paddings[1];
+    const int padding_width = paddings[2];
    const int input_stride = input_height * input_width;
    const int output_stride = output_height * output_width;
@@ -130,7 +130,7 @@ class Pool2dGradFunctor<lite::TargetType::kX86, PoolProcess, T> {
    const int stride_height = strides[0];
    const int stride_width = strides[1];
    const int padding_height = paddings[0];
-    const int padding_width = paddings[1];
+    const int padding_width = paddings[2];
    const int input_stride = input_height * input_width;
    const int output_stride = output_height * output_width;
@@ -213,7 +213,7 @@ class MaxPool2dGradFunctor<lite::TargetType::kX86, T> {
    const int stride_height = strides[0];
    const int stride_width = strides[1];
    const int padding_height = paddings[0];
-    const int padding_width = paddings[1];
+    const int padding_width = paddings[2];
    const int input_stride = input_height * input_width;
    const int output_stride = output_height * output_width;
@@ -629,7 +629,7 @@ class MaxPool2dWithIndexFunctor<lite::TargetType::kX86, T1, T2> {
    const int stride_height = strides[0];
    const int stride_width = strides[1];
    const int padding_height = paddings[0];
-    const int padding_width = paddings[1];
+    const int padding_width = paddings[2];
    const int input_stride = input_height * input_width;
    const int output_stride = output_height * output_width;

--- a/lite/kernels/arm/pool_compute.cc
+++ b/lite/kernels/arm/pool_compute.cc
@@ -48,12 +48,14 @@ void PoolCompute::Run() {
  bool use_quantizer = param.use_quantizer;
  std::string& data_format = param.data_format;
+  bool pads_equal =
+      (paddings[0] == paddings[1]) && (paddings[2] == paddings[3]);
  bool kps_equal = (ksize[0] == ksize[1]) && (strides[0] == strides[1]) &&
-                   (paddings[0] == paddings[1]);
+                   (paddings[0] == paddings[2]);
  if (global_pooling) {
    for (size_t i = 0; i < ksize.size(); ++i) {
-      paddings[i] = 0;
      ksize[i] = static_cast<int>(in_dims[i + 2]);
    }
    if (pooling_type == "max") {
@@ -80,7 +82,8 @@ void PoolCompute::Run() {
      return;
    }
  } else {
-    if (ksize[0] == 2 && strides[0] == 2 && paddings[0] == 0 && kps_equal) {
+    if (ksize[0] == 2 && strides[0] == 2 && paddings[0] == 0 && pads_equal &&
+        kps_equal) {
      if (pooling_type == "max") {
        lite::arm::math::pooling2x2s2_max(din,
                                          dout,
@@ -106,7 +109,7 @@ void PoolCompute::Run() {
        return;
      }
    } else if (ksize[0] == 3 && strides[0] == 1 && paddings[0] == 1 &&
-               kps_equal) {
+               pads_equal && kps_equal) {
      if (pooling_type == "max") {
        lite::arm::math::pooling3x3s1p1_max(din,
                                            dout,
@@ -132,7 +135,7 @@ void PoolCompute::Run() {
        return;
      }
    } else if (ksize[0] == 3 && strides[0] == 1 && paddings[0] == 0 &&
-               kps_equal) {
+               pads_equal && kps_equal) {
      if (pooling_type == "max") {
        lite::arm::math::pooling3x3s1p0_max(din,
                                            dout,
@@ -158,7 +161,7 @@ void PoolCompute::Run() {
        return;
      }
    } else if (ksize[0] == 3 && strides[0] == 2 && paddings[0] == 0 &&
-               kps_equal) {
+               pads_equal && kps_equal) {
      if (pooling_type == "max") {
        lite::arm::math::pooling3x3s2p0_max(din,
                                            dout,
@@ -184,7 +187,7 @@ void PoolCompute::Run() {
        return;
      }
    } else if (ksize[0] == 3 && strides[0] == 2 && paddings[0] == 1 &&
-               kps_equal) {
+               pads_equal && kps_equal) {
      if (pooling_type == "max") {
        lite::arm::math::pooling3x3s2p1_max(din,
                                            dout,

--- a/lite/kernels/arm/pool_compute_test.cc
+++ b/lite/kernels/arm/pool_compute_test.cc
@@ -25,14 +25,21 @@ namespace lite {
 namespace kernels {
 namespace arm {
-int PoolOutputSize(
+int PoolOutputSize(int input_size,
-    int input_size, int filter_size, int padding, int stride, bool ceil_mode) {
+                   int filter_size,
+                   int pad_left,  // summed with pad_right into the span
+                   int pad_right,
+                   int stride,
+                   bool ceil_mode) {  // true: bias by stride - 1 first
  int output_size;
  if (!ceil_mode) {
-    output_size = (input_size - filter_size + 2 * padding) / stride + 1;
+    output_size =  // integer division of the padded span, then + 1
+        (input_size - filter_size + pad_left + pad_right) / stride + 1;
  } else {
    output_size =
-        (input_size - filter_size + 2 * padding + stride - 1) / stride + 1;
+        (input_size - filter_size + pad_left + pad_right + stride - 1) /
+            stride +  // numerator already includes stride - 1
+        1;
  }
  return output_size;
 }
@@ -43,7 +50,8 @@ std::vector<int64_t> compute_output_shape(operators::PoolParam* param_) {
  if (param_->global_pooling) {
    ksize.resize(static_cast<size_t>(x_dims.size()) - 2);
    for (size_t i = 0; i < ksize.size(); ++i) {
-      param_->paddings[i] = 0;
+      param_->paddings[2 * i] = 0;
+      param_->paddings[2 * i + 1] = 0;
      ksize[i] = static_cast<int>(x_dims[i + 2]);
    }
  }
@@ -56,7 +64,8 @@ std::vector<int64_t> compute_output_shape(operators::PoolParam* param_) {
    for (size_t i = 0; i < param_->ksize.size(); ++i) {
      output_shape.push_back(PoolOutputSize(x_dims[i + 2],
                                            param_->ksize[i],
-                                            param_->paddings[i],
+                                            param_->paddings[2 * i],
+                                            param_->paddings[2 * i + 1],
                                            param_->strides[i],
                                            param_->ceil_mode));
    }
@@ -99,7 +108,7 @@ void pool_compute_ref(const operators::PoolParam& param) {
  int stride_h = strides[0];
  int stride_w = strides[1];
  int pad_h = paddings[0];
-  int pad_w = paddings[1];
+  int pad_w = paddings[2];
  int size_channel_in = win * hin;
  int size_channel_out = wout * hout;
  if (global_pooling) {
@@ -178,18 +187,18 @@ void pool_compute_ref(const operators::PoolParam& param) {
                int bh = kernel_h;
                int bw = kernel_w;
                if (ew == win) {
-                  bw = sw + kernel_w >= win + pad_w ? win + pad_w
+                  bw = sw + kernel_w >= win + paddings[3] ? win + paddings[3]
-                                                    : sw + kernel_w;
+                                                          : sw + kernel_w;
                  bw -= sw;
-                  if (sw - pad_w < 0 && sw + kernel_w > win + pad_w) {
+                  if (sw - pad_w < 0 && sw + kernel_w > win + paddings[3]) {
                    bw += pad_w;
                  }
                }
                if (eh == hin) {
-                  bh = sh + kernel_h >= hin + pad_h ? hin + pad_h
+                  bh = sh + kernel_h >= hin + paddings[1] ? hin + paddings[1]
-                                                    : sh + kernel_h;
+                                                          : sh + kernel_h;
                  bh -= sh;
-                  if (sh - pad_h < 0 && sh + kernel_h > hin + pad_h) {
+                  if (sh - pad_h < 0 && sh + kernel_h > hin + paddings[1]) {
                    bh += pad_h;
                  }
                }
@@ -262,7 +271,7 @@ TEST(pool_arm, compute) {
                        }
                        param.global_pooling = global_pooling;
                        param.strides = {stride, stride};
-                        param.paddings = {pad, pad};
+                        param.paddings = {pad, pad, pad, pad};
                        param.exclusive = exclusive;
                        param.ceil_mode = ceil_mode;
                        param.adaptive = false;

--- a/lite/kernels/cuda/pool_compute.cu
+++ b/lite/kernels/cuda/pool_compute.cu
@@ -267,7 +267,7 @@ void PoolCompute::Run() {
  const int stride_h = param.strides[0];
  const int stride_w = param.strides[1];
  const int pad_h = param.paddings[0];
-  const int pad_w = param.paddings[1];
+  const int pad_w = param.paddings[2];
  const int total_threads = out_dims.production();
  const int threads = 512;
  const int blocks = (total_threads + threads - 1) / threads;

--- a/lite/kernels/cuda/pool_compute_test.cc
+++ b/lite/kernels/cuda/pool_compute_test.cc
@@ -27,14 +27,21 @@ namespace cuda {
 using Tensor = lite::Tensor;
 using DDim = lite::DDim;
-static int PoolOutputSize(
+static int PoolOutputSize(int input_size,
-    int input_size, int filter_size, int padding, int stride, bool ceil_mode) {
+                          int filter_size,
+                          int pad_left,  // summed with pad_right
+                          int pad_right,
+                          int stride,
+                          bool ceil_mode) {  // true: bias by stride - 1
  int output_size;
  if (!ceil_mode) {
-    output_size = (input_size - filter_size + 2 * padding) / stride + 1;
+    output_size =  // integer division of the padded span, then + 1
+        (input_size - filter_size + pad_left + pad_right) / stride + 1;
  } else {
    output_size =
-        (input_size - filter_size + 2 * padding + stride - 1) / stride + 1;
+        (input_size - filter_size + pad_left + pad_right + stride - 1) /
+            stride +  // numerator already includes stride - 1
+        1;
  }
  return output_size;
 }
@@ -45,7 +52,8 @@ static std::vector<int64_t> compute_output_shape(operators::PoolParam* param_) {
  if (param_->global_pooling) {
    ksize.resize(static_cast<size_t>(x_dims.size()) - 2);
    for (size_t i = 0; i < ksize.size(); ++i) {
-      param_->paddings[i] = 0;
+      param_->paddings[2 * i] = 0;
+      param_->paddings[2 * i + 1] = 0;
      ksize[i] = static_cast<int>(x_dims[i + 2]);
    }
  }
@@ -58,7 +66,8 @@ static std::vector<int64_t> compute_output_shape(operators::PoolParam* param_) {
    for (size_t i = 0; i < param_->ksize.size(); ++i) {
      output_shape.push_back(PoolOutputSize(x_dims[i + 2],
                                            param_->ksize[i],
-                                            param_->paddings[i],
+                                            param_->paddings[2 * i],
+                                            param_->paddings[2 * i + 1],
                                            param_->strides[i],
                                            param_->ceil_mode));
    }
@@ -99,7 +108,7 @@ static void pool_compute_ref(const operators::PoolParam& param) {
  int stride_h = strides[0];
  int stride_w = strides[1];
  int pad_h = paddings[0];
-  int pad_w = paddings[1];
+  int pad_w = paddings[2];
  if (global_pooling == true) {
    for (int n = 0; n < in_n; ++n) {
@@ -226,7 +235,7 @@ TEST(pool_cuda, compute) {
                        }
                        param.global_pooling = global_pooling;
                        param.strides = {stride, stride};
-                        param.paddings = {pad, pad};
+                        param.paddings = {pad, pad, pad, pad};
                        param.exclusive = exclusive;
                        param.ceil_mode = ceil_mode;
                        param.adaptive = false;

--- a/lite/kernels/npu/bridges/pool_op.cc
+++ b/lite/kernels/npu/bridges/pool_op.cc
@@ -48,8 +48,13 @@ node_map_type PoolConverter(const std::shared_ptr<lite::OpLite> pool_op,
  auto npu_window = ge::AttrValue::LIST_INT(ksize.begin(), ksize.end());
  auto padding = op_info->GetAttr<std::vector<int>>("paddings");
+  bool pads_equal = (padding[0] == padding[1]) && (padding[2] == padding[3]);
+  if (!pads_equal) {
+    LOG(FATAL)
+        << "padding requires pad_left == pad_right, pad_top == pad_bottom";
+  }
  auto npu_pad =
-      ge::AttrValue::LIST_INT{padding[0], padding[0], padding[1], padding[1]};
+      ge::AttrValue::LIST_INT{padding[0], padding[1], padding[2], padding[3]};
  auto strides = op_info->GetAttr<std::vector<int>>("strides");
  auto npu_stride = ge::AttrValue::LIST_INT(strides.begin(), strides.end());
  int npu_ceil_mode = 0;

--- a/lite/kernels/npu/bridges/pool_op_test.cc
+++ b/lite/kernels/npu/bridges/pool_op_test.cc
@@ -61,7 +61,7 @@ void pool_ref(const std::shared_ptr<operators::PoolOpLite> op) {
  int stride_h = strides[0];
  int stride_w = strides[1];
  int pad_h = paddings[0];
-  int pad_w = paddings[1];
+  int pad_w = paddings[2];
  if (global_pooling == true) {
    for (int n = 0; n < in_n; ++n) {
@@ -163,7 +163,8 @@ void test_pool(int bs,
  opdesc.SetAttr("global_pooling", global_pooling);
  opdesc.SetAttr("exclusive", exclusive);
  opdesc.SetAttr("strides", std::vector<int>({stride, stride}));
-  opdesc.SetAttr("paddings", std::vector<int>({padding, padding}));
+  opdesc.SetAttr("paddings",
+                 std::vector<int>({padding, padding, padding, padding}));
  // create and convert op to NPU model, then run it on NPU
  auto op = CreateOp<operators::PoolOpLite>(opdesc, &scope);

--- a/lite/kernels/opencl/pool_compute.cc
+++ b/lite/kernels/opencl/pool_compute.cc
@@ -49,11 +49,17 @@ class PoolCompute
    std::vector<int> ksize = param.ksize;
    if (global_pooling) {
      for (size_t i = 0; i < ksize.size(); ++i) {
-        paddings[i] = 0;
+        paddings[2 * i] = 0;
+        paddings[2 * i + 1] = 0;
        ksize[i] = static_cast<int>(in_dims[i + 2]);
      }
    }
+    bool pads_equal =
+        (paddings[0] == paddings[1]) && (paddings[2] == paddings[3]);
+    if (!pads_equal) {
+      LOG(FATAL)
+          << "padding requires pad_left == pad_right, pad_top == pad_bottom";
+    }
    auto& context = ctx_->As<OpenCLContext>();
    CHECK(context.cl_context() != nullptr);
    auto* input_buf = param.x->data<float, cl::Buffer>();
@@ -89,7 +95,7 @@ class PoolCompute
    CL_CHECK_FATAL(status);
    status = kernel.setArg(++arg_idx, static_cast<const int>(paddings[0]));
    CL_CHECK_FATAL(status);
-    status = kernel.setArg(++arg_idx, static_cast<const int>(paddings[1]));
+    status = kernel.setArg(++arg_idx, static_cast<const int>(paddings[2]));
    CL_CHECK_FATAL(status);
    status = kernel.setArg(++arg_idx, *output_buf);
    CL_CHECK_FATAL(status);

--- a/lite/kernels/opencl/pool_compute_test.cc
+++ b/lite/kernels/opencl/pool_compute_test.cc
@@ -88,7 +88,7 @@ TEST(pool2d, compute) {
  param.output = &out;
  param.global_pooling = true;
  param.pooling_type = "avg";
-  param.paddings = std::vector<int>{0, 0};
+  param.paddings = std::vector<int>{0, 0, 0, 0};
  param.strides = std::vector<int>{1, 1};
  param.ksize = std::vector<int>{7, 7};

--- a/lite/kernels/x86/pool_compute_test.cc
+++ b/lite/kernels/x86/pool_compute_test.cc
@@ -60,7 +60,7 @@ TEST(pool2d_x86, run_test) {
  param.x = &x;
  param.output = &out;
  param.strides = {2, 2};
-  param.paddings = {0, 0};
+  param.paddings = {0, 0, 0, 0};
  param.ksize = {2, 2};
  param.pooling_type = "max";
  std::unique_ptr<KernelContext> ctx(new KernelContext);

--- a/lite/kernels/xpu/bridges/pool_op_test.cc
+++ b/lite/kernels/xpu/bridges/pool_op_test.cc
@@ -60,7 +60,7 @@ void pool_ref(const std::shared_ptr<operators::PoolOpLite> op) {
  int stride_h = strides[0];
  int stride_w = strides[1];
  int pad_h = paddings[0];
-  int pad_w = paddings[1];
+  int pad_w = paddings[2];
  if (global_pooling == true) {
    for (int n = 0; n < in_n; ++n) {
@@ -162,7 +162,8 @@ void test_pool(int bs,
  opdesc.SetAttr("global_pooling", global_pooling);
  opdesc.SetAttr("exclusive", exclusive);
  opdesc.SetAttr("strides", std::vector<int>({stride, stride}));
-  opdesc.SetAttr("paddings", std::vector<int>({padding, padding}));
+  opdesc.SetAttr("paddings",
+                 std::vector<int>({padding, padding, padding, padding}));
  opdesc.SetAttr("ceil_mode", ceil_mode);
  // create and convert op to XPU model, then run it on XPU

--- a/lite/operators/pool_op.cc
+++ b/lite/operators/pool_op.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 #include "lite/operators/pool_op.h"
+#include <algorithm>
 #include "lite/core/op_registry.h"
 namespace paddle {
@@ -35,19 +36,72 @@ bool PoolOpLite::CheckShape() const {
  // Strides size and pooling size should be the same.
  CHECK_OR_FALSE(ksize.size() == strides.size());
  // Paddings size and pooling size should be the same.
-  CHECK_OR_FALSE(ksize.size() == paddings.size());
+  // CHECK_OR_FALSE(ksize.size() == paddings.size());
  return true;
 }
-int PoolOutputSize(
+// Rewrites `paddings` in place so that it holds two entries per pooled axis,
+// then applies the padding policy selected by the remaining arguments.
+//
+//   1. When paddings->size() * 2 == data_dims.size(), every entry is
+//      duplicated so that indices 2*i and 2*i+1 both hold the value that was
+//      supplied for axis i. Any other size must equal data_dims.size();
+//      otherwise LOG(FATAL) is raised.
+//   2. padding_algorithm == "SAME": for each axis the total padding required
+//      for an output extent of (input + stride - 1) / stride is computed,
+//      clamped at 0, and split in two; index 2*i+1 receives the larger half
+//      when the total is odd.
+//      padding_algorithm == "VALID": every entry is set to 0.
+//      Any other string leaves the entries from step 1 untouched.
+//   3. When global_pooling or adaptive is true, every entry is set to 0.
+inline void UpdatePadding(std::vector<int>* paddings,
-    int input_size, int filter_size, int padding, int stride, bool ceil_mode) {
+                          const bool global_pooling,
+                          const bool adaptive,
+                          const std::string padding_algorithm,
+                          const lite::DDim data_dims,
+                          const std::vector<int>& strides,
+                          const std::vector<int>& ksize) {
+  // Expand one-value-per-axis paddings into a pair per axis.
+  if (paddings->size() * 2 == data_dims.size()) {
+    for (size_t i = 0; i < strides.size(); ++i) {
+      // Each earlier iteration inserted one extra entry, so the value that
+      // was originally supplied for axis i now lives at index 2 * i, not i.
+      // Reading index i would copy axis 0's padding into later axes.
+      const int copy_pad = *(paddings->begin() + 2 * i);
+      paddings->insert(paddings->begin() + 2 * i + 1, copy_pad);
+    }
+  } else {
+    if (paddings->size() != data_dims.size()) {
+      LOG(FATAL)
+          << "Paddings size should be the same or twice as the pooling size.";
+    }
+  }
+  // when padding_algorithm is "VALID" or "SAME"
+  if (padding_algorithm == "SAME") {
+    for (size_t i = 0; i < strides.size(); ++i) {
+      int out_size = (data_dims[i + 2] + strides[i] - 1) / strides[i];
+      // Total padding needed to reach out_size; never negative.
+      int pad_sum =
+          std::max((out_size - 1) * strides[i] + ksize[i] - data_dims[i + 2],
+                   static_cast<int64_t>(0));
+      int pad_0 = pad_sum / 2;
+      int pad_1 = pad_sum - pad_0;
+      *(paddings->begin() + i * 2) = pad_0;
+      *(paddings->begin() + i * 2 + 1) = pad_1;
+    }
+  } else if (padding_algorithm == "VALID") {
+    std::fill(paddings->begin(), paddings->end(), 0);
+  }
+  // if global_pooling == true or adaptive == true, padding will be ignored
+  if (global_pooling || adaptive) {
+    std::fill(paddings->begin(), paddings->end(), 0);
+  }
+}
+int PoolOutputSize(int input_size,
+                   int filter_size,
+                   int pad_left,  // summed with pad_right into the span
+                   int pad_right,
+                   int stride,
+                   bool ceil_mode) {  // true: bias by stride - 1 first
  int output_size;
  if (!ceil_mode) {
-    output_size = (input_size - filter_size + 2 * padding) / stride + 1;
+    output_size =  // integer division of the padded span, then + 1
+        (input_size - filter_size + pad_left + pad_right) / stride + 1;
  } else {
    output_size =
-        (input_size - filter_size + 2 * padding + stride - 1) / stride + 1;
+        (input_size - filter_size + pad_left + pad_right + stride - 1) /
+            stride +  // numerator already includes stride - 1
+        1;
  }
  return output_size;
 }
@@ -55,6 +109,14 @@ int PoolOutputSize(
 bool PoolOpLite::InferShape() const {
  const auto x_dims = param_.x->dims();
  std::vector<int>& ksize = param_.ksize;
+  // 2-pad to 4-pad
+  UpdatePadding(&param_.paddings,
+                param_.global_pooling,
+                param_.adaptive,
+                padding_algorithm_,
+                x_dims,
+                param_.strides,
+                ksize);
  if (param_.global_pooling) {
    ksize.resize(static_cast<size_t>(x_dims.size()) - 2);
    for (size_t i = 0; i < ksize.size(); ++i) {
@@ -71,15 +133,14 @@ bool PoolOpLite::InferShape() const {
    for (size_t i = 0; i < param_.ksize.size(); ++i) {
      output_shape.push_back(PoolOutputSize(x_dims[i + 2],
                                            param_.ksize[i],
-                                            param_.paddings[i],
+                                            param_.paddings[2 * i],
+                                            param_.paddings[2 * i + 1],
                                            param_.strides[i],
                                            param_.ceil_mode));
    }
  }
  param_.output->Resize(lite::DDim(output_shape));
-  // ctx->SetOutputDim("Out", framework::make_ddim(output_shape));
-  // ctx->ShareLoD("X", "Out");
  return true;
 }

--- a/lite/operators/pool_op.h
+++ b/lite/operators/pool_op.h
@@ -65,6 +65,9 @@ class PoolOpLite : public OpLite {
    if (op_desc.HasAttr("use_quantizer")) {
      param_.use_quantizer = op_desc.GetAttr<bool>("use_quantizer");
    }
+    if (op_desc.HasAttr("padding_algorithm")) {
+      padding_algorithm_ = op_desc.GetAttr<std::string>("padding_algorithm");
+    }
    // param_.data_format = op_desc.GetAttr<bool>("data_format");
    return true;
  }
@@ -75,6 +78,7 @@ class PoolOpLite : public OpLite {
 private:
  mutable PoolParam param_;
+  std::string padding_algorithm_{""};
 };
 }  // namespace operators

--- a/lite/tests/math/pool_compute_test.cc
+++ b/lite/tests/math/pool_compute_test.cc
@@ -69,8 +69,7 @@ DDim compute_out_dim(const DDim& dim_in,
  auto kernel_w = param.ksize[1];
  auto h = dim_in[2];
  auto w = dim_in[3];
-  int pad_h = param.paddings[0];
+  auto paddings = param.paddings;
-  int pad_w = param.paddings[1];
  int stride_h = param.strides[0];
  int stride_w = param.strides[1];
  bool ceil_mode = param.ceil_mode;
@@ -79,11 +78,15 @@ DDim compute_out_dim(const DDim& dim_in,
  int wout = 1;
  if (!flag_global) {
    if (!ceil_mode) {
-      hout = (h - kernel_h + 2 * pad_h) / stride_h + 1;
+      hout = (h - kernel_h + paddings[0] + paddings[1]) / stride_h + 1;
-      wout = (w - kernel_w + 2 * pad_w) / stride_w + 1;
+      wout = (w - kernel_w + paddings[2] + paddings[3]) / stride_w + 1;
    } else {
-      hout = (h - kernel_h + 2 * pad_h + stride_h - 1) / stride_h + 1;
+      hout =
-      wout = (w - kernel_w + 2 * pad_w + stride_w - 1) / stride_w + 1;
+          (h - kernel_h + paddings[0] + paddings[1] + stride_h - 1) / stride_h +
+          1;
+      wout =
+          (w - kernel_w + paddings[2] + paddings[3] + stride_w - 1) / stride_w +
+          1;
    }
  }
  dim_out[2] = hout;
@@ -116,7 +119,7 @@ void pooling_basic(const float* din,
  int stride_h = strides[0];
  int stride_w = strides[1];
  int pad_h = paddings[0];
-  int pad_w = paddings[1];
+  int pad_w = paddings[2];
  int size_channel_in = win * hin;
  int size_channel_out = wout * hout;
  if (global_pooling) {
@@ -195,18 +198,18 @@ void pooling_basic(const float* din,
                int bh = kernel_h;
                int bw = kernel_w;
                if (ew == win) {
-                  bw = sw + kernel_w >= win + pad_w ? win + pad_w
+                  bw = sw + kernel_w >= win + paddings[3] ? win + paddings[3]
-                                                    : sw + kernel_w;
+                                                          : sw + kernel_w;
                  bw -= sw;
-                  if (sw - pad_w < 0 && sw + kernel_w > win + pad_w) {
+                  if (sw - pad_w < 0 && sw + kernel_w > win + paddings[3]) {
                    bw += pad_w;
                  }
                }
                if (eh == hin) {
-                  bh = sh + kernel_h >= hin + pad_h ? hin + pad_h
+                  bh = sh + kernel_h >= hin + paddings[1] ? hin + paddings[1]
-                                                    : sh + kernel_h;
+                                                          : sh + kernel_h;
                  bh -= sh;
-                  if (sh - pad_h < 0 && sh + kernel_h > hin + pad_h) {
+                  if (sh - pad_h < 0 && sh + kernel_h > hin + paddings[1]) {
                    bh += pad_h;
                  }
                }
@@ -399,31 +402,38 @@ TEST(TestPoolRand, test_pool_rand) {
      for (auto& kw : {1, 2, 3}) {
        for (auto& kh : {1, 2, 3}) {
          for (auto& stride : {1, 2}) {
-            for (auto& pad : {0, 1, 2}) {
+            for (auto& pad_top : {0, 1, 2}) {
-              for (auto& flag_global : {false, true}) {
+              for (auto& pad_bottom : {0, 1, 2}) {
-                for (auto& exclusive : {false, true}) {
+                for (auto& pad_left : {0, 1, 2}) {
-                  for (auto& ceil_mode : {false, true}) {
+                  for (auto& pad_right : {0, 1, 2}) {
-                    for (auto& pooling_type : {"max", "avg"}) {
+                    for (auto& flag_global : {false, true}) {
-                      bool adaptive = false;
+                      for (auto& exclusive : {false, true}) {
-                      bool use_quantizer = false;
+                        for (auto& ceil_mode : {false, true}) {
-                      std::vector<DDim> dims;
+                          for (auto& pooling_type : {"max", "avg"}) {
-                      for (auto& batch : {1, 2}) {
+                            bool adaptive = false;
-                        for (auto& h : {1, 2, 3, 4, 11, 19, 32, 28}) {
+                            bool use_quantizer = false;
-                          dims.push_back(DDim({batch, cin, h, h}));
+                            std::vector<DDim> dims;
+                            for (auto& batch : {1, 2}) {
+                              for (auto& h : {1, 2, 3, 4, 11, 19, 32, 28}) {
+                                dims.push_back(DDim({batch, cin, h, h}));
+                              }
+                            }
+                            test_pool_fp32(
+                                dims,
+                                {kh, kw},
+                                {stride, stride},
+                                {pad_top, pad_bottom, pad_left, pad_right},
+                                ceil_mode,
+                                flag_global,
+                                exclusive,
+                                adaptive,
+                                use_quantizer,
+                                pooling_type,
+                                {1, 2, 4},
+                                {FLAGS_power_mode});
+                          }
                        }
                      }
-                      test_pool_fp32(dims,
-                                     {kh, kw},
-                                     {stride, stride},
-                                     {pad, pad},
-                                     ceil_mode,
-                                     flag_global,
-                                     exclusive,
-                                     adaptive,
-                                     use_quantizer,
-                                     pooling_type,
-                                     {1, 2, 4},
-                                     {FLAGS_power_mode});
                    }
                  }
                }
@@ -443,7 +453,7 @@ TEST(TesPoolCustom, test_pool_fp32_custom_size) {
      {DDim({FLAGS_batch, FLAGS_in_channel, FLAGS_in_height, FLAGS_in_width})},
      {FLAGS_kernel_h, FLAGS_kernel_w},
      {FLAGS_stride_h, FLAGS_stride_w},
-      {FLAGS_pad_h, FLAGS_pad_w},
+      {FLAGS_pad_h, FLAGS_pad_h, FLAGS_pad_w, FLAGS_pad_w},
      FLAGS_ceil_mode,
      FLAGS_flag_global,
      FLAGS_exclusive,