Commit 73bac0f2 authored by chenjiaoAngel

fix conflict, test=develop

Parent 40482334
......@@ -583,7 +583,7 @@ void pooling_basic(const float* din,
"vld2.f32 {d0-d3}, [%[dr0]]! @ load \n" \
"vld2.f32 {d4-d7}, [%[dr1]]! @ load \n" \
"vadd.f32 q5, q9, q8 @ max reduce\n" \
"subs %[cnt_num], %[cnt_num], #1 @ subs cnt_num \n" \
"subs %[cnt_num], #1 @ subs cnt_num \n" \
"vmul.f32 q4, q5, %q[vcoef_left] @ mul coef \n" \
"vst1.f32 {d8-d9}, [%[dr_out]]! @ store 4 out \n" \
"ble 2f @ bne\n"
......@@ -1345,19 +1345,16 @@ void pooling2x2s2p1_max(const float* din,
[dr1] "+r"(dr1),
[dr_out] "+r"(dr_out),
[cnt_num] "+r"(cnt_num)
: [vzero] "w" (vzero)
: [vzero] "w"(vzero)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v8");
#else
-  // cnt_num -= 1;
asm volatile(
-      P2x2S2_INIT
-      P2x2S2P1_MAX
-      P2x2S2P0_MAX "2: \n" /* end */
+      P2x2S2_INIT P2x2S2P1_MAX P2x2S2P0_MAX "2: \n" /* end */
: [dr0] "+r"(dr0),
[dr1] "+r"(dr1),
[dr_out] "+r"(dr_out),
[cnt_num] "+r"(cnt_num)
: [vzero] "w" (vzero)
: [vzero] "w"(vzero)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q8", "q9");
#endif
dr0 -= 8;
......@@ -1455,7 +1452,8 @@ void pooling2x2s2p1_avg(const float* din,
}
float coef_left_most = exclusive ? coef_h : coef_h / 2;
float32x4_t vcoef = vdupq_n_f32(coef_h / 2);
-  float coef_left[4] = {coef_left_most, coef_h / 2, coef_h / 2, coef_h / 2};
+  float coef_left[4] =
+      {coef_left_most, coef_h / 2, coef_h / 2, coef_h / 2};
float32x4_t vcoef_left = vld1q_f32(coef_left);
int cnt_num = w_unroll_size;
if (w_unroll_size > 0) {
......@@ -1466,7 +1464,9 @@ void pooling2x2s2p1_avg(const float* din,
[dr1] "+r"(dr1),
[dr_out] "+r"(dr_out),
[cnt_num] "+r"(cnt_num)
: [vcoef] "w"(vcoef), [vzero] "w"(vzero), [vcoef_left] "w"(vcoef_left)
: [vcoef] "w"(vcoef),
[vzero] "w"(vzero),
[vcoef_left] "w"(vcoef_left)
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v8");
#else
asm volatile(
......@@ -1475,7 +1475,9 @@ void pooling2x2s2p1_avg(const float* din,
[dr1] "+r"(dr1),
[dr_out] "+r"(dr_out),
[cnt_num] "+r"(cnt_num)
: [vcoef] "w"(vcoef), [vzero] "w"(vzero), [vcoef_left] "w"(vcoef_left)
: [vcoef] "w"(vcoef),
[vzero] "w"(vzero),
[vcoef_left] "w"(vcoef_left)
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q8", "q9");
#endif
dr0 -= 8;
......@@ -2571,7 +2573,6 @@ void pooling3x3s2p0_max(const float* din,
[dr1] "+r"(dr1),
[dr2] "+r"(dr2),
[dr_out] "+r"(dr_out),
[remain] "+r" (cnt_remain),
[cnt_num] "+r"(cnt_num)
:
: "cc",
......@@ -2633,7 +2634,7 @@ void pooling3x3s2p0_max(const float* din,
[dr1] "+r"(dr1),
[dr2] "+r"(dr2),
[dr_out] "+r"(dr_out),
[remain] "+r" (cnt_remain),
[remain] "+r"(cnt_remain),
[cnt_num] "+r"(cnt_num)
:
: "cc",
......@@ -2650,20 +2651,17 @@ void pooling3x3s2p0_max(const float* din,
"q9",
"q10",
"q11");
#endif
-      // dr0 -= 8;
-      // dr1 -= 8;
-      // dr2 -= 8;
-      if (right){
+      if (right) {
int wstart = (w_unroll_size * 4 + remain) * S;
int wend = std::min(wstart + K, win);
-        float tmp = dr0[wstart];//std::numeric_limits<float>::min();
-        for(int i = wstart; i < wend; i++){
-          tmp = std::max(tmp,std::max(dr0[i],dr1[i]));
-          tmp = std::max(tmp,dr2[i]);
+        float tmp = dr0[wstart]; //std::numeric_limits<float>::min();
+        for (int i = wstart; i < wend; i++) {
+          tmp = std::max(tmp, std::max(dr0[i], dr1[i]));
+          tmp = std::max(tmp, dr2[i]);
}
*(dr_out++) = tmp;
}
#endif
}
r0 = r2;
......
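For reference, a minimal scalar sketch of what the vcoef / vcoef_left setup in the pooling2x2s2p1_avg hunks above encodes: with pad_left = 1 the first output column covers only one input column, so in exclusive mode it is scaled by coef_h instead of coef_h / 2. This is an illustrative sketch under those assumptions (hypothetical helper name), not the kernel's actual code:

#include <algorithm>

// Scalar model of one output row of 2x2, stride-2 average pooling with
// pad_left = 1. coef_h already carries the vertical 1/window factor, so
// only the horizontal scaling is applied here.
void avg_pool_row_2x2s2p1(const float* r0, const float* r1, float* dout,
                          int win, int wout, float coef_h, bool exclusive) {
  for (int ow = 0; ow < wout; ++ow) {
    int ws = std::max(ow * 2 - 1, 0);    // window start (pad_left = 1)
    int we = std::min(ow * 2 + 1, win);  // window end, clipped to the input
    float sum = 0.f;
    for (int i = ws; i < we; ++i) sum += r0[i] + r1[i];
    // exclusive: divide by the clipped width (1 at the left border);
    // inclusive: always divide by the full kernel width of 2.
    float coef = exclusive ? coef_h / (we - ws) : coef_h / 2.f;
    dout[ow] = sum * coef;
  }
}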
......@@ -70,8 +70,7 @@ class PoolingPE : public PE {
param_.poolingArgs = args;
// use_cpu_ = output->shape().width() == 1 && output->shape().height() == 1
// &&
// (k_width > 7 || k_height > 7);
// && (k_width > 7 || k_height > 7);
use_cpu_ = output->shape().width() == 1 && output->shape().height() == 1 &&
(k_width > 255 || k_height > 255);
// use_cpu_ = param_.type == AVERAGE;
......
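The use_cpu_ heuristic above routes poolings with a 1x1 output (i.e. global pooling) and very large windows to the host CPU. As a point of reference, a minimal sketch of the global average case such a fallback has to cover, assuming NCHW layout (hypothetical helper, not PoolingPE's actual CPU path):

// Global average pooling: one output value per (batch, channel) plane.
void global_avg_pool_nchw(const float* in, float* out,
                          int n, int c, int h, int w) {
  const int hw = h * w;
  for (int i = 0; i < n * c; ++i) {
    float sum = 0.f;
    for (int j = 0; j < hw; ++j) sum += in[i * hw + j];
    out[i] = sum / hw;
  }
}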
......@@ -64,20 +64,6 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
return FAILED;
}
auto padding = op_info->GetAttr<std::vector<int>>("paddings");
-  bool pads_equal = (padding[0] == padding[1]) && (padding[2] == padding[3]);
-  if (!pads_equal) {
-    LOG(FATAL)
-        << "padding requires pad_left == pad_right, pad_top == pad_bottom";
-  }
-  auto npu_pad =
-      ge::AttrValue::LIST_INT{padding[0], padding[1], padding[2], padding[3]};
-  auto strides = op_info->GetAttr<std::vector<int>>("strides");
-  auto npu_stride = ge::AttrValue::LIST_INT(strides.begin(), strides.end());
-  int npu_ceil_mode = 0;
-  if (op_info->HasAttr("ceil_mode")) {
-    npu_ceil_mode = op_info->GetAttr<bool>("ceil_mode") ? 1 : 0;
-  }
// pad mode
int pad_mode = 0;
std::string padding_algorithm("");
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/pool_op.h"
#include <gtest/gtest.h>
#include "lite/core/op_registry.h"
#include "lite/kernels/xpu/bridges/registry.h"
#include "lite/kernels/xpu/bridges/test_helper.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace xpu {
namespace bridges {
void pool_ref(const std::shared_ptr<operators::PoolOpLite> op) {
Scope* scope = op->scope();
const OpInfo* op_info = op->op_info();
auto x = scope->FindVar(op_info->Input("X").front())->GetMutable<Tensor>();
auto out =
scope->FindVar(op_info->Output("Out").front())->GetMutable<Tensor>();
auto& in_dims = x->dims();
auto& out_dims = out->dims();
const float* src_ptr = x->data<const float>();
float* dst_ptr = out->mutable_data<float>();
std::vector<int> ksize = op_info->GetAttr<std::vector<int>>("ksize");
std::vector<int> strides = op_info->GetAttr<std::vector<int>>("strides");
std::vector<int> paddings = op_info->GetAttr<std::vector<int>>("paddings");
bool exclusive = op_info->GetAttr<bool>("exclusive");
std::string pooling_type = op_info->GetAttr<std::string>("pooling_type");
bool global_pooling = op_info->GetAttr<bool>("global_pooling");
int in_n = in_dims[0];
int in_c = in_dims[1];
int in_h = in_dims[2];
int in_w = in_dims[3];
int size_in_n = in_c * in_h * in_w;
int size_in_c = in_h * in_w;
int out_h = out_dims[2];
int out_w = out_dims[3];
int size_out_n = in_c * out_h * out_w;
int size_out_c = out_h * out_w;
int window_h = ksize[0];
int window_w = ksize[1];
int stride_h = strides[0];
int stride_w = strides[1];
int pad_h = paddings[0];
int pad_w = paddings[2];
if (global_pooling == true) {
for (int n = 0; n < in_n; ++n) {
for (int c = 0; c < in_c; ++c) {
const float* src = src_ptr + n * size_in_n + c * size_in_c;
float res = src[0];
if (pooling_type == "max") {
for (int i = 1; i < size_in_c; ++i) {
float cur_val = src[i];
res = cur_val > res ? cur_val : res;
}
} else if (pooling_type == "avg") {
for (int i = 1; i < size_in_c; ++i) {
float cur_val = src[i];
res += cur_val;
}
res /= size_in_c;
}
dst_ptr[n * size_out_n + c] = res;
}
}
} else {
for (int n = 0; n < in_n; ++n) {
for (int c = 0; c < in_c; ++c) {
for (int h = 0; h < out_h; ++h) {
int sh = h * stride_h;
int eh = sh + window_h;
sh = (sh - pad_h) < 0 ? 0 : sh - pad_h;
eh = (eh - pad_h) > in_h ? in_h : eh - pad_h;
for (int w = 0; w < out_w; ++w) {
int sw = w * stride_w;
int ew = sw + window_w;
sw = (sw - pad_w) < 0 ? 0 : sw - pad_w;
ew = (ew - pad_w) > in_w ? in_w : ew - pad_w;
int pooling_size = (ew - sw) * (eh - sh);
if (pooling_size == 0) continue;
float res = 0.f;
for (int kh = sh; kh < eh; ++kh) {
for (int kw = sw; kw < ew; ++kw) {
int src_idx = n * size_in_n + c * size_in_c + kh * in_w + kw;
if (kh == sh && kw == sw) {
res = src_ptr[src_idx];
} else {
if (pooling_type == "max") {
res = res >= src_ptr[src_idx] ? res : src_ptr[src_idx];
}
if (pooling_type == "avg") {
res += src_ptr[src_idx];
}
}
}
}
if (pooling_type == "avg") {
if (exclusive) {
res /= pooling_size;
} else {
res /= window_h * window_w;
}
}
dst_ptr[n * size_out_n + c * size_out_c + h * out_w + w] = res;
}
}
}
}
}
}
void test_pool(int bs,
int ic,
int ih,
int iw,
std::string pooling_type,
bool ceil_mode,
bool global_pooling,
bool exclusive,
int ksize,
int stride,
int padding) {
// prepare input&output variables
Scope scope;
std::string x_var_name = "x";
std::string out_var_name = "out";
std::string out_ref_var_name = "out_ref";
auto* x = scope.Var(x_var_name)->GetMutable<Tensor>();
auto* out = scope.Var(out_var_name)->GetMutable<Tensor>();
auto* out_ref = scope.Var(out_ref_var_name)->GetMutable<Tensor>();
x->Resize({bs, ic, ih, iw});
// initialize input&output data
FillTensor<float>(x);
// initialize op desc
cpp::OpDesc opdesc;
opdesc.SetType("pool2d");
opdesc.SetInput("X", {x_var_name});
opdesc.SetOutput("Out", {out_var_name});
opdesc.SetAttr("pooling_type", pooling_type);
opdesc.SetAttr("ksize", std::vector<int>({ksize, ksize}));
opdesc.SetAttr("global_pooling", global_pooling);
opdesc.SetAttr("exclusive", exclusive);
opdesc.SetAttr("strides", std::vector<int>({stride, stride}));
opdesc.SetAttr("paddings",
std::vector<int>({padding, padding, padding, padding}));
opdesc.SetAttr("ceil_mode", ceil_mode);
// create and convert op to XPU model, then run it on XPU
auto op = CreateOp<operators::PoolOpLite>(opdesc, &scope);
LauchOp(op, {x_var_name}, {out_var_name});
out_ref->CopyDataFrom(*out);
// execute reference implementation and save to output tensor
pool_ref(op);
// compare results
auto* out_data = out->mutable_data<float>();
auto* out_ref_data = out_ref->mutable_data<float>();
for (int i = 0; i < out->dims().production(); i++) {
EXPECT_NEAR(out_data[i], out_ref_data[i], 1e-5);
}
}
TEST(XPUBridges, pool) {
for (auto pooling_type : {"max", "avg"}) {
for (auto bs : {1, 3}) {
for (auto ic : {2}) {
for (auto ih : {3}) {
for (auto iw : {4}) {
test_pool(bs, ic, ih, iw, pooling_type, true, true, true, 0, 1, 0);
}
}
}
}
}
for (auto pooling_type : {"max"}) {
for (auto ceil_mode : {true, false}) {
for (auto ksize : {2, 3}) {
for (auto stride : {1, 2}) {
for (auto padding : {0, 1}) {
for (auto bs : {1, 3}) {
for (auto ic : {2}) {
for (auto ih : {3}) {
for (auto iw : {4}) {
test_pool(bs,
ic,
ih,
iw,
pooling_type,
ceil_mode,
false,
true,
ksize,
stride,
padding);
}
}
}
}
}
}
}
}
}
for (auto pooling_type : {"avg"}) {
for (auto ceil_mode : {true, false}) {
for (auto exclusive : {true, false}) {
for (auto ksize : {2, 3}) {
for (auto stride : {1, 2}) {
for (auto padding : {0, 1}) {
for (auto bs : {1, 3}) {
for (auto ic : {2}) {
for (auto ih : {3}) {
for (auto iw : {4}) {
test_pool(bs,
ic,
ih,
iw,
pooling_type,
ceil_mode,
false,
exclusive,
ksize,
stride,
padding);
}
}
}
}
}
}
}
}
}
}
}
} // namespace bridges
} // namespace xpu
} // namespace kernels
} // namespace lite
} // namespace paddle
USE_LITE_OP(pool2d);
USE_XPU_BRIDGE(pool2d);
......@@ -41,39 +41,6 @@ bool PoolOpLite::CheckShape() const {
return true;
}
-inline void UpdatePadding(std::vector<int>* paddings,
-                          const bool global_pooling,
-                          const bool adaptive,
-                          const std::string padding_algorithm,
-                          const lite::DDim data_dims,
-                          const std::vector<int>& strides,
-                          const std::vector<int>& ksize) {
-  // when padding_algorithm is "VALID" or "SAME"
-  if (padding_algorithm == "SAME") {
-    for (int i = 0; i < strides.size(); ++i) {
-      int out_size = (data_dims[i + 2] + strides[i] - 1) / strides[i];
-      int pad_sum =
-          std::max((out_size - 1) * strides[i] + ksize[i] - data_dims[i + 2],
-                   (int64_t)0);
-      int pad_0 = pad_sum / 2;
-      int pad_1 = pad_sum - pad_0;
-      *(paddings->begin() + i * 2) = pad_0;
-      *(paddings->begin() + i * 2 + 1) = pad_1;
-    }
-  } else if (padding_algorithm == "VALID") {
-    for (auto it = paddings->begin(); it != paddings->end(); it++) {
-      *it = 0;
-    }
-  }
-  // if global_pooling == true or adaptive == true, padding will be ignore
-  if (global_pooling || adaptive) {
-    for (auto it = paddings->begin(); it != paddings->end(); it++) {
-      *it = 0;
-    }
-  }
-}
int PoolOutputSize(int input_size,
int filter_size,
int pad_left,
......
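For reference, the SAME branch of the UpdatePadding helper removed above follows the standard rule: out = ceil(in / stride) and pad_sum = max((out - 1) * stride + ksize - in, 0), split as evenly as possible with the extra pixel on the end side. A standalone sketch with a worked example (hypothetical name, not a Paddle-Lite API):

#include <algorithm>
#include <utility>

// SAME padding for one spatial dimension: returns {pad_begin, pad_end}.
std::pair<int, int> same_padding(int in, int stride, int ksize) {
  int out = (in + stride - 1) / stride;  // ceil(in / stride)
  int pad_sum = std::max((out - 1) * stride + ksize - in, 0);
  int pad_0 = pad_sum / 2;               // odd remainder goes to pad_end
  return {pad_0, pad_sum - pad_0};
}
// e.g. same_padding(5, 2, 3): out = 3, pad_sum = 2 -> pads {1, 1}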