Optimize pooling3x3

eb1e983c · hjchen2 · 26769ad7 · eb1e983c · eb1e983c · eb1e983c
Showing with 240 addition and 73 deletion

src/operators/math/pooling.cpp src/operators/math/pooling.cpp +0 -6

src/operators/math/pooling.h src/operators/math/pooling.h +24 -29

src/operators/math/pooling3x3.cpp src/operators/math/pooling3x3.cpp +216 -38

未找到文件。
--- a/src/operators/math/pooling.cpp
+++ b/src/operators/math/pooling.cpp
@@ -60,18 +60,12 @@ void Pooling<P>::operator()(const framework::Tensor &input,
          wstart = std::max(wstart, 0);

          PoolingVal<P> val;
-          //          std::cout << "output[" << ph * output_width + pw << "]:"
-          //          << std::endl;
          for (int h = hstart; h < hend; ++h) {
            for (int w = wstart; w < wend; ++w) {
              val += input_ptr[h * input_width + w];
-              //              std::cout << "input[" << h << "][" << w << "] = "
-              //              << input_ptr[h * input_width + w] << std::endl;
            }
          }
          output_ptr[ph * output_width + pw] = val.Value();
-          //          std::cout << "output[" << ph * output_width + pw << "] = "
-          //          << val.Value() << std::endl;
        }
      }
    }

--- a/src/operators/math/pooling.h
+++ b/src/operators/math/pooling.h
@@ -34,45 +34,39 @@ template <PoolingType P = Max>
 struct PoolingVal {
  float val;
  int count;
-  PoolingVal() {
-    val = -std::numeric_limits<float>::max();
-    count = 0;
-  }
+  PoolingVal() : count(0) { val = -std::numeric_limits<float>::max(); }
  inline PoolingVal<P> &operator+=(const float &x) {
    val = std::max(val, x);
-    count += 1;
+    ++count;
    return *this;
  }
-  float Value() const {
-    if (count > 0) {
-      return val;
-    }
-    return 0.f;
-  }
+  inline float Value() { return (count > 0) ? val : 0.f; }
 };

 template <>
 struct PoolingVal<Avg> {
  float val;
  int count;
-  PoolingVal() {
-    val = 0.f;
-    count = 0;
-  }
+  PoolingVal() : val(0.f), count(0) {}
  inline PoolingVal<Avg> &operator+=(const float &x) {
    val += x;
-    count += 1;
+    ++count;
    return *this;
  }
-  float Value() const {
-    if (count > 0) {
-      return val / count;
-    }
-    return 0.f;
-  }
+  inline float Value() { return (count > 0) ? val * (1.f / count) : 0.f; }
 };

 #if defined(__ARM_NEON) || defined(__ARM_NEON__)
+template <PoolingType P = Max>
+inline float32x4_t vPoolInitq_f32() {
+  return vdupq_n_f32(-std::numeric_limits<float>::max());
+}
+
+template <>
+inline float32x4_t vPoolInitq_f32<Avg>() {
+  return vdupq_n_f32(0.f);
+}
+
 template <PoolingType P = Max>
 inline float32x4_t vPoolPreq_f32(const float32x4_t &x1, const float32x4_t &x2) {
  return vmaxq_f32(x1, x2);
@@ -85,14 +79,15 @@ inline float32x4_t vPoolPreq_f32<Avg>(const float32x4_t &x1,
 }

 template <PoolingType P = Max>
-inline float32x4_t vPoolPostq_f32(const float32x4_t &x) {
+inline float32x4_t vPoolPostq_f32(const float32x4_t &x,
+                                  const float32x4_t &post) {
  return x;
 }

 template <>
-inline float32x4_t vPoolPostq_f32<Avg>(const float32x4_t &x) {
-  float32x4_t avg = vdupq_n_f32(1.f / 9);
-  return vmulq_f32(avg, x);
+inline float32x4_t vPoolPostq_f32<Avg>(const float32x4_t &x,
+                                       const float32x4_t &post) {
+  return vmulq_f32(x, post);
 }
 #endif  // __ARM_NEON__

@@ -107,13 +102,13 @@ inline float PoolPre<Avg>(const float &x1, const float &x2) {
 }

 template <PoolingType P = Max>
-inline float PoolPost(const float &x) {
+inline float PoolPost(const float &x, const float &post) {
  return x;
 }

 template <>
-inline float PoolPost<Avg>(const float &x) {
-  return 1.f / 9 * x;
+inline float PoolPost<Avg>(const float &x, const float &post) {
+  return x * post;
 }

 template <PoolingType P>

--- a/src/operators/math/pooling3x3.cpp
+++ b/src/operators/math/pooling3x3.cpp