提交 eb1e983c 编写于 作者: H hjchen2

Optimize pooling3x3

上级 26769ad7
...@@ -60,18 +60,12 @@ void Pooling<P>::operator()(const framework::Tensor &input, ...@@ -60,18 +60,12 @@ void Pooling<P>::operator()(const framework::Tensor &input,
wstart = std::max(wstart, 0); wstart = std::max(wstart, 0);
PoolingVal<P> val; PoolingVal<P> val;
// std::cout << "output[" << ph * output_width + pw << "]:"
// << std::endl;
for (int h = hstart; h < hend; ++h) { for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) { for (int w = wstart; w < wend; ++w) {
val += input_ptr[h * input_width + w]; val += input_ptr[h * input_width + w];
// std::cout << "input[" << h << "][" << w << "] = "
// << input_ptr[h * input_width + w] << std::endl;
} }
} }
output_ptr[ph * output_width + pw] = val.Value(); output_ptr[ph * output_width + pw] = val.Value();
// std::cout << "output[" << ph * output_width + pw << "] = "
// << val.Value() << std::endl;
} }
} }
} }
......
...@@ -34,45 +34,39 @@ template <PoolingType P = Max> ...@@ -34,45 +34,39 @@ template <PoolingType P = Max>
struct PoolingVal { struct PoolingVal {
float val; float val;
int count; int count;
PoolingVal() { PoolingVal() : count(0) { val = -std::numeric_limits<float>::max(); }
val = -std::numeric_limits<float>::max();
count = 0;
}
inline PoolingVal<P> &operator+=(const float &x) { inline PoolingVal<P> &operator+=(const float &x) {
val = std::max(val, x); val = std::max(val, x);
count += 1; ++count;
return *this; return *this;
} }
float Value() const { inline float Value() { return (count > 0) ? val : 0.f; }
if (count > 0) {
return val;
}
return 0.f;
}
}; };
template <> template <>
struct PoolingVal<Avg> { struct PoolingVal<Avg> {
float val; float val;
int count; int count;
PoolingVal() { PoolingVal() : val(0.f), count(0) {}
val = 0.f;
count = 0;
}
inline PoolingVal<Avg> &operator+=(const float &x) { inline PoolingVal<Avg> &operator+=(const float &x) {
val += x; val += x;
count += 1; ++count;
return *this; return *this;
} }
float Value() const { inline float Value() { return (count > 0) ? val * (1.f / count) : 0.f; }
if (count > 0) {
return val / count;
}
return 0.f;
}
}; };
#if defined(__ARM_NEON) || defined(__ARM_NEON__) #if defined(__ARM_NEON) || defined(__ARM_NEON__)
// Starting accumulator for a 4-lane pooling reduction. This primary template
// covers every pooling type that has no dedicated specialization (Max is the
// default argument): each lane is seeded with the most negative finite float,
// so taking the maximum with any finite input yields that input.
template <PoolingType P = Max>
inline float32x4_t vPoolInitq_f32() {
  const float lowest_finite = -std::numeric_limits<float>::max();
  return vdupq_n_f32(lowest_finite);
}
// Avg specialization of the starting accumulator: all four lanes begin at
// zero, the neutral element for a running sum.
template <>
inline float32x4_t vPoolInitq_f32<Avg>() {
  const float zero = 0.f;
  return vdupq_n_f32(zero);
}
template <PoolingType P = Max> template <PoolingType P = Max>
inline float32x4_t vPoolPreq_f32(const float32x4_t &x1, const float32x4_t &x2) { inline float32x4_t vPoolPreq_f32(const float32x4_t &x1, const float32x4_t &x2) {
return vmaxq_f32(x1, x2); return vmaxq_f32(x1, x2);
...@@ -85,14 +79,15 @@ inline float32x4_t vPoolPreq_f32<Avg>(const float32x4_t &x1, ...@@ -85,14 +79,15 @@ inline float32x4_t vPoolPreq_f32<Avg>(const float32x4_t &x1,
} }
template <PoolingType P = Max> template <PoolingType P = Max>
inline float32x4_t vPoolPostq_f32(const float32x4_t &x) { inline float32x4_t vPoolPostq_f32(const float32x4_t &x,
const float32x4_t &post) {
return x; return x;
} }
template <> template <>
inline float32x4_t vPoolPostq_f32<Avg>(const float32x4_t &x) { inline float32x4_t vPoolPostq_f32<Avg>(const float32x4_t &x,
float32x4_t avg = vdupq_n_f32(1.f / 9); const float32x4_t &post) {
return vmulq_f32(avg, x); return vmulq_f32(x, post);
} }
#endif // __ARM_NEON__ #endif // __ARM_NEON__
...@@ -107,13 +102,13 @@ inline float PoolPre<Avg>(const float &x1, const float &x2) { ...@@ -107,13 +102,13 @@ inline float PoolPre<Avg>(const float &x1, const float &x2) {
} }
template <PoolingType P = Max> template <PoolingType P = Max>
inline float PoolPost(const float &x) { inline float PoolPost(const float &x, const float &post) {
return x; return x;
} }
template <> template <>
inline float PoolPost<Avg>(const float &x) { inline float PoolPost<Avg>(const float &x, const float &post) {
return 1.f / 9 * x; return x * post;
} }
template <PoolingType P> template <PoolingType P>
......
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册