提交 c7d5385f 作者: hjchen2

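Hide the 5x5 depthwise conv implementation on aarch64 builds, and use the native rounding-conversion intrinsics (vcvtaq_s32_f32 / vcvtnq_s32_f32) in vRoundq_f32 when compiling for aarch64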

上级 380013d9
...@@ -186,7 +186,6 @@ inline void DepthwiseConv3x3(const ConvParam<CPU> &param) { ...@@ -186,7 +186,6 @@ inline void DepthwiseConv3x3(const ConvParam<CPU> &param) {
} }
} }
} }
#endif // __aarch64__
template <typename Itype, typename Otype> template <typename Itype, typename Otype>
inline void DepthwiseConv5x5(const ConvParam<CPU> &param) { inline void DepthwiseConv5x5(const ConvParam<CPU> &param) {
...@@ -209,6 +208,7 @@ inline void DepthwiseConv5x5(const ConvParam<CPU> &param) { ...@@ -209,6 +208,7 @@ inline void DepthwiseConv5x5(const ConvParam<CPU> &param) {
GemmConv<Itype, Otype>(param); GemmConv<Itype, Otype>(param);
} }
} }
#endif // __aarch64__
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
......
...@@ -56,6 +56,9 @@ inline int32x4_t vRoundq_f32(const float32x4_t &x) { ...@@ -56,6 +56,9 @@ inline int32x4_t vRoundq_f32(const float32x4_t &x) {
template <> template <>
inline int32x4_t vRoundq_f32<ROUND_NEAREST_AWAY_ZERO>(const float32x4_t &x) { inline int32x4_t vRoundq_f32<ROUND_NEAREST_AWAY_ZERO>(const float32x4_t &x) {
#if __aarch64__
return vcvtaq_s32_f32(x);
#else
float32x4_t plus = vdupq_n_f32(0.5); float32x4_t plus = vdupq_n_f32(0.5);
float32x4_t minus = vdupq_n_f32(-0.5); float32x4_t minus = vdupq_n_f32(-0.5);
float32x4_t zero = vdupq_n_f32(0); float32x4_t zero = vdupq_n_f32(0);
...@@ -64,10 +67,14 @@ inline int32x4_t vRoundq_f32<ROUND_NEAREST_AWAY_ZERO>(const float32x4_t &x) { ...@@ -64,10 +67,14 @@ inline int32x4_t vRoundq_f32<ROUND_NEAREST_AWAY_ZERO>(const float32x4_t &x) {
temp = vaddq_f32(x, temp); temp = vaddq_f32(x, temp);
int32x4_t ret = vcvtq_s32_f32(temp); int32x4_t ret = vcvtq_s32_f32(temp);
return ret; return ret;
#endif
} }
template <> template <>
inline int32x4_t vRoundq_f32<ROUND_NEAREST_TO_EVEN>(const float32x4_t &x) { inline int32x4_t vRoundq_f32<ROUND_NEAREST_TO_EVEN>(const float32x4_t &x) {
#if __aarch64__
return vcvtnq_s32_f32(x);
#else
float32x4_t point5 = vdupq_n_f32(0.5); float32x4_t point5 = vdupq_n_f32(0.5);
int32x4_t one = vdupq_n_s32(1); int32x4_t one = vdupq_n_s32(1);
int32x4_t zero = vdupq_n_s32(0); int32x4_t zero = vdupq_n_s32(0);
...@@ -90,6 +97,7 @@ inline int32x4_t vRoundq_f32<ROUND_NEAREST_TO_EVEN>(const float32x4_t &x) { ...@@ -90,6 +97,7 @@ inline int32x4_t vRoundq_f32<ROUND_NEAREST_TO_EVEN>(const float32x4_t &x) {
smask = vsubq_s32(smask, one); smask = vsubq_s32(smask, one);
rnd = vaddq_s32(rnd, smask); rnd = vaddq_s32(rnd, smask);
return rnd; return rnd;
#endif
} }
#endif // __ARM_NEON__ #endif // __ARM_NEON__
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册