From 8c2e4ac26958022da0ca50697a8940169fbe1a01 Mon Sep 17 00:00:00 2001
From: hjchen2 <chenhoujiangcug@gmail.com>
Date: Mon, 7 Jan 2019 14:25:24 +0800
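Subject: [PATCH] Make 5x5 depthwise conv implementation invisible for aarch64

In conv_arm_func.h, move the `#endif  // __aarch64__` from before
DepthwiseConv5x5 to after it, so the function falls inside that guard.
In quantize.h, when __aarch64__ is set, implement the
ROUND_NEAREST_AWAY_ZERO and ROUND_NEAREST_TO_EVEN specializations of
vRoundq_f32 with vcvtaq_s32_f32 and vcvtnq_s32_f32 respectively; other
targets keep the existing NEON instruction sequences.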

---
 src/operators/kernel/central-arm-func/conv_arm_func.h | 2 +-
 src/operators/math/quantize.h                         | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/src/operators/kernel/central-arm-func/conv_arm_func.h b/src/operators/kernel/central-arm-func/conv_arm_func.h
index 1c48ebefd9..86a3c7a969 100644
--- a/src/operators/kernel/central-arm-func/conv_arm_func.h
+++ b/src/operators/kernel/central-arm-func/conv_arm_func.h
@@ -186,7 +186,6 @@ inline void DepthwiseConv3x3(const ConvParam<CPU> &param) {
     }
   }
 }
-#endif  // __aarch64__
 
 template <typename Itype, typename Otype>
 inline void DepthwiseConv5x5(const ConvParam<CPU> &param) {
@@ -209,6 +208,7 @@ inline void DepthwiseConv5x5(const ConvParam<CPU> &param) {
     GemmConv<Itype, Otype>(param);
   }
 }
+#endif  // __aarch64__
 
 }  // namespace operators
 }  // namespace paddle_mobile
diff --git a/src/operators/math/quantize.h b/src/operators/math/quantize.h
index b6e9d1a24d..9f9e91330c 100644
--- a/src/operators/math/quantize.h
+++ b/src/operators/math/quantize.h
@@ -56,6 +56,9 @@ inline int32x4_t vRoundq_f32(const float32x4_t &x) {
 
 template <>
 inline int32x4_t vRoundq_f32<ROUND_NEAREST_AWAY_ZERO>(const float32x4_t &x) {
+#if __aarch64__
+  return vcvtaq_s32_f32(x);
+#else
   float32x4_t plus = vdupq_n_f32(0.5);
   float32x4_t minus = vdupq_n_f32(-0.5);
   float32x4_t zero = vdupq_n_f32(0);
@@ -64,10 +67,14 @@ inline int32x4_t vRoundq_f32<ROUND_NEAREST_AWAY_ZERO>(const float32x4_t &x) {
   temp = vaddq_f32(x, temp);
   int32x4_t ret = vcvtq_s32_f32(temp);
   return ret;
+#endif
 }
 
 template <>
 inline int32x4_t vRoundq_f32<ROUND_NEAREST_TO_EVEN>(const float32x4_t &x) {
+#if __aarch64__
+  return vcvtnq_s32_f32(x);
+#else
   float32x4_t point5 = vdupq_n_f32(0.5);
   int32x4_t one = vdupq_n_s32(1);
   int32x4_t zero = vdupq_n_s32(0);
@@ -90,6 +97,7 @@ inline int32x4_t vRoundq_f32<ROUND_NEAREST_TO_EVEN>(const float32x4_t &x) {
   smask = vsubq_s32(smask, one);
   rnd = vaddq_s32(rnd, smask);
   return rnd;
+#endif
 }
 #endif  // __ARM_NEON__
 
-- 
GitLab