From 8c2e4ac26958022da0ca50697a8940169fbe1a01 Mon Sep 17 00:00:00 2001 From: hjchen2 Date: Mon, 7 Jan 2019 14:25:24 +0800 Subject: [PATCH] Make 5x5 depthwise conv implementation invisible for aarch64 --- src/operators/kernel/central-arm-func/conv_arm_func.h | 2 +- src/operators/math/quantize.h | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/operators/kernel/central-arm-func/conv_arm_func.h b/src/operators/kernel/central-arm-func/conv_arm_func.h index 1c48ebefd9..86a3c7a969 100644 --- a/src/operators/kernel/central-arm-func/conv_arm_func.h +++ b/src/operators/kernel/central-arm-func/conv_arm_func.h @@ -186,7 +186,6 @@ inline void DepthwiseConv3x3(const ConvParam &param) { } } } -#endif // __aarch64__ template inline void DepthwiseConv5x5(const ConvParam &param) { @@ -209,6 +208,7 @@ inline void DepthwiseConv5x5(const ConvParam &param) { GemmConv(param); } } +#endif // __aarch64__ } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/math/quantize.h b/src/operators/math/quantize.h index b6e9d1a24d..9f9e91330c 100644 --- a/src/operators/math/quantize.h +++ b/src/operators/math/quantize.h @@ -56,6 +56,9 @@ inline int32x4_t vRoundq_f32(const float32x4_t &x) { template <> inline int32x4_t vRoundq_f32(const float32x4_t &x) { +#if __aarch64__ + return vcvtaq_s32_f32(x); +#else float32x4_t plus = vdupq_n_f32(0.5); float32x4_t minus = vdupq_n_f32(-0.5); float32x4_t zero = vdupq_n_f32(0); @@ -64,10 +67,14 @@ inline int32x4_t vRoundq_f32(const float32x4_t &x) { temp = vaddq_f32(x, temp); int32x4_t ret = vcvtq_s32_f32(temp); return ret; +#endif } template <> inline int32x4_t vRoundq_f32(const float32x4_t &x) { +#if __aarch64__ + return vcvtnq_s32_f32(x); +#else float32x4_t point5 = vdupq_n_f32(0.5); int32x4_t one = vdupq_n_s32(1); int32x4_t zero = vdupq_n_s32(0); @@ -90,6 +97,7 @@ inline int32x4_t vRoundq_f32(const float32x4_t &x) { smask = vsubq_s32(smask, one); rnd = vaddq_s32(rnd, smask); return rnd; +#endif } #endif //
__ARM_NEON__ -- GitLab