From 2fa5e3f969ead16a44f993480f8b4c748693055a Mon Sep 17 00:00:00 2001 From: hjchen2 Date: Tue, 9 Oct 2018 07:44:21 +0000 Subject: [PATCH] Refine --- src/operators/kernel/arm/quantize_kernel.cpp | 25 ++++++++++++++++++++ src/operators/math/math_func_neon.h | 1 + 2 files changed, 26 insertions(+) diff --git a/src/operators/kernel/arm/quantize_kernel.cpp b/src/operators/kernel/arm/quantize_kernel.cpp index b8e6bf873d..7c26f158ba 100644 --- a/src/operators/kernel/arm/quantize_kernel.cpp +++ b/src/operators/kernel/arm/quantize_kernel.cpp @@ -44,6 +44,7 @@ int32x4_t vrnd_away_zero(float32x4_t r) { } int32x4_t vrnd_to_even(float32x4_t r) { +#if 0 int32x4_t ret; float value[4]; vst1q_f32(value, r); @@ -61,6 +62,30 @@ int32x4_t vrnd_to_even(float32x4_t r) { } } return ret; +#else + float32x4_t point5 = vdupq_n_f32(0.5); + int32x4_t one = vdupq_n_s32(1); + int32x4_t zero = vdupq_n_s32(0); + + int32x4_t rnd = vrnd_away_zero(r); + float32x4_t frnd = vcvtq_f32_s32(rnd); + frnd = vsubq_f32(frnd, r); + frnd = vabsq_f32(frnd); + uint32x4_t equal_point5 = vceqq_f32(frnd, point5); + int32x4_t abs_rnd = vabsq_s32(rnd); + abs_rnd = vandq_s32(abs_rnd, one); + uint32x4_t not_mod2 = vreinterpretq_u32_s32(abs_rnd); + uint32x4_t mask = vandq_u32(equal_point5, not_mod2); + uint32x4_t more_than_zero = vcgtq_s32(rnd, zero); + more_than_zero = vandq_u32(more_than_zero, vreinterpretq_u32_s32(one)); + mask = veorq_u32(more_than_zero, mask); + more_than_zero = veorq_u32(more_than_zero, vreinterpretq_u32_s32(one)); + mask = vaddq_u32(more_than_zero, mask); + int32x4_t smask = vreinterpretq_s32_u32(mask); + smask = vsubq_s32(smask, one); + rnd = vaddq_s32(rnd, smask); + return rnd; + #endif } #endif diff --git a/src/operators/math/math_func_neon.h b/src/operators/math/math_func_neon.h index 5bb3fd0f5a..97e1e6f67d 100644 --- a/src/operators/math/math_func_neon.h +++ b/src/operators/math/math_func_neon.h @@ -38,6 +38,7 @@ limitations under the License. */ * * (this is the zlib license) */ + #pragma once #include -- GitLab