Fix tanh_ps returning the wrong value when input < -45 (#601)

* Fix tanh_ps returning the wrong value when input < -45
* Update mish_math_func.h

Fix tanh_ps returning the wrong value when input < -45 (#601)
* Fix tanh_ps returning the wrong value when input < -45
* Update mish_math_func.h
27113c05 · nihui · GitHub · 2ed03474 · 27113c05 · 27113c05
Showing 2 changed files with 2 additions and 2 deletions

src/dev/cpu/op/mish/cortex-a/mish_math_func.h src/dev/cpu/op/mish/cortex-a/mish_math_func.h +1 -1

src/dev/cpu/op/selu/cortex-a/neon_mathfun.h src/dev/cpu/op/selu/cortex-a/neon_mathfun.h +1 -1

未找到文件。
--- a/src/dev/cpu/op/mish/cortex-a/mish_math_func.h
+++ b/src/dev/cpu/op/mish/cortex-a/mish_math_func.h
@@ -185,7 +185,7 @@ static inline float32x4_t tanh_ps(float32x4_t x)

    // abs(x) > HALFMAXLOGF
    // return 1.0 or -1.0
-    uint32x4_t mask_pos = vcgtq_f32(x2, vdupq_n_f32(0.f));
+    uint32x4_t mask_pos = vcgtq_f32(x, vdupq_n_f32(0.f));
    float32x4_t y1 = vreinterpretq_f32_u32(vbslq_u32(mask_pos, vreinterpretq_u32_f32(vdupq_n_f32(1.f)), vreinterpretq_u32_f32(vdupq_n_f32(-1.f))));

    y = vreinterpretq_f32_u32(vbslq_u32(mask_l, vreinterpretq_u32_f32(y0), vreinterpretq_u32_f32(y)));

--- a/src/dev/cpu/op/selu/cortex-a/neon_mathfun.h
+++ b/src/dev/cpu/op/selu/cortex-a/neon_mathfun.h
@@ -379,7 +379,7 @@ static inline float32x4_t tanh_ps(float32x4_t x)

    // abs(x) > HALFMAXLOGF
    // return 1.0 or -1.0
-    uint32x4_t mask_pos = vcgtq_f32(x2, vdupq_n_f32(0.f));
+    uint32x4_t mask_pos = vcgtq_f32(x, vdupq_n_f32(0.f));
    float32x4_t y1 = vreinterpretq_f32_u32(
        vbslq_u32(mask_pos, vreinterpretq_u32_f32(vdupq_n_f32(1.f)), vreinterpretq_u32_f32(vdupq_n_f32(-1.f))));