From 711b5bf50284e09b7dc327ad4896f83aa6e186bd Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Tue, 17 May 2022 16:49:44 +0800 Subject: [PATCH] fix(dnn/arm_common): fix some load beyond memory GitOrigin-RevId: acd63639455a75fa6d6f98337d5090a1e2d9b318 --- dnn/src/arm_common/conv_bias/int8/direct.cpp | 6 ++++-- dnn/src/arm_common/conv_bias/int8/direct_dotprod.cpp | 6 ++++-- .../convolution/int8x8x32/conv_backdata_stride1.cpp | 4 +++- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/dnn/src/arm_common/conv_bias/int8/direct.cpp b/dnn/src/arm_common/conv_bias/int8/direct.cpp index c3bad21b4..a2ab67a85 100644 --- a/dnn/src/arm_common/conv_bias/int8/direct.cpp +++ b/dnn/src/arm_common/conv_bias/int8/direct.cpp @@ -389,7 +389,8 @@ void conv_bias::conv_direct_stride1_3x3_int8_nchw( ACC_S16_S32(sum10, sum11, d1); int8x8_t _r30 = vld1_s8(sptr + 3 * IW); - int8x8_t _r3n = vld1_s8(sptr + 3 * IW + 8); + int8x8_t _r3n = + vreinterpret_s8_s16(vld1_dup_s16((int16_t*)(sptr + 3 * IW + 8))); int8x8_t _r31 = vext_s8(_r30, _r3n, 1); int8x8_t _r32 = vext_s8(_r30, _r3n, 2); d1 = vmull_s8(_r30, k20); @@ -444,7 +445,8 @@ void conv_bias::conv_direct_stride1_3x3_int8_nchw( ACC_S16_S32(sum00, sum01, d0); int8x8_t _r20 = vld1_s8(sptr + 2 * IW); - int8x8_t _r2n = vld1_s8(sptr + 2 * IW + 8); + int8x8_t _r2n = + vreinterpret_s8_s16(vld1_dup_s16((int16_t*)(sptr + 2 * IW + 8))); int8x8_t _r21 = vext_s8(_r20, _r2n, 1); int8x8_t _r22 = vext_s8(_r20, _r2n, 2); d0 = vmull_s8(_r20, k20); diff --git a/dnn/src/arm_common/conv_bias/int8/direct_dotprod.cpp b/dnn/src/arm_common/conv_bias/int8/direct_dotprod.cpp index 28203e2bf..1a0e3bbc2 100644 --- a/dnn/src/arm_common/conv_bias/int8/direct_dotprod.cpp +++ b/dnn/src/arm_common/conv_bias/int8/direct_dotprod.cpp @@ -437,8 +437,10 @@ void conv_bias::conv_direct_stride1_3x3_int8_dot( _tmp = vtranslq_s8(vld1_s8(r2)); CALC_2(678, 345, 0); - - _tmp = vtranslq_s8(vld1_s8(r3)); + int8x8_t tmp_last = vreinterpret_s8_s32(vld1_dup_s32(r3)); + tmp_last = vreinterpret_s8_s16( + vld1_lane_s16(r3 + 4, vreinterpret_s16_s8(tmp_last), 2)); + _tmp = vtranslq_s8(tmp_last); CALC_1(678, 0); POSTPROCESS_2X4(_sum00, _sum10, outptr, outptr2, dstptr, dstptr2); diff --git a/dnn/src/arm_common/convolution/int8x8x32/conv_backdata_stride1.cpp b/dnn/src/arm_common/convolution/int8x8x32/conv_backdata_stride1.cpp index ed21f4ca0..6838b3278 100644 --- a/dnn/src/arm_common/convolution/int8x8x32/conv_backdata_stride1.cpp +++ b/dnn/src/arm_common/convolution/int8x8x32/conv_backdata_stride1.cpp @@ -297,7 +297,9 @@ void deconv_direct_2x2( _tmp = vtranslq_s8(vld1_s8(r0)); CALC_0(1, 0); - _tmp = vtranslq_s8(vld1_s8(r1)); + int8x8_t temp_x = vld1_dup_s32(r1); + temp_x = vld1_lane_s8(r1 + 4, temp_x, 4); + _tmp = vtranslq_s8(temp_x); CALC_0(23, 0); vst1q_s32(outptr, _sum00); -- GitLab