提交 711b5bf5 编写于 作者: M Megvii Engine Team

fix(dnn/arm_common): fix some out-of-bounds memory loads

GitOrigin-RevId: acd63639455a75fa6d6f98337d5090a1e2d9b318
上级 3ebb8db0
......@@ -389,7 +389,8 @@ void conv_bias::conv_direct_stride1_3x3_int8_nchw(
ACC_S16_S32(sum10, sum11, d1);
int8x8_t _r30 = vld1_s8(sptr + 3 * IW);
int8x8_t _r3n = vld1_s8(sptr + 3 * IW + 8);
int8x8_t _r3n =
vreinterpret_s8_s16(vld1_dup_s16((int16_t*)(sptr + 3 * IW + 8)));
int8x8_t _r31 = vext_s8(_r30, _r3n, 1);
int8x8_t _r32 = vext_s8(_r30, _r3n, 2);
d1 = vmull_s8(_r30, k20);
......@@ -444,7 +445,8 @@ void conv_bias::conv_direct_stride1_3x3_int8_nchw(
ACC_S16_S32(sum00, sum01, d0);
int8x8_t _r20 = vld1_s8(sptr + 2 * IW);
int8x8_t _r2n = vld1_s8(sptr + 2 * IW + 8);
int8x8_t _r2n =
vreinterpret_s8_s16(vld1_dup_s16((int16_t*)(sptr + 2 * IW + 8)));
int8x8_t _r21 = vext_s8(_r20, _r2n, 1);
int8x8_t _r22 = vext_s8(_r20, _r2n, 2);
d0 = vmull_s8(_r20, k20);
......
......@@ -437,8 +437,10 @@ void conv_bias::conv_direct_stride1_3x3_int8_dot(
_tmp = vtranslq_s8(vld1_s8(r2));
CALC_2(678, 345, 0);
_tmp = vtranslq_s8(vld1_s8(r3));
int8x8_t tmp_last = vreinterpret_s8_s32(vld1_dup_s32(r3));
tmp_last = vreinterpret_s8_s16(
vld1_lane_s16(r3 + 4, vreinterpret_s16_s8(tmp_last), 2));
_tmp = vtranslq_s8(tmp_last);
CALC_1(678, 0);
POSTPROCESS_2X4(_sum00, _sum10, outptr, outptr2, dstptr, dstptr2);
......
......@@ -297,7 +297,9 @@ void deconv_direct_2x2(
_tmp = vtranslq_s8(vld1_s8(r0));
CALC_0(1, 0);
_tmp = vtranslq_s8(vld1_s8(r1));
int8x8_t temp_x = vld1_dup_s32(r1);
temp_x = vld1_lane_s8(r1 + 4, temp_x, 4);
_tmp = vtranslq_s8(temp_x);
CALC_0(23, 0);
vst1q_s32(outptr, _sum00);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册