未验证 提交 8a96e11a 编写于 作者: wangq3802's avatar wangq3802 提交者: GitHub

Fix bug: x86 AVX/SSE Winograd convolution produced wrong results when output_channels % 8 != 0 (#369)

Co-authored-by: Nqwang <qwang02@openailab.com>
上级 0e235db9
......@@ -937,7 +937,7 @@ void conv3x3s1_winograd43_sse(float* bottom_blob, float* top_blob, float* kernel
#else
_sum0 = _mm_add_ps(_sum0, _mm_mul_ps(_r0, _k0));
#endif
- kptr += 16;
+ kptr += 4;
r0 += 4;
}
_mm_storeu_ps(output0_tm, _sum0);
......@@ -948,7 +948,7 @@ void conv3x3s1_winograd43_sse(float* bottom_blob, float* top_blob, float* kernel
{
for (int n = 0; n < 4; n++)
{
- sum0[n] += ( int )r0[n] * kptr[n];
+ sum0[n] += r0[n] * kptr[n];
}
kptr += 4;
r0 += 4;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册