From 8a96e11ae33dbe0e7e795009e799db768419fcef Mon Sep 17 00:00:00 2001
From: clovking
Date: Thu, 13 Aug 2020 10:06:58 +0800
Subject: [PATCH] fix bug : x86 AVX/SSE winograd when output_channels %8 != 0 (#369)

Co-authored-by: qwang
---
 src/dev/cpu/op/conv/x86/wino_conv_kernel_x86.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/dev/cpu/op/conv/x86/wino_conv_kernel_x86.c b/src/dev/cpu/op/conv/x86/wino_conv_kernel_x86.c
index 2ff30687..ecff883a 100644
--- a/src/dev/cpu/op/conv/x86/wino_conv_kernel_x86.c
+++ b/src/dev/cpu/op/conv/x86/wino_conv_kernel_x86.c
@@ -937,7 +937,7 @@ void conv3x3s1_winograd43_sse(float* bottom_blob, float* top_blob, float* kernel
 #else
                     _sum0 = _mm_add_ps(_sum0, _mm_mul_ps(_r0, _k0));
 #endif
-                    kptr += 16;
+                    kptr += 4;
                     r0 += 4;
                 }
                 _mm_storeu_ps(output0_tm, _sum0);
@@ -948,7 +948,7 @@ void conv3x3s1_winograd43_sse(float* bottom_blob, float* top_blob, float* kernel
                 {
                     for (int n = 0; n < 4; n++)
                     {
-                        sum0[n] += ( int )r0[n] * kptr[n];
+                        sum0[n] += r0[n] * kptr[n];
                     }
                     kptr += 4;
                     r0 += 4;
-- 
GitLab