提交 0f0f5652 作者: Alexander Alekhin

Merge pull request #9717 from alalek:fix_fma

......@@ -4365,7 +4365,7 @@ float normL2Sqr_(const float* a, const float* b, int n)
for( ; j <= n - 8; j += 8 )
{
__m256 t0 = _mm256_sub_ps(_mm256_loadu_ps(a + j), _mm256_loadu_ps(b + j));
-#ifdef CV_FMA3
+#if CV_FMA3
d0 = _mm256_fmadd_ps(t0, t0, d0);
#else
d0 = _mm256_add_ps(d0, _mm256_mul_ps(t0, t0));
......
......@@ -59,7 +59,7 @@ void fastGEMM( const float* aptr, size_t astep, const float* bptr,
#if !defined(CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY) && CV_AVX
-#if !CV_FMA // AVX workaround
+#if !CV_FMA3 // AVX workaround
#undef _mm256_fmadd_ps
#define _mm256_fmadd_ps(a, b, c) _mm256_add_ps(c, _mm256_mul_ps(a, b))
#endif
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册