Merge pull request #9717 from alalek:fix_fma

0f0f5652 · Alexander Alekhin · d4c905bf · 3dee92ec · 0f0f5652 · 0f0f5652
显示空白变更内容
内联并排

Showing 2 changed files with 2 additions and 2 deletions

modules/core/src/stat.cpp +1 -1

modules/dnn/src/layers/layers_common.simd.hpp +1 -1

未找到文件。
--- a/modules/core/src/stat.cpp
+++ b/modules/core/src/stat.cpp
@@ -4365,7 +4365,7 @@ float normL2Sqr_(const float* a, const float* b, int n)
    for( ; j <= n - 8; j += 8 )
    {
        __m256 t0 = _mm256_sub_ps(_mm256_loadu_ps(a + j), _mm256_loadu_ps(b + j));
-#ifdef CV_FMA3
+#if CV_FMA3
        d0 = _mm256_fmadd_ps(t0, t0, d0);
 #else
        d0 = _mm256_add_ps(d0, _mm256_mul_ps(t0, t0));

--- a/modules/dnn/src/layers/layers_common.simd.hpp
+++ b/modules/dnn/src/layers/layers_common.simd.hpp
@@ -59,7 +59,7 @@ void fastGEMM( const float* aptr, size_t astep, const float* bptr,

 #if !defined(CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY) && CV_AVX

-#if !CV_FMA // AVX workaround
+#if !CV_FMA3 // AVX workaround
 #undef _mm256_fmadd_ps
 #define _mm256_fmadd_ps(a, b, c) _mm256_add_ps(c, _mm256_mul_ps(a, b))
 #endif