[CPU] Fix the compiling issue with AVX512F macro. (#18634)

97549a4f · Yihua Xu · tensor-tang · 256ba7cb · 97549a4f
隐藏空白更改
内联并排

Showing 1 changed file with 6 additions and 7 deletions

paddle/fluid/operators/jit/more/intrinsic/crf_decoding.cc +6 -7

未找到文件。
--- a/paddle/fluid/operators/jit/more/intrinsic/crf_decoding.cc
+++ b/paddle/fluid/operators/jit/more/intrinsic/crf_decoding.cc
@@ -46,7 +46,7 @@ void CRFDecoding(const int seq_len, const float* x, const float* w,
    x_content = _mm512_loadu_ps(x + i_offset);
    alpha_content = _mm512_add_ps(w_content, x_content);
    // Save the alpha value.
-    _mm512_storeu_ps(alpha_value + i_offset, alpha_content);
+    _mm512_storeu_ps(alpha + i_offset, alpha_content);
 #else
    // AVX or AVX2
    // weights, input and alpha values.
@@ -131,13 +131,12 @@ void CRFDecoding(const int seq_len, const float* x, const float* w,
      }
 /* Update the alpha and track values. */
 #ifdef __AVX512F__
-      __m512 x_content =
+      __m512 x_content = _mm512_loadu_ps(x + seq_offset + tag_num + j_offset);
-          _mm512_loadu_ps(x + seq_offset + this->num_ + j_offset);
      max_score = _mm512_add_ps(max_score, x_content);
-      _mm512_storeu_ps(alpha + seq_offset + this->num_ + j_offset, max_score);
+      _mm512_storeu_ps(alpha + seq_offset + tag_num + j_offset, max_score);
-      _mm512_storeu_si512(reinterpret_cast<__m512i*>(track + seq_offset +
+      _mm512_storeu_si512(
-                                                     this->num_ + j_offset),
+          reinterpret_cast<__m512i*>(track + seq_offset + tag_num + j_offset),
-                          max_j);
+          max_j);
 #else
      __m256 x_content = _mm256_loadu_ps(x + seq_offset + tag_num + j_offset);
      max_score = _mm256_add_ps(max_score, x_content);