提交 97549a4f 编写于 作者: Y Yihua Xu 提交者: tensor-tang

[CPU] Fix the compiling issue with AVX512F macro. (#18634)

上级 256ba7cb
......@@ -46,7 +46,7 @@ void CRFDecoding(const int seq_len, const float* x, const float* w,
x_content = _mm512_loadu_ps(x + i_offset);
alpha_content = _mm512_add_ps(w_content, x_content);
// Save the alpha value.
-_mm512_storeu_ps(alpha_value + i_offset, alpha_content);
+_mm512_storeu_ps(alpha + i_offset, alpha_content);
#else
// AVX or AVX2
// weights, input and alpha values.
......@@ -131,13 +131,12 @@ void CRFDecoding(const int seq_len, const float* x, const float* w,
}
/* Update the alpha and track values. */
#ifdef __AVX512F__
-__m512 x_content =
-_mm512_loadu_ps(x + seq_offset + this->num_ + j_offset);
+__m512 x_content = _mm512_loadu_ps(x + seq_offset + tag_num + j_offset);
max_score = _mm512_add_ps(max_score, x_content);
-_mm512_storeu_ps(alpha + seq_offset + this->num_ + j_offset, max_score);
-_mm512_storeu_si512(reinterpret_cast<__m512i*>(track + seq_offset +
-this->num_ + j_offset),
-max_j);
+_mm512_storeu_ps(alpha + seq_offset + tag_num + j_offset, max_score);
+_mm512_storeu_si512(
+reinterpret_cast<__m512i*>(track + seq_offset + tag_num + j_offset),
+max_j);
#else
__m256 x_content = _mm256_loadu_ps(x + seq_offset + tag_num + j_offset);
max_score = _mm256_add_ps(max_score, x_content);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册