diff --git a/paddle/fluid/operators/math/cpu_vec.h b/paddle/fluid/operators/math/cpu_vec.h index 4406a5587188eabb6933175010b4f053dbf6c661..8940a41424b01c975f1264ca309cc09fc3c7ae85 100644 --- a/paddle/fluid/operators/math/cpu_vec.h +++ b/paddle/fluid/operators/math/cpu_vec.h @@ -160,7 +160,7 @@ inline void vec_sum(const size_t n, const float* x, end = n & ~(block - 1); __m256 tmp = _mm256_setzero_ps(); for (i = 0; i < end; i += block) { - tmp = _mm256_add_ps(tmp, _mm256_load_ps(x + i)); + tmp = _mm256_add_ps(tmp, _mm256_loadu_ps(x + i)); } __m256 hsum = _mm256_hadd_ps(tmp, tmp);