未验证 提交 a25c31b6 编写于 作者: C Connor Holmes 提交者: GitHub

Update AVX512 Detection (#2621)

* Update cpuinfo AVX512 detection

* Missing conversion from `__m256` to `__m256i`
Co-authored-by: Olatunji Ruwase <olruwase@microsoft.com>
上级 0f0e38c5
......@@ -21,7 +21,8 @@
#define SIMD_DIV(x, y) _mm512_div_ps(x, y)
#define SIMD_WIDTH 16
#define SIMD_LOAD2(x, h) ((h) ? _mm512_cvtph_ps(_mm256_loadu_ps(x)) : _mm512_loadu_ps(x))
#define SIMD_LOAD2(x, h) \
((h) ? _mm512_cvtph_ps(_mm256_castps_si256(_mm256_loadu_ps(x))) : _mm512_loadu_ps(x))
#define SIMD_STORE2(x, d, h) \
((h) ? _mm256_store_ps(x, _mm256_castsi256_ps(_mm512_cvtps_ph(d, _MM_FROUND_TO_NEAREST_INT))) \
: _mm512_storeu_ps(x, d))
......
......@@ -376,6 +376,8 @@ class OpBuilder(ABC):
cpu_info['arch'] = 'X86_64'
if 'avx512' in result:
cpu_info['flags'] += 'avx512,'
elif 'avx512f' in result:
cpu_info['flags'] += 'avx512f,'
if 'avx2' in result:
cpu_info['flags'] += 'avx2'
elif 'ppc64le' in result:
......@@ -402,7 +404,7 @@ class OpBuilder(ABC):
return '-D__SCALAR__'
if cpu_info['arch'] == 'X86_64':
if 'avx512' in cpu_info['flags']:
if 'avx512' in cpu_info['flags'] or 'avx512f' in cpu_info['flags']:
return '-D__AVX512__'
elif 'avx2' in cpu_info['flags']:
return '-D__AVX256__'
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册