diff --git a/configure b/configure index 145056a55d164c58add5a7368df8f7cbd26acc5e..640314116371c00593cb5932aa1e62a42cf5b3e7 100755 --- a/configure +++ b/configure @@ -14169,7 +14169,7 @@ fi # Check for Intel SSE 4.2 intrinsics to do CRC calculations. # -# First check if the _mm_crc32_u8 and _mmcrc32_u64 intrinsics can be used +# First check if the _mm_crc32_u8 and _mm_crc32_u64 intrinsics can be used # with the default compiler flags. If not, check if adding the -msse4.2 # flag helps. CFLAGS_SSE42 is set to -msse4.2 if that's required. { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm_crc32_u8 and _mm_crc32_u64 with CFLAGS=" >&5 @@ -14254,23 +14254,49 @@ fi fi +# Are we targeting a processor that supports SSE 4.2? gcc, clang and icc all +# define __SSE4_2__ in that case. +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + +#ifndef __SSE4_2__ +#error __SSE4_2__ not defined +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + SSE4_2_TARGETED=1 +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + # Select CRC-32C implementation. # -# If the SSE 4.2 intrinsics are available without extra CFLAGS, then use them -# always. If they require extra CFLAGS, compile both implementations and -# select which one to use at runtime, depending on whether SSE 4.2 is -# supported by the processor we're running on. +# If we are targeting a processor that has SSE 4.2 instructions, we can use the +# special CRC instructions for calculating CRC-32C. If we're not targeting such +# a processor, but we can nevertheless produce code that uses the SSE +# intrinsics, perhaps with some extra CFLAGS, compile both implementations and +# select which one to use at runtime, depending on whether SSE 4.2 is supported +# by the processor we're running on. # # You can override this logic by setting the appropriate USE_*_CRC32 flag to 1 # in the template or configure command line. if test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_SLICING_BY_8_CRC32C" = x""; then - if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && test x"$CFLAGS_SSE42" = x"" ; then + if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && test x"$SSE4_2_TARGETED" = x"1" ; then USE_SSE42_CRC32C=1 else # the CPUID instruction is needed for the runtime check. if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && (test x"$pgac_cv__get_cpuid" = x"yes" || test x"$pgac_cv__cpuid" = x"yes"); then USE_SSE42_CRC32C_WITH_RUNTIME_CHECK=1 else + # fall back to slicing-by-8 algorithm which doesn't require any special + # CPU support. USE_SLICING_BY_8_CRC32C=1 fi fi diff --git a/configure.in b/configure.in index 96efdafcbb32c8e238ed22e3454a9410bbb31106..1cd9e1eb46fea48a091bda9e50f50b7d47e5a094 100644 --- a/configure.in +++ b/configure.in @@ -1816,7 +1816,7 @@ fi # Check for Intel SSE 4.2 intrinsics to do CRC calculations. # -# First check if the _mm_crc32_u8 and _mmcrc32_u64 intrinsics can be used +# First check if the _mm_crc32_u8 and _mm_crc32_u64 intrinsics can be used # with the default compiler flags. If not, check if adding the -msse4.2 # flag helps. CFLAGS_SSE42 is set to -msse4.2 if that's required. PGAC_SSE42_CRC32_INTRINSICS([]) @@ -1825,23 +1825,35 @@ if test x"$pgac_sse42_crc32_intrinsics" != x"yes"; then fi AC_SUBST(CFLAGS_SSE42) +# Are we targeting a processor that supports SSE 4.2? gcc, clang and icc all +# define __SSE4_2__ in that case. +AC_TRY_COMPILE([], [ +#ifndef __SSE4_2__ +#error __SSE4_2__ not defined +#endif +], [SSE4_2_TARGETED=1]) + # Select CRC-32C implementation. # -# If the SSE 4.2 intrinsics are available without extra CFLAGS, then use them -# always. If they require extra CFLAGS, compile both implementations and -# select which one to use at runtime, depending on whether SSE 4.2 is -# supported by the processor we're running on. +# If we are targeting a processor that has SSE 4.2 instructions, we can use the +# special CRC instructions for calculating CRC-32C. If we're not targeting such +# a processor, but we can nevertheless produce code that uses the SSE +# intrinsics, perhaps with some extra CFLAGS, compile both implementations and +# select which one to use at runtime, depending on whether SSE 4.2 is supported +# by the processor we're running on. # # You can override this logic by setting the appropriate USE_*_CRC32 flag to 1 # in the template or configure command line. if test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_SLICING_BY_8_CRC32C" = x""; then - if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && test x"$CFLAGS_SSE42" = x"" ; then + if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && test x"$SSE4_2_TARGETED" = x"1" ; then USE_SSE42_CRC32C=1 else # the CPUID instruction is needed for the runtime check. if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && (test x"$pgac_cv__get_cpuid" = x"yes" || test x"$pgac_cv__cpuid" = x"yes"); then USE_SSE42_CRC32C_WITH_RUNTIME_CHECK=1 else + # fall back to slicing-by-8 algorithm which doesn't require any special + # CPU support. USE_SLICING_BY_8_CRC32C=1 fi fi