diff --git a/config/c-compiler.m4 b/config/c-compiler.m4 index 38aab11bc6519f869913fe3ed3f4fc99d8dac2ae..f81e7d613969dc639c5ff9e9528f421e4390d10c 100644 --- a/config/c-compiler.m4 +++ b/config/c-compiler.m4 @@ -473,3 +473,30 @@ AC_DEFUN([PGAC_HAVE_GCC__ATOMIC_INT64_CAS], if test x"$pgac_cv_gcc_atomic_int64_cas" = x"yes"; then AC_DEFINE(HAVE_GCC__ATOMIC_INT64_CAS, 1, [Define to 1 if you have __atomic_compare_exchange_n(int64 *, int *, int64).]) fi])# PGAC_HAVE_GCC__ATOMIC_INT64_CAS + +# PGAC_SSE42_CRC32_INTRINSICS +# ----------------------- +# Check if the compiler supports _mm_crc32_u8 and _mm_crc32_u64 intrinsics. +# An optional compiler flag can be passed as argument (e.g. -msse4.2). If the +# intrinsics are supported, sets pgac_sse42_crc32_intrinsics, and CFLAGS_SSE42. +AC_DEFUN([PGAC_SSE42_CRC32_INTRINSICS], +[define([Ac_cachevar], [AS_TR_SH([pgac_cv_sse42_crc32_intrinsics_$1])])dnl +AC_CACHE_CHECK([for _mm_crc32_u8 and _mm_crc32_u64 with CFLAGS=$1], [Ac_cachevar], +[pgac_save_CFLAGS=$CFLAGS +CFLAGS="$pgac_save_CFLAGS $1" +ac_save_c_werror_flag=$ac_c_werror_flag +ac_c_werror_flag=yes +AC_TRY_LINK([#include ], + [unsigned int crc = 0; + crc = _mm_crc32_u8(crc, 0); + crc = (unsigned int) _mm_crc32_u64(crc, 0);], + [Ac_cachevar=yes], + [Ac_cachevar=no]) +ac_c_werror_flag=$ac_save_c_werror_flag +CFLAGS="$pgac_save_CFLAGS"]) +if test x"$Ac_cachevar" = x"yes"; then + CFLAGS_SSE42="$1" + pgac_sse42_crc32_intrinsics=yes +fi +undefine([Ac_cachevar])dnl +])# PGAC_SSE42_CRC32_INTRINSICS diff --git a/configure b/configure index 640ffc764e1d1cf6e2659b06059bfd3ea57e5a7a..145056a55d164c58add5a7368df8f7cbd26acc5e 100755 --- a/configure +++ b/configure @@ -650,6 +650,8 @@ MSGMERGE MSGFMT_FLAGS MSGFMT HAVE_POSIX_SIGNALS +PG_CRC32C_OBJS +CFLAGS_SSE42 LDAP_LIBS_BE LDAP_LIBS_FE PTHREAD_CFLAGS @@ -14095,6 +14097,216 @@ $as_echo "#define HAVE_GCC__ATOMIC_INT64_CAS 1" >>confdefs.h fi + +# Check for x86 cpuid instruction +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __get_cpuid" >&5 +$as_echo_n "checking for __get_cpuid... " >&6; } +if ${pgac_cv__get_cpuid+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main () +{ +unsigned int exx[4] = {0, 0, 0, 0}; + __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + pgac_cv__get_cpuid="yes" +else + pgac_cv__get_cpuid="no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__get_cpuid" >&5 +$as_echo "$pgac_cv__get_cpuid" >&6; } +if test x"$pgac_cv__get_cpuid" = x"yes"; then + +$as_echo "#define HAVE__GET_CPUID 1" >>confdefs.h + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __cpuid" >&5 +$as_echo_n "checking for __cpuid... " >&6; } +if ${pgac_cv__cpuid+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main () +{ +unsigned int exx[4] = {0, 0, 0, 0}; + __get_cpuid(exx[0], 1); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + pgac_cv__cpuid="yes" +else + pgac_cv__cpuid="no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__cpuid" >&5 +$as_echo "$pgac_cv__cpuid" >&6; } +if test x"$pgac_cv__cpuid" = x"yes"; then + +$as_echo "#define HAVE__CPUID 1" >>confdefs.h + +fi + +# Check for Intel SSE 4.2 intrinsics to do CRC calculations. +# +# First check if the _mm_crc32_u8 and _mmcrc32_u64 intrinsics can be used +# with the default compiler flags. If not, check if adding the -msse4.2 +# flag helps. CFLAGS_SSE42 is set to -msse4.2 if that's required. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm_crc32_u8 and _mm_crc32_u64 with CFLAGS=" >&5 +$as_echo_n "checking for _mm_crc32_u8 and _mm_crc32_u64 with CFLAGS=... " >&6; } +if ${pgac_cv_sse42_crc32_intrinsics_+:} false; then : + $as_echo_n "(cached) " >&6 +else + pgac_save_CFLAGS=$CFLAGS +CFLAGS="$pgac_save_CFLAGS " +ac_save_c_werror_flag=$ac_c_werror_flag +ac_c_werror_flag=yes +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main () +{ +unsigned int crc = 0; + crc = _mm_crc32_u8(crc, 0); + crc = (unsigned int) _mm_crc32_u64(crc, 0); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + pgac_cv_sse42_crc32_intrinsics_=yes +else + pgac_cv_sse42_crc32_intrinsics_=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +ac_c_werror_flag=$ac_save_c_werror_flag +CFLAGS="$pgac_save_CFLAGS" +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_sse42_crc32_intrinsics_" >&5 +$as_echo "$pgac_cv_sse42_crc32_intrinsics_" >&6; } +if test x"$pgac_cv_sse42_crc32_intrinsics_" = x"yes"; then + CFLAGS_SSE42="" + pgac_sse42_crc32_intrinsics=yes +fi + +if test x"$pgac_sse42_crc32_intrinsics" != x"yes"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm_crc32_u8 and _mm_crc32_u64 with CFLAGS=-msse4.2" >&5 +$as_echo_n "checking for _mm_crc32_u8 and _mm_crc32_u64 with CFLAGS=-msse4.2... " >&6; } +if ${pgac_cv_sse42_crc32_intrinsics__msse4_2+:} false; then : + $as_echo_n "(cached) " >&6 +else + pgac_save_CFLAGS=$CFLAGS +CFLAGS="$pgac_save_CFLAGS -msse4.2" +ac_save_c_werror_flag=$ac_c_werror_flag +ac_c_werror_flag=yes +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main () +{ +unsigned int crc = 0; + crc = _mm_crc32_u8(crc, 0); + crc = (unsigned int) _mm_crc32_u64(crc, 0); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + pgac_cv_sse42_crc32_intrinsics__msse4_2=yes +else + pgac_cv_sse42_crc32_intrinsics__msse4_2=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +ac_c_werror_flag=$ac_save_c_werror_flag +CFLAGS="$pgac_save_CFLAGS" +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_sse42_crc32_intrinsics__msse4_2" >&5 +$as_echo "$pgac_cv_sse42_crc32_intrinsics__msse4_2" >&6; } +if test x"$pgac_cv_sse42_crc32_intrinsics__msse4_2" = x"yes"; then + CFLAGS_SSE42="-msse4.2" + pgac_sse42_crc32_intrinsics=yes +fi + +fi + + +# Select CRC-32C implementation. +# +# If the SSE 4.2 intrinsics are available without extra CFLAGS, then use them +# always. If they require extra CFLAGS, compile both implementations and +# select which one to use at runtime, depending on whether SSE 4.2 is +# supported by the processor we're running on. +# +# You can override this logic by setting the appropriate USE_*_CRC32 flag to 1 +# in the template or configure command line. +if test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_SLICING_BY_8_CRC32C" = x""; then + if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && test x"$CFLAGS_SSE42" = x"" ; then + USE_SSE42_CRC32C=1 + else + # the CPUID instruction is needed for the runtime check. + if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && (test x"$pgac_cv__get_cpuid" = x"yes" || test x"$pgac_cv__cpuid" = x"yes"); then + USE_SSE42_CRC32C_WITH_RUNTIME_CHECK=1 + else + USE_SLICING_BY_8_CRC32C=1 + fi + fi +fi + +# Set PG_CRC32C_OBJS appropriately depending on the selected implementation. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking which CRC-32C implementation to use" >&5 +$as_echo_n "checking which CRC-32C implementation to use... " >&6; } +if test x"$USE_SSE42_CRC32C" = x"1"; then + +$as_echo "#define USE_SSE42_CRC32C 1" >>confdefs.h + + PG_CRC32C_OBJS="pg_crc32c_sse42.o" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: SSE 4.2" >&5 +$as_echo "SSE 4.2" >&6; } +else + if test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then + +$as_echo "#define USE_SSE42_CRC32C_WITH_RUNTIME_CHECK 1" >>confdefs.h + + PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_choose.o" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: SSE 4.2 with runtime check" >&5 +$as_echo "SSE 4.2 with runtime check" >&6; } + else + +$as_echo "#define USE_SLICING_BY_8_CRC32C 1" >>confdefs.h + + PG_CRC32C_OBJS="pg_crc32c_sb8.o" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: slicing-by-8" >&5 +$as_echo "slicing-by-8" >&6; } + fi +fi + + + +# Check that POSIX signals are available if thread safety is enabled. if test "$PORTNAME" != "win32" then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for POSIX signal interface" >&5 diff --git a/configure.in b/configure.in index 1f958cff1a8e2bb6839c436909f6204de46dc4d2..96efdafcbb32c8e238ed22e3454a9410bbb31106 100644 --- a/configure.in +++ b/configure.in @@ -1790,6 +1790,84 @@ PGAC_HAVE_GCC__SYNC_INT64_CAS PGAC_HAVE_GCC__ATOMIC_INT32_CAS PGAC_HAVE_GCC__ATOMIC_INT64_CAS + +# Check for x86 cpuid instruction +AC_CACHE_CHECK([for __get_cpuid], [pgac_cv__get_cpuid], +[AC_TRY_LINK([#include ], + [unsigned int exx[4] = {0, 0, 0, 0}; + __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); + ], + [pgac_cv__get_cpuid="yes"], + [pgac_cv__get_cpuid="no"])]) +if test x"$pgac_cv__get_cpuid" = x"yes"; then + AC_DEFINE(HAVE__GET_CPUID, 1, [Define to 1 if you have __get_cpuid.]) +fi + +AC_CACHE_CHECK([for __cpuid], [pgac_cv__cpuid], +[AC_TRY_LINK([#include ], + [unsigned int exx[4] = {0, 0, 0, 0}; + __get_cpuid(exx[0], 1); + ], + [pgac_cv__cpuid="yes"], + [pgac_cv__cpuid="no"])]) +if test x"$pgac_cv__cpuid" = x"yes"; then + AC_DEFINE(HAVE__CPUID, 1, [Define to 1 if you have __cpuid.]) +fi + +# Check for Intel SSE 4.2 intrinsics to do CRC calculations. +# +# First check if the _mm_crc32_u8 and _mmcrc32_u64 intrinsics can be used +# with the default compiler flags. If not, check if adding the -msse4.2 +# flag helps. CFLAGS_SSE42 is set to -msse4.2 if that's required. +PGAC_SSE42_CRC32_INTRINSICS([]) +if test x"$pgac_sse42_crc32_intrinsics" != x"yes"; then + PGAC_SSE42_CRC32_INTRINSICS([-msse4.2]) +fi +AC_SUBST(CFLAGS_SSE42) + +# Select CRC-32C implementation. +# +# If the SSE 4.2 intrinsics are available without extra CFLAGS, then use them +# always. If they require extra CFLAGS, compile both implementations and +# select which one to use at runtime, depending on whether SSE 4.2 is +# supported by the processor we're running on. +# +# You can override this logic by setting the appropriate USE_*_CRC32 flag to 1 +# in the template or configure command line. +if test x"$USE_SSE42_CRC32C" = x"" && test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"" && test x"$USE_SLICING_BY_8_CRC32C" = x""; then + if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && test x"$CFLAGS_SSE42" = x"" ; then + USE_SSE42_CRC32C=1 + else + # the CPUID instruction is needed for the runtime check. + if test x"$pgac_sse42_crc32_intrinsics" = x"yes" && (test x"$pgac_cv__get_cpuid" = x"yes" || test x"$pgac_cv__cpuid" = x"yes"); then + USE_SSE42_CRC32C_WITH_RUNTIME_CHECK=1 + else + USE_SLICING_BY_8_CRC32C=1 + fi + fi +fi + +# Set PG_CRC32C_OBJS appropriately depending on the selected implementation. +AC_MSG_CHECKING([which CRC-32C implementation to use]) +if test x"$USE_SSE42_CRC32C" = x"1"; then + AC_DEFINE(USE_SSE42_CRC32C, 1, [Define to 1 use Intel SSE 4.2 CRC instructions.]) + PG_CRC32C_OBJS="pg_crc32c_sse42.o" + AC_MSG_RESULT(SSE 4.2) +else + if test x"$USE_SSE42_CRC32C_WITH_RUNTIME_CHECK" = x"1"; then + AC_DEFINE(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK, 1, [Define to 1 to use Intel SSSE 4.2 CRC instructions with a runtime check.]) + PG_CRC32C_OBJS="pg_crc32c_sse42.o pg_crc32c_sb8.o pg_crc32c_choose.o" + AC_MSG_RESULT(SSE 4.2 with runtime check) + else + AC_DEFINE(USE_SLICING_BY_8_CRC32C, 1, [Define to 1 to use Intel SSE 4.2 CRC instructions with a runtime check.]) + PG_CRC32C_OBJS="pg_crc32c_sb8.o" + AC_MSG_RESULT(slicing-by-8) + fi +fi +AC_SUBST(PG_CRC32C_OBJS) + + +# Check that POSIX signals are available if thread safety is enabled. if test "$PORTNAME" != "win32" then PGAC_FUNC_POSIX_SIGNALS diff --git a/src/Makefile.global.in b/src/Makefile.global.in index 7c39d8272467f0a4b76feca11530106021b3664d..4b06fc2d96247750b6fd6ce32f3118e6d8370b69 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -225,6 +225,7 @@ GCC = @GCC@ SUN_STUDIO_CC = @SUN_STUDIO_CC@ CFLAGS = @CFLAGS@ CFLAGS_VECTOR = @CFLAGS_VECTOR@ +CFLAGS_SSE42 = @CFLAGS_SSE42@ # Kind-of compilers @@ -548,6 +549,9 @@ endif LIBOBJS = @LIBOBJS@ +# files needed for the chosen CRC-32C implementation +PG_CRC32C_OBJS = @PG_CRC32C_OBJS@ + LIBS := -lpgcommon -lpgport $(LIBS) # to make ws2_32.lib the last library diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index 202c51a34a52bca7cb5a1f33223ccbb16e42a87e..5688f750af9b212a1f89a9a062bab9e223c0cab0 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -675,6 +675,12 @@ /* Define to 1 if your compiler understands __builtin_unreachable. */ #undef HAVE__BUILTIN_UNREACHABLE +/* Define to 1 if you have __cpuid. */ +#undef HAVE__CPUID + +/* Define to 1 if you have __get_cpuid. */ +#undef HAVE__GET_CPUID + /* Define to 1 if your compiler understands _Static_assert. */ #undef HAVE__STATIC_ASSERT @@ -818,6 +824,15 @@ /* Use replacement snprintf() functions. */ #undef USE_REPL_SNPRINTF +/* Define to 1 to use Intel SSE 4.2 CRC instructions with a runtime check. */ +#undef USE_SLICING_BY_8_CRC32C + +/* Define to 1 use Intel SSE 4.2 CRC instructions. */ +#undef USE_SSE42_CRC32C + +/* Define to 1 to use Intel SSSE 4.2 CRC instructions with a runtime check. */ +#undef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK + /* Define to select SysV-style semaphores. */ #undef USE_SYSV_SEMAPHORES diff --git a/src/include/pg_config.h.win32 b/src/include/pg_config.h.win32 index 1baf64f00561c3c406e2d3b246c0dbb95b6234de..d9fa711ab57a03a39d7153ba3130fe90c319a0bf 100644 --- a/src/include/pg_config.h.win32 +++ b/src/include/pg_config.h.win32 @@ -6,8 +6,8 @@ * * HAVE_CBRT, HAVE_FUNCNAME_FUNC, HAVE_GETOPT, HAVE_GETOPT_H, HAVE_INTTYPES_H, * HAVE_GETOPT_LONG, HAVE_LOCALE_T, HAVE_RINT, HAVE_STRINGS_H, HAVE_STRTOLL, - * HAVE_STRTOULL, HAVE_STRUCT_OPTION, ENABLE_THREAD_SAFETY, - * PG_USE_INLINE, inline + * HAVE_STRTOULL, HAVE_STRUCT_OPTION, ENABLE_THREAD_SAFETY, PG_USE_INLINE, + * inline, USE_SSE42_CRC32C_WITH_RUNTIME_CHECK */ /* Define to the type of arg 1 of 'accept' */ @@ -529,6 +529,12 @@ /* Define to 1 if your compiler understands __builtin_unreachable. */ /* #undef HAVE__BUILTIN_UNREACHABLE */ +/* Define to 1 if you have __cpuid. */ +#define HAVE__CPUID 1 + +/* Define to 1 if you have __get_cpuid. */ +#undef HAVE__GET_CPUID + /* Define to 1 if your compiler understands _Static_assert. */ /* #undef HAVE__STATIC_ASSERT */ @@ -639,6 +645,19 @@ /* Use replacement snprintf() functions. */ #define USE_REPL_SNPRINTF 1 +/* Define to 1 to use Intel SSE 4.2 CRC instructions with a runtime check. */ +#if (_MSC_VER < 1500) +#define USE_SLICING_BY_8_CRC32C 1 +#end + +/* Define to 1 use Intel SSE 4.2 CRC instructions. */ +/* #undef USE_SSE42_CRC32C */ + +/* Define to 1 to use Intel SSSE 4.2 CRC instructions with a runtime check. */ +#if (_MSC_VER >= 1500) +#define USE_SSE42_CRC32C_WITH_RUNTIME_CHECK +#endif + /* Define to select SysV-style semaphores. */ /* #undef USE_SYSV_SEMAPHORES */ diff --git a/src/include/port/pg_crc32c.h b/src/include/port/pg_crc32c.h index d07c0cb623d0e49a537133edb880eb51b83dad60..b14d194fb334c51d6c6dd398bf7785f9901b1d42 100644 --- a/src/include/port/pg_crc32c.h +++ b/src/include/port/pg_crc32c.h @@ -3,6 +3,25 @@ * pg_crc32c.h * Routines for computing CRC-32C checksums. * + * The speed of CRC-32C calculation has a big impact on performance, so we + * jump through some hoops to get the best implementation for each + * platform. Some CPU architectures have special instructions for speeding + * up CRC calculations (e.g. Intel SSE 4.2), on other platforms we use the + * Slicing-by-8 algorithm which uses lookup tables. + * + * The public interface consists of four macros: + * + * INIT_CRC32C(crc) + * Initialize a CRC accumulator + * + * COMP_CRC32C(crc, data, len) + * Accumulate some (more) bytes into a CRC + * + * FIN_CRC32C(crc) + * Finish a CRC calculation + * + * EQ_CRC32C(c1, c2) + * Check for equality of two CRCs. * * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -16,9 +35,32 @@ typedef uint32 pg_crc32c; +/* The INIT and EQ macros are the same for all implementations. */ #define INIT_CRC32C(crc) ((crc) = 0xFFFFFFFF) #define EQ_CRC32C(c1, c2) ((c1) == (c2)) +#if defined(USE_SSE42_CRC32C) +/* Use SSE4.2 instructions. */ +#define COMP_CRC32C(crc, data, len) \ + ((crc) = pg_comp_crc32c_sse42((crc), (data), (len))) +#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) + +extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len); + +#elif defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK) +/* + * Use SSE4.2 instructions, but perform a runtime check first to check that + * they are available. + */ +#define COMP_CRC32C(crc, data, len) \ + ((crc) = pg_comp_crc32c((crc), (data), (len))) +#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) + +extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len); +extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len); +extern pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len); + +#else /* * Use slicing-by-8 algorithm. * @@ -46,4 +88,6 @@ typedef uint32 pg_crc32c; extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len); +#endif + #endif /* PG_CRC32C_H */ diff --git a/src/port/Makefile b/src/port/Makefile index d1c9c8a9877179577f2b63bfbcb53ffef1b2bbb9..bc9b63add0479459d146550c0338e1a22a478400 100644 --- a/src/port/Makefile +++ b/src/port/Makefile @@ -30,10 +30,10 @@ include $(top_builddir)/src/Makefile.global override CPPFLAGS := -I$(top_builddir)/src/port -DFRONTEND $(CPPFLAGS) LIBS += $(PTHREAD_LIBS) -OBJS = $(LIBOBJS) chklocale.o erand48.o inet_net_ntop.o \ +OBJS = $(LIBOBJS) $(PG_CRC32C_OBJS) chklocale.o erand48.o inet_net_ntop.o \ noblock.o path.o pgcheckdir.o pgmkdirp.o pgsleep.o \ pgstrcasecmp.o pqsignal.o \ - qsort.o qsort_arg.o quotes.o sprompt.o tar.o thread.o pg_crc32c_sb8.o + qsort.o qsort_arg.o quotes.o sprompt.o tar.o thread.o # foo_srv.o and foo.o are both built from foo.c, but only foo.o has -DFRONTEND OBJS_SRV = $(OBJS:%.o=%_srv.o) @@ -57,6 +57,10 @@ libpgport.a: $(OBJS) # thread.o needs PTHREAD_CFLAGS (but thread_srv.o does not) thread.o: CFLAGS+=$(PTHREAD_CFLAGS) +# pg_crc32c_sse42.o and its _srv.o version need CFLAGS_SSE42 +pg_crc32c_sse42.o: CFLAGS+=$(CFLAGS_SSE42) +pg_crc32c_sse42_srv.o: CFLAGS+=$(CFLAGS_SSE42) + # # Server versions of object files # diff --git a/src/port/pg_crc32c_choose.c b/src/port/pg_crc32c_choose.c new file mode 100644 index 0000000000000000000000000000000000000000..ba0d1670f82bc9b540da958531b2374c4208d8e7 --- /dev/null +++ b/src/port/pg_crc32c_choose.c @@ -0,0 +1,63 @@ +/*------------------------------------------------------------------------- + * + * pg_crc32c_choose.c + * Choose which CRC-32C implementation to use, at runtime. + * + * Try to the special CRC instructions introduced in Intel SSE 4.2, + * if available on the platform we're running on, but fall back to the + * slicing-by-8 implementation otherwise. + * + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/port/pg_crc32c_choose.c + * + *------------------------------------------------------------------------- + */ + +#include "c.h" + +#ifdef HAVE__GET_CPUID +#include +#endif + +#ifdef HAVE__CPUID +#include +#endif + +#include "port/pg_crc32c.h" + +static bool +pg_crc32c_sse42_available(void) +{ + unsigned int exx[4] = {0, 0, 0, 0}; + +#if defined(HAVE__GET_CPUID) + __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); +#elif defined(HAVE__CPUID) + __cpuid(exx, 1); +#else +#error cpuid instruction not available +#endif + + return (exx[2] & (1 << 20)) != 0; /* SSE 4.2 */ +} + +/* + * This gets called on the first call. It replaces the function pointer + * so that subsequent calls are routed directly to the chosen implementation. + */ +static pg_crc32c +pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len) +{ + if (pg_crc32c_sse42_available()) + pg_comp_crc32c = pg_comp_crc32c_sse42; + else + pg_comp_crc32c = pg_comp_crc32c_sb8; + + return pg_comp_crc32c(crc, data, len); +} + +pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose; diff --git a/src/port/pg_crc32c_sse42.c b/src/port/pg_crc32c_sse42.c new file mode 100644 index 0000000000000000000000000000000000000000..b6107103bef5d32f0dbf2ec8123b5c8f084119e0 --- /dev/null +++ b/src/port/pg_crc32c_sse42.c @@ -0,0 +1,52 @@ +/*------------------------------------------------------------------------- + * + * pg_crc32c_sse42.c + * Compute CRC-32C checksum using Intel SSE 4.2 instructions. + * + * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/port/pg_crc32c_sse42.c + * + *------------------------------------------------------------------------- + */ +#include "c.h" + +#include "port/pg_crc32c.h" + +#include + +pg_crc32c +pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len) +{ + const unsigned char *p = data; + const uint64 *p8; + + /* + * Process eight bytes of data at a time. + * + * NB: We do unaligned 8-byte accesses here. The Intel architecture + * allows that, and performance testing didn't show any performance + * gain from aligning the beginning address. + */ + p8 = (const uint64 *) p; + while (len >= 8) + { + crc = (uint32) _mm_crc32_u64(crc, *p8++); + len -= 8; + } + + /* + * Handle any remaining bytes one at a time. + */ + p = (const unsigned char *) p8; + while (len > 0) + { + crc = _mm_crc32_u8(crc, *p++); + len--; + } + + return crc; +} diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm index b2c0dfbd7b7089f0f12233af19bf0b828c89952e..39281db901173ed5ce21dba226b9e45c476710ec 100644 --- a/src/tools/msvc/Mkvcbuild.pm +++ b/src/tools/msvc/Mkvcbuild.pm @@ -92,10 +92,21 @@ sub mkvcbuild pgcheckdir.c pgmkdirp.c pgsleep.c pgstrcasecmp.c pqsignal.c mkdtemp.c qsort.c qsort_arg.c quotes.c system.c sprompt.c tar.c thread.c getopt.c getopt_long.c dirent.c - win32env.c win32error.c win32setlocale.c pg_crc32c_sb8.c); + win32env.c win32error.c win32setlocale.c); push(@pgportfiles, 'rint.c') if ($vsVersion < '12.00'); + if ($vsVersion >= '9.00') + { + push(@pgportfiles, 'pg_crc32c_choose.c'); + push(@pgportfiles, 'pg_crc32c_sse42.c'); + push(@pgportfiles, 'pg_crc32c_sb8.c'); + } + else + { + push(@pgportfiles, 'pg_crc32c_sb8.c') + } + our @pgcommonallfiles = qw( exec.c pg_lzcompress.c pgfnames.c psprintf.c relpath.c rmtree.c string.c username.c wait_error.c);