From bef76802de97c7ba917c4d397d9e7d648866c770 Mon Sep 17 00:00:00 2001 From: Glenn Randers-Pehrson Date: Thu, 5 Jan 2017 18:09:33 -0600 Subject: [PATCH] [libpng16] Moved SSE2 optimization code into the main libpng source directory. Configure libpng with "configure --enable-intel-sse" or compile libpng with "-DPNG_INTEL_SSE" in CPPFLAGS to enable it. This patch was previously applied to libpng-1.6.28rc03 but withdrawn to allow time for QA. --- ANNOUNCE | 7 +- CHANGES | 5 +- contrib/intel/INSTALL | 158 --------------- contrib/intel/intel_sse.patch | 190 ------------------ .../intel => intel}/filter_sse2_intrinsics.c | 9 +- {contrib/intel => intel}/intel_init.c | 11 +- png.h | 11 +- pngpriv.h | 56 ++++++ 8 files changed, 80 insertions(+), 367 deletions(-) delete mode 100644 contrib/intel/INSTALL delete mode 100644 contrib/intel/intel_sse.patch rename {contrib/intel => intel}/filter_sse2_intrinsics.c (98%) rename {contrib/intel => intel}/intel_init.c (86%) diff --git a/ANNOUNCE b/ANNOUNCE index 0ece79787..5722f7907 100644 --- a/ANNOUNCE +++ b/ANNOUNCE @@ -1,4 +1,4 @@ -Libpng 1.6.29beta01 - January 5, 2017 +Libpng 1.6.29beta01 - January 6, 2017 This is not intended to be a public release. It will be replaced within a few weeks by a public version or by another test version. @@ -25,8 +25,11 @@ Other information: Changes since the last public release (1.6.28): -Version 1.6.29beta01 [January 5, 2017] +Version 1.6.29beta01 [January 6, 2017] Readded "include(GNUInstallDirs)" to CMakeLists.txt (Gianfranco Costamagna). + Moved SSE2 optimization code into the main libpng source directory. + Configure libpng with "configure --enable-intel-sse" or compile + libpng with "-DPNG_INTEL_SSE" in CPPFLAGS to enable it. 
Send comments/corrections/commendations to png-mng-implement at lists.sf.net (subscription required; visit diff --git a/CHANGES b/CHANGES index 546516580..789adb9e8 100644 --- a/CHANGES +++ b/CHANGES @@ -5794,8 +5794,11 @@ Version 1.6.28rc03 [January 4, 2017] Version 1.6.28 [January 5, 2017] No changes. -Version 1.6.29beta01 [January 5, 2017] +Version 1.6.29beta01 [January 6, 2017] Readded "include(GNUInstallDirs)" to CMakeLists.txt (Gianfranco Costamagna). + Moved SSE2 optimization code into the main libpng source directory. + Configure libpng with "configure --enable-intel-sse" or compile + libpng with "-DPNG_INTEL_SSE" in CPPFLAGS to enable it. Send comments/corrections/commendations to png-mng-implement at lists.sf.net (subscription required; visit diff --git a/contrib/intel/INSTALL b/contrib/intel/INSTALL deleted file mode 100644 index cd5cdd94e..000000000 --- a/contrib/intel/INSTALL +++ /dev/null @@ -1,158 +0,0 @@ -Enabling SSE support - -Copyright (c) 2016 Google, Inc. -Written by Mike Klein, Matt Sarett - -This INSTALL file written by Glenn Randers-Pehrson, 2016. - -If you have moved intel_init.c and filter_sse2_intrinsics.c to a different -directory, be sure to update the '#include "../../pngpriv.h"' line in both -files if necessary to point to the correct relative location of pngpriv.h -with respect to the new location of those files. - -To enable SSE support in libpng, follow the instructions in I, II, or III, -below: - -I. Using patched "configure" scripts: - -First, apply intel_sse.patch in your build directory. 
- - patch -i contrib/intel/intel_sse.patch -p1 - -Then, if you are not building in a new GIT clone, e.g., in a tar -distribution, remove any existing pre-built configure scripts: - - ./configure --enable-maintainer-mode - make maintainer-clean - ./autogen.sh --maintainer --clean - -Finally, configure libpng with -DPNG_INTEL_SSE in CPPFLAGS: - - ./autogen.sh --maintainer - CPPFLAGS="-DPNG_INTEL_SSE" ./configure [options] - make CPPFLAGS="-DPNG_INTEL_SSE" [options] - make - -II. Using a custom makefile: - -If you are using a custom makefile makefile, you will have to update it -manually to include contrib/intel/*.o in the dependencies, and to define -PNG_INTEL_SSE. - -III. Using manually updated "configure" scripts: - -If you prefer, manually edit pngpriv.h, configure.ac, and Makefile.am, -following the instructions below, then follow the instructions in -section II of INSTALL in the main libpng directory, then configure libpng -with -DPNG_INTEL_SSE in CPPFLAGS. - -1. Add the following code to configure.ac under HOST SPECIFIC OPTIONS -directly beneath the section for ARM: - ------------------cut---------------- -# INTEL -# ===== -# -# INTEL SSE (SIMD) support. - -AC_ARG_ENABLE([intel-sse], - AS_HELP_STRING([[[--enable-intel-sse]]], - [Enable Intel SSE optimizations: =no/off, yes/on:] - [no/off: disable the optimizations;] - [yes/on: enable the optimizations.] 
- [If not specified: determined by the compiler.]), - [case "$enableval" in - no|off) - # disable the default enabling: - AC_DEFINE([PNG_INTEL_SSE_OPT], [0], - [Disable Intel SSE optimizations]) - # Prevent inclusion of the assembler files below: - enable_intel_sse=no;; - yes|on) - AC_DEFINE([PNG_INTEL_SSE_OPT], [1], - [Enable Intel SSE optimizations]);; - *) - AC_MSG_ERROR([--enable-intel-sse=${enable_intel_sse}: invalid value]) - esac]) - -# Add Intel specific files to all builds where the host_cpu is Intel ('x86*') -# or where Intel optimizations were explicitly requested (this allows a -# fallback if a future host CPU does not match 'x86*') -AM_CONDITIONAL([PNG_INTEL_SSE], - [test "$enable_intel_sse" != 'no' && - case "$host_cpu" in - i?86|x86_64) :;; - *) test "$enable_intel_sse" != '';; - esac]) ------------------cut---------------- - -2. Add the following code to Makefile.am under HOST SPECIFIC OPTIONS -directly beneath the "if PNG_ARM_NEON ... endif" statement: - ------------------cut---------------- -if PNG_INTEL_SSE -libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += contrib/intel/intel_init.c\ - contrib/intel/filter_sse2_intrinsics.c -endif ------------------cut---------------- - -3. Add the following lines to pngpriv.h, following the PNG_ARM_NEON_OPT -code: - ------------------cut---------------- -#ifndef PNG_INTEL_SSE_OPT -# ifdef PNG_INTEL_SSE - /* Only check for SSE if the build configuration has been modified to - * enable SSE optimizations. This means that these optimizations will - * be off by default. See contrib/intel for more details. 
- */ -# if defined(__SSE4_1__) || defined(__AVX__) || defined(__SSSE3__) || \ - defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \ - (defined(_M_IX86_FP) && _M_IX86_FP >= 2) -# define PNG_INTEL_SSE_OPT 1 -# endif -# endif -#endif - -#if PNG_INTEL_SSE_OPT > 0 -# ifndef PNG_INTEL_SSE_IMPLEMENTATION -# if defined(__SSE4_1__) || defined(__AVX__) - /* We are not actually using AVX, but checking for AVX is the best - way we can detect SSE4.1 and SSSE3 on MSVC. - */ -# define PNG_INTEL_SSE_IMPLEMENTATION 3 -# elif defined(__SSSE3__) -# define PNG_INTEL_SSE_IMPLEMENTATION 2 -# elif defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \ - (defined(_M_IX86_FP) && _M_IX86_FP >= 2) -# define PNG_INTEL_SSE_IMPLEMENTATION 1 -# else -# define PNG_INTEL_SSE_IMPLEMENTATION 0 -# endif -# endif - -# if PNG_INTEL_SSE_IMPLEMENTATION > 0 -# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_sse2 -# endif -#endif - ------------------cut---------------- - -4. Add the following lines to pngpriv.h, following the prototype for -png_read_filter_row_paeth4_neon: - ------------------cut---------------- -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_sse2,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_sse2,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_sse2,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_sse2,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_sse2,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_sse2,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); - ------------------cut---------------- diff --git 
a/contrib/intel/intel_sse.patch b/contrib/intel/intel_sse.patch deleted file mode 100644 index 24d4883c7..000000000 --- a/contrib/intel/intel_sse.patch +++ /dev/null @@ -1,190 +0,0 @@ -diff --git a/configure.ac b/configure.ac ---- a/configure.ac 2016-08-29 11:46:27.000000000 -0400 -+++ b/configure.ac 2016-08-29 16:57:03.866355018 -0400 -@@ -386,16 +386,51 @@ AC_ARG_ENABLE([mips-msa], - # future host CPU does not match 'mips*') - - AM_CONDITIONAL([PNG_MIPS_MSA], - [test "$enable_mips_msa" != 'no' && - case "$host_cpu" in - mipsel*|mips64el*) :;; - esac]) - -+# INTEL -+# ===== -+# -+# INTEL SSE (SIMD) support. -+ -+AC_ARG_ENABLE([intel-sse], -+ AS_HELP_STRING([[[--enable-intel-sse]]], -+ [Enable Intel SSE optimizations: =no/off, yes/on:] -+ [no/off: disable the optimizations;] -+ [yes/on: enable the optimizations.] -+ [If not specified: determined by the compiler.]), -+ [case "$enableval" in -+ no|off) -+ # disable the default enabling: -+ AC_DEFINE([PNG_INTEL_SSE_OPT], [0], -+ [Disable Intel SSE optimizations]) -+ # Prevent inclusion of the assembler files below: -+ enable_intel_sse=no;; -+ yes|on) -+ AC_DEFINE([PNG_INTEL_SSE_OPT], [1], -+ [Enable Intel SSE optimizations]);; -+ *) -+ AC_MSG_ERROR([--enable-intel-sse=${enable_intel_sse}: invalid value]) -+ esac]) -+ -+# Add Intel specific files to all builds where the host_cpu is Intel ('x86*') -+# or where Intel optimizations were explicitly requested (this allows a -+# fallback if a future host CPU does not match 'x86*') -+AM_CONDITIONAL([PNG_INTEL_SSE], -+ [test "$enable_intel_sse" != 'no' && -+ case "$host_cpu" in -+ i?86|x86_64) :;; -+ *) test "$enable_intel_sse" != '';; -+ esac]) -+ - AC_MSG_NOTICE([[Extra options for compiler: $PNG_COPTS]]) - - # Config files, substituting as above - AC_CONFIG_FILES([Makefile libpng.pc:libpng.pc.in]) - AC_CONFIG_FILES([libpng-config:libpng-config.in], - [chmod +x libpng-config]) - - AC_OUTPUT -diff --git a/Makefile.am b/Makefile.am ---- a/Makefile.am 2016-08-29 
11:46:27.000000000 -0400 -+++ b/Makefile.am 2016-08-29 16:57:45.955528215 -0400 -@@ -97,16 +97,21 @@ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SO - arm/filter_neon.S arm/filter_neon_intrinsics.c - endif - - if PNG_MIPS_MSA - libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += mips/mips_init.c\ - mips/filter_msa_intrinsics.c - endif - -+if PNG_INTEL_SSE -+libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += contrib/intel/intel_init.c\ -+ contrib/intel/filter_sse2_intrinsics.c -+endif -+ - nodist_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = pnglibconf.h - - libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_LDFLAGS = -no-undefined -export-dynamic \ - -version-number @PNGLIB_MAJOR@@PNGLIB_MINOR@:@PNGLIB_RELEASE@:0 - - if HAVE_LD_VERSION_SCRIPT - # Versioned symbols and restricted exports - if HAVE_SOLARIS_LD -diff --git a/pngpriv.h b/pngpriv.h ---- debug16/pngpriv.h 2016-08-30 10:46:36.000000000 -0400 -+++ libpng16/pngpriv.h 2016-08-30 11:57:25.672280202 -0400 -@@ -185,16 +185,52 @@ - #ifndef PNG_MIPS_MSA_OPT - # if defined(__mips_msa) && (__mips_isa_rev >= 5) && defined(PNG_ALIGNED_MEMORY_SUPPORTED) - # define PNG_MIPS_MSA_OPT 2 - # else - # define PNG_MIPS_MSA_OPT 0 - # endif - #endif - -+#ifndef PNG_INTEL_SSE_OPT -+# ifdef PNG_INTEL_SSE -+ /* Only check for SSE if the build configuration has been modified to -+ * enable SSE optimizations. This means that these optimizations will -+ * be off by default. See contrib/intel for more details. -+ */ -+# if defined(__SSE4_1__) || defined(__AVX__) || defined(__SSSE3__) || \ -+ defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \ -+ (defined(_M_IX86_FP) && _M_IX86_FP >= 2) -+# define PNG_INTEL_SSE_OPT 1 -+# endif -+# endif -+#endif -+ -+#if PNG_INTEL_SSE_OPT > 0 -+# ifndef PNG_INTEL_SSE_IMPLEMENTATION -+# if defined(__SSE4_1__) || defined(__AVX__) -+ /* We are not actually using AVX, but checking for AVX is the best -+ way we can detect SSE4.1 and SSSE3 on MSVC. 
-+ */ -+# define PNG_INTEL_SSE_IMPLEMENTATION 3 -+# elif defined(__SSSE3__) -+# define PNG_INTEL_SSE_IMPLEMENTATION 2 -+# elif defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \ -+ (defined(_M_IX86_FP) && _M_IX86_FP >= 2) -+# define PNG_INTEL_SSE_IMPLEMENTATION 1 -+# else -+# define PNG_INTEL_SSE_IMPLEMENTATION 0 -+# endif -+# endif -+ -+# if PNG_INTEL_SSE_IMPLEMENTATION > 0 -+# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_sse2 -+# endif -+#endif -+ - #if PNG_MIPS_MSA_OPT > 0 - # define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_msa - # ifndef PNG_MIPS_MSA_IMPLEMENTATION - # if defined(__mips_msa) - # if defined(__clang__) - # elif defined(__GNUC__) - # if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 7) - # define PNG_MIPS_MSA_IMPLEMENTATION 2 -@@ -1251,16 +1287,31 @@ PNG_INTERNAL_FUNCTION(void,png_read_filt - PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_msa,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); - PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_msa,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); - PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_msa,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); - #endif - -+#if PNG_INTEL_SSE_IMPLEMENTATION > 0 -+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_sse2,(png_row_infop -+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_sse2,(png_row_infop -+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_sse2,(png_row_infop -+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_sse2,(png_row_infop -+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_sse2,(png_row_infop -+ row_info, png_bytep row, 
png_const_bytep prev_row),PNG_EMPTY); -+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_sse2,(png_row_infop -+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -+#endif -+ - /* Choose the best filter to use and filter the row data */ - PNG_INTERNAL_FUNCTION(void,png_write_find_filter,(png_structrp png_ptr, - png_row_infop row_info),PNG_EMPTY); - - #ifdef PNG_SEQUENTIAL_READ_SUPPORTED - PNG_INTERNAL_FUNCTION(void,png_read_IDAT_data,(png_structrp png_ptr, - png_bytep output, png_alloc_size_t avail_out),PNG_EMPTY); - /* Read 'avail_out' bytes of data from the IDAT stream. If the output buffer -@@ -1986,16 +2037,21 @@ PNG_INTERNAL_FUNCTION(void, PNG_FILTER_O - PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_neon, - (png_structp png_ptr, unsigned int bpp), PNG_EMPTY); - #endif - - #if PNG_MIPS_MSA_OPT > 0 - PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_msa, - (png_structp png_ptr, unsigned int bpp), PNG_EMPTY); - #endif -+ -+# if PNG_INTEL_SSE_IMPLEMENTATION > 0 -+PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_sse2, -+ (png_structp png_ptr, unsigned int bpp), PNG_EMPTY); -+# endif - #endif - - PNG_INTERNAL_FUNCTION(png_uint_32, png_check_keyword, (png_structrp png_ptr, - png_const_charp key, png_bytep new_key), PNG_EMPTY); - - /* Maintainer: Put new private prototypes here ^ */ - - #include "pngdebug.h" diff --git a/contrib/intel/filter_sse2_intrinsics.c b/intel/filter_sse2_intrinsics.c similarity index 98% rename from contrib/intel/filter_sse2_intrinsics.c rename to intel/filter_sse2_intrinsics.c index b02840c55..2534391c8 100644 --- a/contrib/intel/filter_sse2_intrinsics.c +++ b/intel/filter_sse2_intrinsics.c @@ -1,19 +1,18 @@ /* filter_sse2_intrinsics.c - SSE2 optimized filter functions * - * Copyright (c) 2016 Google, Inc. 
+ * Copyright (c) 2016-2017 Glenn Randers-Pehrson * Written by Mike Klein and Matt Sarett - * Derived from arm/filter_neon_intrinsics.c, which was - * Copyright (c) 2014,2016 Glenn Randers-Pehrson + * Derived from arm/filter_neon_intrinsics.c * - * Last changed in libpng 1.6.24 [August 4, 2016] + * Last changed in libpng 1.6.29 [(PENDING RELEASE)] * * This code is released under the libpng license. * For conditions of distribution and use, see the disclaimer * and license in png.h */ -#include "../../pngpriv.h" +#include "../pngpriv.h" #ifdef PNG_READ_SUPPORTED diff --git a/contrib/intel/intel_init.c b/intel/intel_init.c similarity index 86% rename from contrib/intel/intel_init.c rename to intel/intel_init.c index 328e90e9a..4472a9367 100644 --- a/contrib/intel/intel_init.c +++ b/intel/intel_init.c @@ -1,19 +1,18 @@ /* intel_init.c - SSE2 optimized filter functions * - * Copyright (c) 2016 Google, Inc. - * Written by Mike Klein and Matt Sarett - * Derived from arm/arm_init.c, which was - * Copyright (c) 2014,2016 Glenn Randers-Pehrson + * Copyright (c) 2016-2017 Glenn Randers-Pehrson + * Written by Mike Klein and Matt Sarett, Google, Inc. + * Derived from arm/arm_init.c * - * Last changed in libpng 1.6.22 [May 26, 2016] + * Last changed in libpng 1.6.29 [(PENDING RELEASE)] * * This code is released under the libpng license. 
* For conditions of distribution and use, see the disclaimer * and license in png.h */ -#include "../../pngpriv.h" +#include "../pngpriv.h" #ifdef PNG_READ_SUPPORTED #if PNG_INTEL_SSE_IMPLEMENTATION > 0 diff --git a/png.h b/png.h index aef773466..cad9a825e 100644 --- a/png.h +++ b/png.h @@ -1,7 +1,7 @@ /* png.h - header file for PNG reference library * - * libpng version 1.6.29beta01, January 5, 2017 + * libpng version 1.6.29beta01, January 6, 2017 * * Copyright (c) 1998-2002,2004,2006-2017 Glenn Randers-Pehrson * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) @@ -12,7 +12,7 @@ * Authors and maintainers: * libpng versions 0.71, May 1995, through 0.88, January 1996: Guy Schalnat * libpng versions 0.89, June 1996, through 0.96, May 1997: Andreas Dilger - * libpng versions 0.97, January 1998, through 1.6.29beta01, January 5, 2017: + * libpng versions 0.97, January 1998, through 1.6.29beta01, January 6, 2017: * Glenn Randers-Pehrson. * See also "Contributing Authors", below. */ @@ -25,7 +25,7 @@ * * This code is released under the libpng license. * - * libpng versions 1.0.7, July 1, 2000 through 1.6.29beta01, January 5, 2017 are + * libpng versions 1.0.7, July 1, 2000 through 1.6.29beta01, January 6, 2017 are * Copyright (c) 2000-2002, 2004, 2006-2017 Glenn Randers-Pehrson, are * derived from libpng-1.0.6, and are distributed according to the same * disclaimer and license as libpng-1.0.6 with the following individuals @@ -38,6 +38,7 @@ * Gilles Vollant * James Yu * Mandar Sahastrabuddhe + * Google Inc. * * and with the following additions to the disclaimer: * @@ -239,7 +240,7 @@ * Y2K compliance in libpng: * ========================= * - * January 5, 2017 + * January 6, 2017 * * Since the PNG Development group is an ad-hoc body, we can't make * an official declaration. 
@@ -308,7 +309,7 @@ /* Version information for png.h - this should match the version in png.c */ #define PNG_LIBPNG_VER_STRING "1.6.29beta01" -#define PNG_HEADER_VERSION_STRING " libpng version 1.6.29beta01 - January 5, 2017\n" +#define PNG_HEADER_VERSION_STRING " libpng version 1.6.29beta01 - January 6, 2017\n" #define PNG_LIBPNG_VER_SONUM 16 #define PNG_LIBPNG_VER_DLLNUM 16 diff --git a/pngpriv.h b/pngpriv.h index ed61165b2..50ff68b1c 100644 --- a/pngpriv.h +++ b/pngpriv.h @@ -190,6 +190,42 @@ # endif #endif +#ifndef PNG_INTEL_SSE_OPT +# ifdef PNG_INTEL_SSE + /* Only check for SSE if the build configuration has been modified to + * enable SSE optimizations. This means that these optimizations will + * be off by default. See the intel directory for more details. + */ +# if defined(__SSE4_1__) || defined(__AVX__) || defined(__SSSE3__) || \ + defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP >= 2) +# define PNG_INTEL_SSE_OPT 1 +# endif +# endif +#endif + +#if PNG_INTEL_SSE_OPT > 0 +# ifndef PNG_INTEL_SSE_IMPLEMENTATION +# if defined(__SSE4_1__) || defined(__AVX__) + /* We are not actually using AVX, but checking for AVX is the best + way we can detect SSE4.1 and SSSE3 on MSVC.
+ */ +# define PNG_INTEL_SSE_IMPLEMENTATION 3 +# elif defined(__SSSE3__) +# define PNG_INTEL_SSE_IMPLEMENTATION 2 +# elif defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP >= 2) +# define PNG_INTEL_SSE_IMPLEMENTATION 1 +# else +# define PNG_INTEL_SSE_IMPLEMENTATION 0 +# endif +# endif + +# if PNG_INTEL_SSE_IMPLEMENTATION > 0 +# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_sse2 +# endif +#endif + #if PNG_MIPS_MSA_OPT > 0 # define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_msa # ifndef PNG_MIPS_MSA_IMPLEMENTATION @@ -1256,6 +1292,21 @@ PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_msa,(png_row_infop row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); #endif +#if PNG_INTEL_SSE_IMPLEMENTATION > 0 +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_sse2,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_sse2,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_sse2,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_sse2,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_sse2,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_sse2,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +#endif + /* Choose the best filter to use and filter the row data */ PNG_INTERNAL_FUNCTION(void,png_write_find_filter,(png_structrp png_ptr, png_row_infop row_info),PNG_EMPTY); @@ -1991,6 +2042,11 @@ PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_neon, PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_msa, (png_structp png_ptr, unsigned int bpp), PNG_EMPTY); 
#endif + +# if PNG_INTEL_SSE_IMPLEMENTATION > 0 +PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_sse2, + (png_structp png_ptr, unsigned int bpp), PNG_EMPTY); +# endif #endif PNG_INTERNAL_FUNCTION(png_uint_32, png_check_keyword, (png_structrp png_ptr, -- GitLab