diff --git a/arm/arm_init.c b/arm/arm_init.c index fb3d50d04ae8dc5dad891905d13127a379494e59..3c012317417630696c4f0233ba413cb30693d2a2 100644 --- a/arm/arm_init.c +++ b/arm/arm_init.c @@ -1,9 +1,9 @@ /* arm_init.c - NEON optimised filter functions * - * Copyright (c) 2014 Glenn Randers-Pehrson + * Copyright (c) 2014,2016 Glenn Randers-Pehrson * Written by Mans Rullgard, 2011. - * Last changed in libpng 1.6.16 [December 22, 2014] + * Last changed in libpng 1.6.22 [(PENDING RELEASE)] * * This code is released under the libpng license. * For conditions of distribution and use, see the disclaimer @@ -66,6 +66,7 @@ png_init_filter_functions_neon(png_structp pp, unsigned int bpp) * wrong order of the 'ON' and 'default' cases. UNSET now defaults to OFF, * as documented in png.h */ + png_debug(1, "in png_init_filter_functions_neon"); #ifdef PNG_ARM_NEON_API_SUPPORTED switch ((pp->options >> PNG_ARM_NEON) & 3) { diff --git a/arm/filter_neon_intrinsics.c b/arm/filter_neon_intrinsics.c index d42c78890911c2cbf6ac035970dd8233f7421d04..22923b4b4fbb64a249b4e478c94fd3648eb463ed 100644 --- a/arm/filter_neon_intrinsics.c +++ b/arm/filter_neon_intrinsics.c @@ -1,11 +1,11 @@ /* filter_neon_intrinsics.c - NEON optimised filter functions * - * Copyright (c) 2014 Glenn Randers-Pehrson + * Copyright (c) 2014,2016 Glenn Randers-Pehrson * Written by James Yu , October 2013. * Based on filter_neon.S, written by Mans Rullgard, 2011. * - * Last changed in libpng 1.6.16 [December 22, 2014] + * Last changed in libpng 1.6.22 [(PENDING RELEASE)] * * This code is released under the libpng license. * For conditions of distribution and use, see the disclaimer @@ -47,6 +47,8 @@ png_read_filter_row_up_neon(png_row_infop row_info, png_bytep row, png_bytep rp_stop = row + row_info->rowbytes; png_const_bytep pp = prev_row; + png_debug(1, "in png_read_filter_row_up_neon"); + for (; rp < rp_stop; rp += 16, pp += 16) { uint8x16_t qrp, qpp; @@ -72,6 +74,8 @@ png_read_filter_row_sub3_neon(png_row_infop row_info, png_bytep row, uint8x8x4_t vdest; vdest.val[3] = vdup_n_u8(0); + png_debug(1, "in png_read_filter_row_sub3_neon"); + for (; rp < rp_stop;) { uint8x8_t vtmp1, vtmp2; @@ -113,6 +117,8 @@ png_read_filter_row_sub4_neon(png_row_infop row_info, png_bytep row, uint8x8x4_t vdest; vdest.val[3] = vdup_n_u8(0); + png_debug(1, "in png_read_filter_row_sub4_neon"); + for (; rp < rp_stop; rp += 16) { uint32x2x4_t vtmp = vld4_u32(png_ptr(uint32_t,rp)); @@ -148,6 +154,8 @@ png_read_filter_row_avg3_neon(png_row_infop row_info, png_bytep row, vrpt = png_ptr(uint8x8x2_t,&vtmp); vrp = *vrpt; + png_debug(1, "in png_read_filter_row_avg3_neon"); + for (; rp < rp_stop; pp += 12) { uint8x8_t vtmp1, vtmp2, vtmp3; @@ -207,6 +215,8 @@ png_read_filter_row_avg4_neon(png_row_infop row_info, png_bytep row, uint8x8x4_t vdest; vdest.val[3] = vdup_n_u8(0); + png_debug(1, "in png_read_filter_row_avg4_neon"); + for (; rp < rp_stop; rp += 16, pp += 16) { uint32x2x4_t vtmp; @@ -280,6 +290,8 @@ png_read_filter_row_paeth3_neon(png_row_infop row_info, png_bytep row, vrpt = png_ptr(uint8x8x2_t,&vtmp); vrp = *vrpt; + png_debug(1, "in png_read_filter_row_paeth3_neon"); + for (; rp < rp_stop; pp += 12) { uint8x8x2_t *vppt; @@ -339,6 +351,8 @@ png_read_filter_row_paeth4_neon(png_row_infop row_info, png_bytep row, uint8x8x4_t vdest; vdest.val[3] = vdup_n_u8(0); + png_debug(1, "in png_read_filter_row_paeth4_neon"); + for (; rp < rp_stop; rp += 16, pp += 16) { uint32x2x4_t vtmp; diff --git a/contrib/intel/INSTALL b/contrib/intel/INSTALL index d13675449ef8e8a7c75e55fa41b2f70cdf0db827..105c953feb0f0e2b39be4752eb1fed9cba3543d4 100644 --- a/contrib/intel/INSTALL +++ b/contrib/intel/INSTALL @@ -2,3 +2,7 @@ To enable SSE support in libpng, manually edit configure.ac and Makefile.am, following the instructions in the configure.ac.patch and Makefile.am.patch files, then configure with -DPNG_INTEL_SSE in CPPFLAGS. + +If you have moved the *.c files to a different directory, be sure to update +the '#include "../../pngpriv.h"' line in both files if necessary to point +to the correct relative location of pngpriv.h. diff --git a/contrib/intel/filter_sse2_intrinsics.c b/contrib/intel/filter_sse2_intrinsics.c index 7c359b580e92420b49ff301e34a923b029958130..fcd875f6b3fe553972e016d0a2b7d131df279c74 100644 --- a/contrib/intel/filter_sse2_intrinsics.c +++ b/contrib/intel/filter_sse2_intrinsics.c @@ -4,7 +4,7 @@ * Copyright (c) 2016 Google, Inc. * Written by Mike Klein and Matt Sarett * Derived from arm/filter_neon_intrinsics.c, which was - * Copyright (c) 2014 Glenn Randers-Pehrson + * Copyright (c) 2014,2016 Glenn Randers-Pehrson * * Last changed in libpng 1.6.22 [(PENDING RELEASE)] * @@ -55,6 +55,7 @@ void png_read_filter_row_sub3_sse2(png_row_infop row_info, png_bytep row, * There is no pixel to the left of the first pixel. It's encoded directly. * That works with our main loop if we just say that left pixel was zero. */ + png_debug(1, "in png_read_filter_row_sub3_sse2"); __m128i a, d = _mm_setzero_si128(); int rb = row_info->rowbytes; @@ -75,6 +76,7 @@ void png_read_filter_row_sub4_sse2(png_row_infop row_info, png_bytep row, * There is no pixel to the left of the first pixel. It's encoded directly. * That works with our main loop if we just say that left pixel was zero. */ + png_debug(1, "in png_read_filter_row_sub4_sse2"); __m128i a, d = _mm_setzero_si128(); int rb = row_info->rowbytes; @@ -96,6 +98,7 @@ void png_read_filter_row_avg3_sse2(png_row_infop row_info, png_bytep row, * predicted to be half of the pixel above it. So again, this works * perfectly with our loop if we make sure a starts at zero. */ + png_debug(1, "in png_read_filter_row_avg3_sse2"); const __m128i zero = _mm_setzero_si128(); __m128i b; __m128i a, d = zero; @@ -128,6 +131,7 @@ void png_read_filter_row_avg4_sse2(png_row_infop row_info, png_bytep row, * predicted to be half of the pixel above it. So again, this works * perfectly with our loop if we make sure a starts at zero. */ + png_debug(1, "in png_read_filter_row_avg4_sse2"); const __m128i zero = _mm_setzero_si128(); __m128i b; __m128i a, d = zero; @@ -196,6 +200,7 @@ void png_read_filter_row_paeth3_sse2(png_row_infop row_info, png_bytep row, * Here we zero b and d, which become c and a respectively at the start of * the loop. */ + png_debug(1, "in png_read_filter_row_paeth3_sse2"); const __m128i zero = _mm_setzero_si128(); __m128i c, b = zero, a, d = zero; @@ -254,6 +259,7 @@ void png_read_filter_row_paeth4_sse2(png_row_infop row_info, png_bytep row, * Here we zero b and d, which become c and a respectively at the start of * the loop. */ + png_debug(1, "in png_read_filter_row_paeth4_sse2"); const __m128i zero = _mm_setzero_si128(); __m128i c, b = zero, a, d = zero; diff --git a/contrib/intel/intel_init.c b/contrib/intel/intel_init.c index fc0d9abfdb3fdc78af9087e4b18cb65c6c7a42d4..357e147b9d9062dfd76e96ffd58cd032a6cb4e1c 100644 --- a/contrib/intel/intel_init.c +++ b/contrib/intel/intel_init.c @@ -4,7 +4,7 @@ * Copyright (c) 2016 Google, Inc. * Written by Mike Klein and Matt Sarett * Derived from arm/arm_init.c, which was - * Copyright (c) 2014 Glenn Randers-Pehrson + * Copyright (c) 2014,2016 Glenn Randers-Pehrson * * Last changed in libpng 1.6.22 [(PENDING RELEASE)] * @@ -29,6 +29,7 @@ png_init_filter_functions_sse2(png_structp pp, unsigned int bpp) * Most of these can be implemented using only MMX and 64-bit registers, * but they end up a bit slower than using the equally-ubiquitous SSE2. */ + png_debug(1, "in png_init_filter_functions_sse2"); if (bpp == 3) { pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_sse2;