提交 da9d1d7a 编写于 作者: G Glenn Randers-Pehrson

[libpng16] Added png_debug() statements to the new intel code and the arm code.

上级 52846504
/* arm_init.c - NEON optimised filter functions /* arm_init.c - NEON optimised filter functions
* *
* Copyright (c) 2014 Glenn Randers-Pehrson * Copyright (c) 2014,2016 Glenn Randers-Pehrson
* Written by Mans Rullgard, 2011. * Written by Mans Rullgard, 2011.
* Last changed in libpng 1.6.16 [December 22, 2014] * Last changed in libpng 1.6.22 [(PENDING RELEASE)]
* *
* This code is released under the libpng license. * This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer * For conditions of distribution and use, see the disclaimer
...@@ -66,6 +66,7 @@ png_init_filter_functions_neon(png_structp pp, unsigned int bpp) ...@@ -66,6 +66,7 @@ png_init_filter_functions_neon(png_structp pp, unsigned int bpp)
* wrong order of the 'ON' and 'default' cases. UNSET now defaults to OFF, * wrong order of the 'ON' and 'default' cases. UNSET now defaults to OFF,
* as documented in png.h * as documented in png.h
*/ */
png_debug(1, "in png_init_filter_functions_neon");
#ifdef PNG_ARM_NEON_API_SUPPORTED #ifdef PNG_ARM_NEON_API_SUPPORTED
switch ((pp->options >> PNG_ARM_NEON) & 3) switch ((pp->options >> PNG_ARM_NEON) & 3)
{ {
......
/* filter_neon_intrinsics.c - NEON optimised filter functions /* filter_neon_intrinsics.c - NEON optimised filter functions
* *
* Copyright (c) 2014 Glenn Randers-Pehrson * Copyright (c) 2014,2016 Glenn Randers-Pehrson
* Written by James Yu <james.yu at linaro.org>, October 2013. * Written by James Yu <james.yu at linaro.org>, October 2013.
* Based on filter_neon.S, written by Mans Rullgard, 2011. * Based on filter_neon.S, written by Mans Rullgard, 2011.
* *
* Last changed in libpng 1.6.16 [December 22, 2014] * Last changed in libpng 1.6.22 [(PENDING RELEASE)]
* *
* This code is released under the libpng license. * This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer * For conditions of distribution and use, see the disclaimer
...@@ -47,6 +47,8 @@ png_read_filter_row_up_neon(png_row_infop row_info, png_bytep row, ...@@ -47,6 +47,8 @@ png_read_filter_row_up_neon(png_row_infop row_info, png_bytep row,
png_bytep rp_stop = row + row_info->rowbytes; png_bytep rp_stop = row + row_info->rowbytes;
png_const_bytep pp = prev_row; png_const_bytep pp = prev_row;
png_debug(1, "in png_read_filter_row_up_neon");
for (; rp < rp_stop; rp += 16, pp += 16) for (; rp < rp_stop; rp += 16, pp += 16)
{ {
uint8x16_t qrp, qpp; uint8x16_t qrp, qpp;
...@@ -72,6 +74,8 @@ png_read_filter_row_sub3_neon(png_row_infop row_info, png_bytep row, ...@@ -72,6 +74,8 @@ png_read_filter_row_sub3_neon(png_row_infop row_info, png_bytep row,
uint8x8x4_t vdest; uint8x8x4_t vdest;
vdest.val[3] = vdup_n_u8(0); vdest.val[3] = vdup_n_u8(0);
png_debug(1, "in png_read_filter_row_sub3_neon");
for (; rp < rp_stop;) for (; rp < rp_stop;)
{ {
uint8x8_t vtmp1, vtmp2; uint8x8_t vtmp1, vtmp2;
...@@ -113,6 +117,8 @@ png_read_filter_row_sub4_neon(png_row_infop row_info, png_bytep row, ...@@ -113,6 +117,8 @@ png_read_filter_row_sub4_neon(png_row_infop row_info, png_bytep row,
uint8x8x4_t vdest; uint8x8x4_t vdest;
vdest.val[3] = vdup_n_u8(0); vdest.val[3] = vdup_n_u8(0);
png_debug(1, "in png_read_filter_row_sub4_neon");
for (; rp < rp_stop; rp += 16) for (; rp < rp_stop; rp += 16)
{ {
uint32x2x4_t vtmp = vld4_u32(png_ptr(uint32_t,rp)); uint32x2x4_t vtmp = vld4_u32(png_ptr(uint32_t,rp));
...@@ -148,6 +154,8 @@ png_read_filter_row_avg3_neon(png_row_infop row_info, png_bytep row, ...@@ -148,6 +154,8 @@ png_read_filter_row_avg3_neon(png_row_infop row_info, png_bytep row,
vrpt = png_ptr(uint8x8x2_t,&vtmp); vrpt = png_ptr(uint8x8x2_t,&vtmp);
vrp = *vrpt; vrp = *vrpt;
png_debug(1, "in png_read_filter_row_avg3_neon");
for (; rp < rp_stop; pp += 12) for (; rp < rp_stop; pp += 12)
{ {
uint8x8_t vtmp1, vtmp2, vtmp3; uint8x8_t vtmp1, vtmp2, vtmp3;
...@@ -207,6 +215,8 @@ png_read_filter_row_avg4_neon(png_row_infop row_info, png_bytep row, ...@@ -207,6 +215,8 @@ png_read_filter_row_avg4_neon(png_row_infop row_info, png_bytep row,
uint8x8x4_t vdest; uint8x8x4_t vdest;
vdest.val[3] = vdup_n_u8(0); vdest.val[3] = vdup_n_u8(0);
png_debug(1, "in png_read_filter_row_avg4_neon");
for (; rp < rp_stop; rp += 16, pp += 16) for (; rp < rp_stop; rp += 16, pp += 16)
{ {
uint32x2x4_t vtmp; uint32x2x4_t vtmp;
...@@ -280,6 +290,8 @@ png_read_filter_row_paeth3_neon(png_row_infop row_info, png_bytep row, ...@@ -280,6 +290,8 @@ png_read_filter_row_paeth3_neon(png_row_infop row_info, png_bytep row,
vrpt = png_ptr(uint8x8x2_t,&vtmp); vrpt = png_ptr(uint8x8x2_t,&vtmp);
vrp = *vrpt; vrp = *vrpt;
png_debug(1, "in png_read_filter_row_paeth3_neon");
for (; rp < rp_stop; pp += 12) for (; rp < rp_stop; pp += 12)
{ {
uint8x8x2_t *vppt; uint8x8x2_t *vppt;
...@@ -339,6 +351,8 @@ png_read_filter_row_paeth4_neon(png_row_infop row_info, png_bytep row, ...@@ -339,6 +351,8 @@ png_read_filter_row_paeth4_neon(png_row_infop row_info, png_bytep row,
uint8x8x4_t vdest; uint8x8x4_t vdest;
vdest.val[3] = vdup_n_u8(0); vdest.val[3] = vdup_n_u8(0);
png_debug(1, "in png_read_filter_row_paeth4_neon");
for (; rp < rp_stop; rp += 16, pp += 16) for (; rp < rp_stop; rp += 16, pp += 16)
{ {
uint32x2x4_t vtmp; uint32x2x4_t vtmp;
......
...@@ -2,3 +2,7 @@ ...@@ -2,3 +2,7 @@
To enable SSE support in libpng, manually edit configure.ac and Makefile.am,
following the instructions in the configure.ac.patch and Makefile.am.patch
files, then configure with -DPNG_INTEL_SSE in CPPFLAGS.
If you have moved the *.c files to a different directory, be sure to update
the '#include "../../pngpriv.h"' line in both files if necessary to point
to the correct relative location of pngpriv.h.
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
* Copyright (c) 2016 Google, Inc. * Copyright (c) 2016 Google, Inc.
* Written by Mike Klein and Matt Sarett * Written by Mike Klein and Matt Sarett
* Derived from arm/filter_neon_intrinsics.c, which was * Derived from arm/filter_neon_intrinsics.c, which was
* Copyright (c) 2014 Glenn Randers-Pehrson * Copyright (c) 2014,2016 Glenn Randers-Pehrson
* *
* Last changed in libpng 1.6.22 [(PENDING RELEASE)] * Last changed in libpng 1.6.22 [(PENDING RELEASE)]
* *
...@@ -55,6 +55,7 @@ void png_read_filter_row_sub3_sse2(png_row_infop row_info, png_bytep row, ...@@ -55,6 +55,7 @@ void png_read_filter_row_sub3_sse2(png_row_infop row_info, png_bytep row,
* There is no pixel to the left of the first pixel. It's encoded directly. * There is no pixel to the left of the first pixel. It's encoded directly.
* That works with our main loop if we just say that left pixel was zero. * That works with our main loop if we just say that left pixel was zero.
*/ */
png_debug(1, "in png_read_filter_row_sub3_sse2");
__m128i a, d = _mm_setzero_si128(); __m128i a, d = _mm_setzero_si128();
int rb = row_info->rowbytes; int rb = row_info->rowbytes;
...@@ -75,6 +76,7 @@ void png_read_filter_row_sub4_sse2(png_row_infop row_info, png_bytep row, ...@@ -75,6 +76,7 @@ void png_read_filter_row_sub4_sse2(png_row_infop row_info, png_bytep row,
* There is no pixel to the left of the first pixel. It's encoded directly. * There is no pixel to the left of the first pixel. It's encoded directly.
* That works with our main loop if we just say that left pixel was zero. * That works with our main loop if we just say that left pixel was zero.
*/ */
png_debug(1, "in png_read_filter_row_sub4_sse2");
__m128i a, d = _mm_setzero_si128(); __m128i a, d = _mm_setzero_si128();
int rb = row_info->rowbytes; int rb = row_info->rowbytes;
...@@ -96,6 +98,7 @@ void png_read_filter_row_avg3_sse2(png_row_infop row_info, png_bytep row, ...@@ -96,6 +98,7 @@ void png_read_filter_row_avg3_sse2(png_row_infop row_info, png_bytep row,
* predicted to be half of the pixel above it. So again, this works * predicted to be half of the pixel above it. So again, this works
* perfectly with our loop if we make sure a starts at zero. * perfectly with our loop if we make sure a starts at zero.
*/ */
png_debug(1, "in png_read_filter_row_avg3_sse2");
const __m128i zero = _mm_setzero_si128(); const __m128i zero = _mm_setzero_si128();
__m128i b; __m128i b;
__m128i a, d = zero; __m128i a, d = zero;
...@@ -128,6 +131,7 @@ void png_read_filter_row_avg4_sse2(png_row_infop row_info, png_bytep row, ...@@ -128,6 +131,7 @@ void png_read_filter_row_avg4_sse2(png_row_infop row_info, png_bytep row,
* predicted to be half of the pixel above it. So again, this works * predicted to be half of the pixel above it. So again, this works
* perfectly with our loop if we make sure a starts at zero. * perfectly with our loop if we make sure a starts at zero.
*/ */
png_debug(1, "in png_read_filter_row_avg4_sse2");
const __m128i zero = _mm_setzero_si128(); const __m128i zero = _mm_setzero_si128();
__m128i b; __m128i b;
__m128i a, d = zero; __m128i a, d = zero;
...@@ -196,6 +200,7 @@ void png_read_filter_row_paeth3_sse2(png_row_infop row_info, png_bytep row, ...@@ -196,6 +200,7 @@ void png_read_filter_row_paeth3_sse2(png_row_infop row_info, png_bytep row,
* Here we zero b and d, which become c and a respectively at the start of * Here we zero b and d, which become c and a respectively at the start of
* the loop. * the loop.
*/ */
png_debug(1, "in png_read_filter_row_paeth3_sse2");
const __m128i zero = _mm_setzero_si128(); const __m128i zero = _mm_setzero_si128();
__m128i c, b = zero, __m128i c, b = zero,
a, d = zero; a, d = zero;
...@@ -254,6 +259,7 @@ void png_read_filter_row_paeth4_sse2(png_row_infop row_info, png_bytep row, ...@@ -254,6 +259,7 @@ void png_read_filter_row_paeth4_sse2(png_row_infop row_info, png_bytep row,
* Here we zero b and d, which become c and a respectively at the start of * Here we zero b and d, which become c and a respectively at the start of
* the loop. * the loop.
*/ */
png_debug(1, "in png_read_filter_row_paeth4_sse2");
const __m128i zero = _mm_setzero_si128(); const __m128i zero = _mm_setzero_si128();
__m128i c, b = zero, __m128i c, b = zero,
a, d = zero; a, d = zero;
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
* Copyright (c) 2016 Google, Inc. * Copyright (c) 2016 Google, Inc.
* Written by Mike Klein and Matt Sarett * Written by Mike Klein and Matt Sarett
* Derived from arm/arm_init.c, which was * Derived from arm/arm_init.c, which was
* Copyright (c) 2014 Glenn Randers-Pehrson * Copyright (c) 2014,2016 Glenn Randers-Pehrson
* *
* Last changed in libpng 1.6.22 [(PENDING RELEASE)] * Last changed in libpng 1.6.22 [(PENDING RELEASE)]
* *
...@@ -29,6 +29,7 @@ png_init_filter_functions_sse2(png_structp pp, unsigned int bpp) ...@@ -29,6 +29,7 @@ png_init_filter_functions_sse2(png_structp pp, unsigned int bpp)
* Most of these can be implemented using only MMX and 64-bit registers, * Most of these can be implemented using only MMX and 64-bit registers,
* but they end up a bit slower than using the equally-ubiquitous SSE2. * but they end up a bit slower than using the equally-ubiquitous SSE2.
*/ */
png_debug(1, "in png_init_filter_functions_sse2");
if (bpp == 3) if (bpp == 3)
{ {
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_sse2; pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_sse2;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册