From 084c726ba306eb19077ac525764acec6c110f1d4 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Fri, 20 Sep 2002 10:03:01 +0000 Subject: [PATCH] pix_sum16_mmx() Originally committed as revision 961 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/i386/dsputil_mmx.c | 39 +++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c index 4336e4bde0..eaec8fe45a 100644 --- a/libavcodec/i386/dsputil_mmx.c +++ b/libavcodec/i386/dsputil_mmx.c @@ -420,6 +420,44 @@ static void clear_blocks_mmx(DCTELEM *blocks) ); } +static int pix_sum16_mmx(UINT8 * pix, int line_size){ + const int h=16; + int sum; + int index= -line_size*h; + + __asm __volatile( + "pxor %%mm7, %%mm7 \n\t" + "pxor %%mm6, %%mm6 \n\t" + "1: \n\t" + "movq (%2, %1), %%mm0 \n\t" + "movq (%2, %1), %%mm1 \n\t" + "movq 8(%2, %1), %%mm2 \n\t" + "movq 8(%2, %1), %%mm3 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpckhbw %%mm7, %%mm1 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "punpckhbw %%mm7, %%mm3 \n\t" + "paddw %%mm0, %%mm1 \n\t" + "paddw %%mm2, %%mm3 \n\t" + "paddw %%mm1, %%mm3 \n\t" + "paddw %%mm3, %%mm6 \n\t" + "addl %3, %1 \n\t" + " js 1b \n\t" + "movq %%mm6, %%mm5 \n\t" + "psrlq $32, %%mm6 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "movq %%mm6, %%mm5 \n\t" + "psrlq $16, %%mm6 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "movd %%mm6, %0 \n\t" + "andl $0xFFFF, %0 \n\t" + : "=&r" (sum), "+r" (index) + : "r" (pix - index), "r" (line_size) + ); + + return sum; +} + #if 0 static void just_return() { return; } #endif @@ -448,6 +486,7 @@ void dsputil_init_mmx(void) put_pixels_clamped = put_pixels_clamped_mmx; add_pixels_clamped = add_pixels_clamped_mmx; clear_blocks= clear_blocks_mmx; + pix_sum= pix_sum16_mmx; pix_abs16x16 = pix_abs16x16_mmx; pix_abs16x16_x2 = pix_abs16x16_x2_mmx; -- GitLab