Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
小白菜888
Ffmpeg
提交
e9200351
F
Ffmpeg
项目概览
小白菜888
/
Ffmpeg
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
F
Ffmpeg
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
e9200351
编写于
1月 27, 2012
作者:
R
Ronald S. Bultje
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
png: move DSP functions to their own DSP context.
上级
f9708e9a
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
230 addition
and
127 deletion
+230
-127
libavcodec/Makefile
libavcodec/Makefile
+1
-1
libavcodec/dsputil.c
libavcodec/dsputil.c
+0
-16
libavcodec/dsputil.h
libavcodec/dsputil.h
+0
-3
libavcodec/pngdec.c
libavcodec/pngdec.c
+5
-5
libavcodec/pngdsp.c
libavcodec/pngdsp.c
+48
-0
libavcodec/pngdsp.h
libavcodec/pngdsp.h
+40
-0
libavcodec/x86/Makefile
libavcodec/x86/Makefile
+1
-0
libavcodec/x86/dsputil_mmx.c
libavcodec/x86/dsputil_mmx.c
+0
-102
libavcodec/x86/pngdsp-init.c
libavcodec/x86/pngdsp-init.c
+135
-0
未找到文件。
libavcodec/Makefile
浏览文件 @
e9200351
...
...
@@ -296,7 +296,7 @@ OBJS-$(CONFIG_PGMYUV_DECODER) += pnmdec.o pnm.o
OBJS-$(CONFIG_PGMYUV_ENCODER)
+=
pnmenc.o pnm.o
OBJS-$(CONFIG_PGSSUB_DECODER)
+=
pgssubdec.o
OBJS-$(CONFIG_PICTOR_DECODER)
+=
pictordec.o cga_data.o
OBJS-$(CONFIG_PNG_DECODER)
+=
png.o pngdec.o
OBJS-$(CONFIG_PNG_DECODER)
+=
png.o pngdec.o
pngdsp.o
OBJS-$(CONFIG_PNG_ENCODER)
+=
png.o pngenc.o
OBJS-$(CONFIG_PPM_DECODER)
+=
pnmdec.o pnm.o
OBJS-$(CONFIG_PPM_ENCODER)
+=
pnmenc.o pnm.o
...
...
libavcodec/dsputil.c
浏览文件 @
e9200351
...
...
@@ -38,7 +38,6 @@
#include "config.h"
#include "ac3dec.h"
#include "vorbis.h"
#include "png.h"
uint8_t
ff_cropTbl
[
256
+
2
*
MAX_NEG_CROP
]
=
{
0
,
};
uint32_t
ff_squareTbl
[
512
]
=
{
0
,
};
...
...
@@ -1882,17 +1881,6 @@ static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
dst
[
i
+
0
]
+=
src
[
i
+
0
];
}
static
void
add_bytes_l2_c
(
uint8_t
*
dst
,
uint8_t
*
src1
,
uint8_t
*
src2
,
int
w
){
long
i
;
for
(
i
=
0
;
i
<=
w
-
sizeof
(
long
);
i
+=
sizeof
(
long
)){
long
a
=
*
(
long
*
)(
src1
+
i
);
long
b
=
*
(
long
*
)(
src2
+
i
);
*
(
long
*
)(
dst
+
i
)
=
((
a
&
pb_7f
)
+
(
b
&
pb_7f
))
^
((
a
^
b
)
&
pb_80
);
}
for
(;
i
<
w
;
i
++
)
dst
[
i
]
=
src1
[
i
]
+
src2
[
i
];
}
static
void
diff_bytes_c
(
uint8_t
*
dst
,
uint8_t
*
src1
,
uint8_t
*
src2
,
int
w
){
long
i
;
#if !HAVE_FAST_UNALIGNED
...
...
@@ -3003,7 +2991,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c
->
ssd_int8_vs_int16
=
ssd_int8_vs_int16_c
;
c
->
add_bytes
=
add_bytes_c
;
c
->
add_bytes_l2
=
add_bytes_l2_c
;
c
->
diff_bytes
=
diff_bytes_c
;
c
->
add_hfyu_median_prediction
=
add_hfyu_median_prediction_c
;
c
->
sub_hfyu_median_prediction
=
sub_hfyu_median_prediction_c
;
...
...
@@ -3011,9 +2998,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c
->
add_hfyu_left_prediction_bgr32
=
add_hfyu_left_prediction_bgr32_c
;
c
->
bswap_buf
=
bswap_buf
;
c
->
bswap16_buf
=
bswap16_buf
;
#if CONFIG_PNG_DECODER
c
->
add_png_paeth_prediction
=
ff_add_png_paeth_prediction
;
#endif
if
(
CONFIG_H263_DECODER
||
CONFIG_H263_ENCODER
)
{
c
->
h263_h_loop_filter
=
h263_h_loop_filter_c
;
...
...
libavcodec/dsputil.h
浏览文件 @
e9200351
...
...
@@ -378,7 +378,6 @@ typedef struct DSPContext {
/* huffyuv specific */
void
(
*
add_bytes
)(
uint8_t
*
dst
/*align 16*/
,
uint8_t
*
src
/*align 16*/
,
int
w
);
void
(
*
add_bytes_l2
)(
uint8_t
*
dst
/*align 16*/
,
uint8_t
*
src1
/*align 16*/
,
uint8_t
*
src2
/*align 16*/
,
int
w
);
void
(
*
diff_bytes
)(
uint8_t
*
dst
/*align 16*/
,
uint8_t
*
src1
/*align 16*/
,
uint8_t
*
src2
/*align 1*/
,
int
w
);
/**
* subtract huffyuv's variant of median prediction
...
...
@@ -388,8 +387,6 @@ typedef struct DSPContext {
void
(
*
add_hfyu_median_prediction
)(
uint8_t
*
dst
,
const
uint8_t
*
top
,
const
uint8_t
*
diff
,
int
w
,
int
*
left
,
int
*
left_top
);
int
(
*
add_hfyu_left_prediction
)(
uint8_t
*
dst
,
const
uint8_t
*
src
,
int
w
,
int
left
);
void
(
*
add_hfyu_left_prediction_bgr32
)(
uint8_t
*
dst
,
const
uint8_t
*
src
,
int
w
,
int
*
red
,
int
*
green
,
int
*
blue
,
int
*
alpha
);
/* this might write to dst[w] */
void
(
*
add_png_paeth_prediction
)(
uint8_t
*
dst
,
uint8_t
*
src
,
uint8_t
*
top
,
int
w
,
int
bpp
);
void
(
*
bswap_buf
)(
uint32_t
*
dst
,
const
uint32_t
*
src
,
int
w
);
void
(
*
bswap16_buf
)(
uint16_t
*
dst
,
const
uint16_t
*
src
,
int
len
);
...
...
libavcodec/pngdec.c
浏览文件 @
e9200351
...
...
@@ -22,7 +22,7 @@
#include "avcodec.h"
#include "bytestream.h"
#include "png.h"
#include "
dsputil
.h"
#include "
pngdsp
.h"
/* TODO:
* - add 2, 4 and 16 bit depth support
...
...
@@ -33,7 +33,7 @@
//#define DEBUG
typedef
struct
PNGDecContext
{
DSPContext
dsp
;
PNG
DSPContext
dsp
;
const
uint8_t
*
bytestream
;
const
uint8_t
*
bytestream_start
;
...
...
@@ -191,7 +191,7 @@ void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w
}
/* NOTE: 'dst' can be equal to 'last' */
static
void
png_filter_row
(
DSPContext
*
dsp
,
uint8_t
*
dst
,
int
filter_type
,
static
void
png_filter_row
(
PNG
DSPContext
*
dsp
,
uint8_t
*
dst
,
int
filter_type
,
uint8_t
*
src
,
uint8_t
*
last
,
int
size
,
int
bpp
)
{
int
i
,
p
,
r
,
g
,
b
,
a
;
...
...
@@ -235,7 +235,7 @@ static void png_filter_row(DSPContext *dsp, uint8_t *dst, int filter_type,
if
(
bpp
>
1
&&
size
>
4
)
{
// would write off the end of the array if we let it process the last pixel with bpp=3
int
w
=
bpp
==
4
?
size
:
size
-
3
;
dsp
->
add_p
ng_p
aeth_prediction
(
dst
+
i
,
src
+
i
,
last
+
i
,
w
-
i
,
bpp
);
dsp
->
add_paeth_prediction
(
dst
+
i
,
src
+
i
,
last
+
i
,
w
-
i
,
bpp
);
i
=
w
;
}
ff_add_png_paeth_prediction
(
dst
+
i
,
src
+
i
,
last
+
i
,
size
-
i
,
bpp
);
...
...
@@ -639,7 +639,7 @@ static av_cold int png_dec_init(AVCodecContext *avctx){
s
->
last_picture
=
&
s
->
picture2
;
avcodec_get_frame_defaults
(
&
s
->
picture1
);
avcodec_get_frame_defaults
(
&
s
->
picture2
);
dsputil_init
(
&
s
->
dsp
,
avctx
);
ff_pngdsp_init
(
&
s
->
dsp
);
return
0
;
}
...
...
libavcodec/pngdsp.c
0 → 100644
浏览文件 @
e9200351
/*
* PNG image format
* Copyright (c) 2008 Loren Merrit <lorenm@u.washington.edu>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/common.h"
#include "png.h"
#include "pngdsp.h"
// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)

/**
 * Byte-wise sum of two buffers: dst[i] = src1[i] + src2[i] (mod 256)
 * for i in [0, w).
 *
 * The bulk of the work is done one long at a time: the low 7 bits of every
 * byte are added with the top bit masked off (so no carry can cross into the
 * neighbouring byte), and the top bit of each byte is then patched in with
 * an XOR. Any bytes left over are summed one by one.
 *
 * @param dst   output buffer, at least w bytes
 * @param src1  first input, at least w bytes
 * @param src2  second input, at least w bytes
 * @param w     number of bytes to process; values <= 0 do nothing
 */
static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w)
{
    long i;

    /* The bound must be computed as a signed value. With a plain
     * "w - sizeof(long)" the subtraction is done in size_t, so any
     * w < sizeof(long) wraps around to a huge bound and the loop runs far
     * past the end of all three buffers. */
    for (i = 0; i <= w - (int) sizeof(long); i += sizeof(long)) {
        long a = *(long *)(src1 + i);
        long b = *(long *)(src2 + i);
        *(long *)(dst + i) = ((a & pb_7f) + (b & pb_7f)) ^ ((a ^ b) & pb_80);
    }
    /* Tail: fewer than sizeof(long) bytes remain. */
    for (; i < w; i++)
        dst[i] = src1[i] + src2[i];
}
/**
 * Populate a PNGDSPContext with the portable C routines, then hand it to
 * the x86 initializer when the build has HAVE_MMX set so that it can
 * override individual entries.
 *
 * @param dsp  context to fill in
 */
void ff_pngdsp_init(PNGDSPContext *dsp)
{
    dsp->add_paeth_prediction = ff_add_png_paeth_prediction;
    dsp->add_bytes_l2         = add_bytes_l2_c;

    if (HAVE_MMX) {
        ff_pngdsp_init_x86(dsp);
    }
}
libavcodec/pngdsp.h
0 → 100644
浏览文件 @
e9200351
/*
* PNG image format
* Copyright (c) 2008 Loren Merrit <lorenm@u.washington.edu>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_PNGDSP_H
#define AVCODEC_PNGDSP_H
#include <stdint.h>
/**
 * Table of function pointers for the PNG row-filter routines.
 */
typedef struct PNGDSPContext {
    /** Sum two byte buffers of length w into dst. */
    void (*add_bytes_l2)(uint8_t *dst  /* align 16 */,
                         uint8_t *src1 /* align 16 */,
                         uint8_t *src2 /* align 16 */,
                         int w);

    /* this might write to dst[w] */
    void (*add_paeth_prediction)(uint8_t *dst, uint8_t *src,
                                 uint8_t *top, int w, int bpp);
} PNGDSPContext;

/** Fill in dsp with the default implementations. */
void ff_pngdsp_init(PNGDSPContext *dsp);

/** x86-specific part of the initialization. */
void ff_pngdsp_init_x86(PNGDSPContext *dsp);
#endif
/* AVCODEC_PNGDSP_H */
libavcodec/x86/Makefile
浏览文件 @
e9200351
...
...
@@ -42,6 +42,7 @@ MMX-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_mmx.o
YASM-OBJS-$(CONFIG_ENCODERS)
+=
x86/dsputilenc_yasm.o
MMX-OBJS-$(CONFIG_GPL)
+=
x86/idct_mmx.o
MMX-OBJS-$(CONFIG_LPC)
+=
x86/lpc_mmx.o
MMX-OBJS-$(CONFIG_PNG_DECODER)
+=
x86/pngdsp-init.o
YASM-OBJS-$(CONFIG_PRORES_DECODER)
+=
x86/proresdsp.o
MMX-OBJS-$(CONFIG_PRORES_DECODER)
+=
x86/proresdsp-init.o
MMX-OBJS-$(CONFIG_DWT)
+=
x86/snowdsp_mmx.o
...
...
libavcodec/x86/dsputil_mmx.c
浏览文件 @
e9200351
...
...
@@ -582,28 +582,6 @@ static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){
dst
[
i
+
0
]
+=
src
[
i
+
0
];
}
static
void
add_bytes_l2_mmx
(
uint8_t
*
dst
,
uint8_t
*
src1
,
uint8_t
*
src2
,
int
w
){
x86_reg
i
=
0
;
__asm__
volatile
(
"jmp 2f
\n\t
"
"1:
\n\t
"
"movq (%2, %0), %%mm0
\n\t
"
"movq 8(%2, %0), %%mm1
\n\t
"
"paddb (%3, %0), %%mm0
\n\t
"
"paddb 8(%3, %0), %%mm1
\n\t
"
"movq %%mm0, (%1, %0)
\n\t
"
"movq %%mm1, 8(%1, %0)
\n\t
"
"add $16, %0
\n\t
"
"2:
\n\t
"
"cmp %4, %0
\n\t
"
" js 1b
\n\t
"
:
"+r"
(
i
)
:
"r"
(
dst
),
"r"
(
src1
),
"r"
(
src2
),
"r"
((
x86_reg
)
w
-
15
)
);
for
(;
i
<
w
;
i
++
)
dst
[
i
]
=
src1
[
i
]
+
src2
[
i
];
}
#if HAVE_7REGS
static
void
add_hfyu_median_prediction_cmov
(
uint8_t
*
dst
,
const
uint8_t
*
top
,
const
uint8_t
*
diff
,
int
w
,
int
*
left
,
int
*
left_top
)
{
x86_reg
w2
=
-
w
;
...
...
@@ -879,80 +857,6 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w,
}
}
#define PAETH(cpu, abs3)\
static void add_png_paeth_prediction_##cpu(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp)\
{\
x86_reg i = -bpp;\
x86_reg end = w-3;\
__asm__ volatile(\
"pxor %%mm7, %%mm7 \n"\
"movd (%1,%0), %%mm0 \n"\
"movd (%2,%0), %%mm1 \n"\
"punpcklbw %%mm7, %%mm0 \n"\
"punpcklbw %%mm7, %%mm1 \n"\
"add %4, %0 \n"\
"1: \n"\
"movq %%mm1, %%mm2 \n"\
"movd (%2,%0), %%mm1 \n"\
"movq %%mm2, %%mm3 \n"\
"punpcklbw %%mm7, %%mm1 \n"\
"movq %%mm2, %%mm4 \n"\
"psubw %%mm1, %%mm3 \n"\
"psubw %%mm0, %%mm4 \n"\
"movq %%mm3, %%mm5 \n"\
"paddw %%mm4, %%mm5 \n"\
abs3\
"movq %%mm4, %%mm6 \n"\
"pminsw %%mm5, %%mm6 \n"\
"pcmpgtw %%mm6, %%mm3 \n"\
"pcmpgtw %%mm5, %%mm4 \n"\
"movq %%mm4, %%mm6 \n"\
"pand %%mm3, %%mm4 \n"\
"pandn %%mm3, %%mm6 \n"\
"pandn %%mm0, %%mm3 \n"\
"movd (%3,%0), %%mm0 \n"\
"pand %%mm1, %%mm6 \n"\
"pand %%mm4, %%mm2 \n"\
"punpcklbw %%mm7, %%mm0 \n"\
"movq %6, %%mm5 \n"\
"paddw %%mm6, %%mm0 \n"\
"paddw %%mm2, %%mm3 \n"\
"paddw %%mm3, %%mm0 \n"\
"pand %%mm5, %%mm0 \n"\
"movq %%mm0, %%mm3 \n"\
"packuswb %%mm3, %%mm3 \n"\
"movd %%mm3, (%1,%0) \n"\
"add %4, %0 \n"\
"cmp %5, %0 \n"\
"jle 1b \n"\
:"+r"(i)\
:"r"(dst), "r"(top), "r"(src), "r"((x86_reg)bpp), "g"(end),\
"m"(ff_pw_255)\
:"memory"\
);\
}
#define ABS3_MMX2\
"psubw %%mm5, %%mm7 \n"\
"pmaxsw %%mm7, %%mm5 \n"\
"pxor %%mm6, %%mm6 \n"\
"pxor %%mm7, %%mm7 \n"\
"psubw %%mm3, %%mm6 \n"\
"psubw %%mm4, %%mm7 \n"\
"pmaxsw %%mm6, %%mm3 \n"\
"pmaxsw %%mm7, %%mm4 \n"\
"pxor %%mm7, %%mm7 \n"
#define ABS3_SSSE3\
"pabsw %%mm3, %%mm3 \n"\
"pabsw %%mm4, %%mm4 \n"\
"pabsw %%mm5, %%mm5 \n"
PAETH
(
mmx2
,
ABS3_MMX2
)
#if HAVE_SSSE3
PAETH
(
ssse3
,
ABS3_SSSE3
)
#endif
#define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\
"paddw " #m4 ", " #m3 " \n\t"
/* x1 */
\
"movq "MANGLE(ff_pw_20)", %%mm4 \n\t"
/* 20 */
\
...
...
@@ -2552,7 +2456,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
#endif
c
->
add_bytes
=
add_bytes_mmx
;
c
->
add_bytes_l2
=
add_bytes_l2_mmx
;
if
(
!
high_bit_depth
)
c
->
draw_edges
=
draw_edges_mmx
;
...
...
@@ -2686,8 +2589,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
if
(
HAVE_AMD3DNOW
&&
(
mm_flags
&
AV_CPU_FLAG_3DNOW
))
c
->
add_hfyu_median_prediction
=
add_hfyu_median_prediction_cmov
;
#endif
c
->
add_png_paeth_prediction
=
add_png_paeth_prediction_mmx2
;
}
else
if
(
HAVE_AMD3DNOW
&&
(
mm_flags
&
AV_CPU_FLAG_3DNOW
))
{
c
->
prefetch
=
prefetch_3dnow
;
...
...
@@ -2826,9 +2727,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
H264_QPEL_FUNCS_10
(
2
,
0
,
ssse3_cache64
)
H264_QPEL_FUNCS_10
(
3
,
0
,
ssse3_cache64
)
}
#endif
c
->
add_png_paeth_prediction
=
add_png_paeth_prediction_ssse3
;
#if HAVE_YASM
if
(
!
high_bit_depth
&&
CONFIG_H264CHROMA
)
{
c
->
put_h264_chroma_pixels_tab
[
0
]
=
ff_put_h264_chroma_mc8_ssse3_rnd
;
c
->
avg_h264_chroma_pixels_tab
[
0
]
=
ff_avg_h264_chroma_mc8_ssse3_rnd
;
...
...
libavcodec/x86/pngdsp-init.c
0 → 100644
浏览文件 @
e9200351
/*
* x86 PNG optimizations.
* Copyright (c) 2008 Loren Merrit <lorenm@u.washington.edu>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/cpu.h"
#include "libavutil/x86_cpu.h"
#include "libavcodec/pngdsp.h"
#include "dsputil_mmx.h"
/*
 * PAETH(cpu, abs3) defines
 *     add_png_paeth_prediction_<cpu>(dst, src, top, w, bpp)
 * as a single MMX inline-asm loop.
 *
 * abs3 is an asm string fragment spliced into the loop right after mm3, mm4
 * and mm5 have been computed (mm3 and mm4 are differences against the
 * previous "top" word vector, mm5 is their sum).
 *
 * Asm operands: %0 = i, %1 = dst, %2 = top, %3 = src, %4 = bpp, %5 = end,
 * %6 = ff_pw_255.
 *
 * Loop shape, as written:
 *   - i starts at -bpp, so the initial loads read 4 bytes at dst - bpp and
 *     top - bpp; i is then advanced by bpp before the loop label;
 *   - each iteration loads 4 bytes from top + i and from src + i, stores
 *     4 bytes at dst + i, then advances i by bpp;
 *   - the loop repeats while i <= end, where end = w - 3.
 * mm7 is cleared up front and used as the zero source when unpacking bytes
 * to words; the summed result is masked with ff_pw_255 before being packed
 * back to bytes.
 *
 * NOTE(review): every store is 4 bytes wide regardless of bpp, so callers
 * presumably have to tolerate writes beyond the last processed pixel --
 * confirm against the call site.
 */
#define PAETH(cpu, abs3)\
static void add_png_paeth_prediction_##cpu(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp)\
{\
x86_reg i = -bpp;\
x86_reg end = w-3;\
__asm__ volatile(\
"pxor %%mm7, %%mm7 \n"\
"movd (%1,%0), %%mm0 \n"\
"movd (%2,%0), %%mm1 \n"\
"punpcklbw %%mm7, %%mm0 \n"\
"punpcklbw %%mm7, %%mm1 \n"\
"add %4, %0 \n"\
"1: \n"\
"movq %%mm1, %%mm2 \n"\
"movd (%2,%0), %%mm1 \n"\
"movq %%mm2, %%mm3 \n"\
"punpcklbw %%mm7, %%mm1 \n"\
"movq %%mm2, %%mm4 \n"\
"psubw %%mm1, %%mm3 \n"\
"psubw %%mm0, %%mm4 \n"\
"movq %%mm3, %%mm5 \n"\
"paddw %%mm4, %%mm5 \n"\
abs3\
"movq %%mm4, %%mm6 \n"\
"pminsw %%mm5, %%mm6 \n"\
"pcmpgtw %%mm6, %%mm3 \n"\
"pcmpgtw %%mm5, %%mm4 \n"\
"movq %%mm4, %%mm6 \n"\
"pand %%mm3, %%mm4 \n"\
"pandn %%mm3, %%mm6 \n"\
"pandn %%mm0, %%mm3 \n"\
"movd (%3,%0), %%mm0 \n"\
"pand %%mm1, %%mm6 \n"\
"pand %%mm4, %%mm2 \n"\
"punpcklbw %%mm7, %%mm0 \n"\
"movq %6, %%mm5 \n"\
"paddw %%mm6, %%mm0 \n"\
"paddw %%mm2, %%mm3 \n"\
"paddw %%mm3, %%mm0 \n"\
"pand %%mm5, %%mm0 \n"\
"movq %%mm0, %%mm3 \n"\
"packuswb %%mm3, %%mm3 \n"\
"movd %%mm3, (%1,%0) \n"\
"add %4, %0 \n"\
"cmp %5, %0 \n"\
"jle 1b \n"\
:"+r"(i)\
:"r"(dst), "r"(top), "r"(src), "r"((x86_reg)bpp), "g"(end),\
"m"(ff_pw_255)\
:"memory"\
);\
}
/*
 * abs3 fragment for PAETH: replaces mm5, mm3 and mm4 with their absolute
 * values, each computed as max(x, 0 - x) on packed words.
 * Expects mm7 to be zero on entry, uses mm6 and mm7 as scratch, and clears
 * mm7 again as its last instruction.
 */
#define ABS3_MMX2\
"psubw %%mm5, %%mm7 \n"\
"pmaxsw %%mm7, %%mm5 \n"\
"pxor %%mm6, %%mm6 \n"\
"pxor %%mm7, %%mm7 \n"\
"psubw %%mm3, %%mm6 \n"\
"psubw %%mm4, %%mm7 \n"\
"pmaxsw %%mm6, %%mm3 \n"\
"pmaxsw %%mm7, %%mm4 \n"\
"pxor %%mm7, %%mm7 \n"
/*
 * abs3 fragment for PAETH: applies pabsw in place to mm3, mm4 and mm5;
 * touches no other register.
 */
#define ABS3_SSSE3\
"pabsw %%mm3, %%mm3 \n"\
"pabsw %%mm4, %%mm4 \n"\
"pabsw %%mm5, %%mm5 \n"
/* Defines add_png_paeth_prediction_mmx2(). */
PAETH
(
mmx2
,
ABS3_MMX2
)
/* add_png_paeth_prediction_ssse3() only exists when HAVE_SSSE3 is non-zero. */
#if HAVE_SSSE3
PAETH
(
ssse3
,
ABS3_SSSE3
)
#endif
static
void
add_bytes_l2_mmx
(
uint8_t
*
dst
,
uint8_t
*
src1
,
uint8_t
*
src2
,
int
w
)
{
x86_reg
i
=
0
;
__asm__
volatile
(
"jmp 2f
\n\t
"
"1:
\n\t
"
"movq (%2, %0), %%mm0
\n\t
"
"movq 8(%2, %0), %%mm1
\n\t
"
"paddb (%3, %0), %%mm0
\n\t
"
"paddb 8(%3, %0), %%mm1
\n\t
"
"movq %%mm0, (%1, %0)
\n\t
"
"movq %%mm1, 8(%1, %0)
\n\t
"
"add $16, %0
\n\t
"
"2:
\n\t
"
"cmp %4, %0
\n\t
"
" js 1b
\n\t
"
:
"+r"
(
i
)
:
"r"
(
dst
),
"r"
(
src1
),
"r"
(
src2
),
"r"
((
x86_reg
)
w
-
15
)
);
for
(;
i
<
w
;
i
++
)
dst
[
i
]
=
src1
[
i
]
+
src2
[
i
];
}
/**
 * Override entries of a PNGDSPContext with the x86 implementations that the
 * running CPU supports, as reported by av_get_cpu_flags().
 *
 * Later checks take precedence over earlier ones, so an SSSE3-capable CPU
 * ends up with the SSSE3 Paeth routine rather than the MMX2 one.
 *
 * @param dsp  context whose function pointers may be replaced
 */
void ff_pngdsp_init_x86(PNGDSPContext *dsp)
{
    int flags = av_get_cpu_flags();

    if (flags & AV_CPU_FLAG_MMX)
        dsp->add_bytes_l2         = add_bytes_l2_mmx;
    if (flags & AV_CPU_FLAG_MMX2)
        dsp->add_paeth_prediction = add_png_paeth_prediction_mmx2;
    /* add_png_paeth_prediction_ssse3 is only defined under HAVE_SSSE3, so the
     * reference must sit behind the same guard or the build breaks when
     * SSSE3 support is disabled. */
#if HAVE_SSSE3
    if (flags & AV_CPU_FLAG_SSSE3)
        dsp->add_paeth_prediction = add_png_paeth_prediction_ssse3;
#endif
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录