Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
小白菜888
Ffmpeg
提交
59f474b4
F
Ffmpeg
项目概览
小白菜888
/
Ffmpeg
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
F
Ffmpeg
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
59f474b4
编写于
1月 27, 2012
作者:
R
Ronald S. Bultje
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
png: convert DSP functions to yasm.
上级
3715d841
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
156 addition
and
103 deletion
+156
-103
libavcodec/x86/Makefile
libavcodec/x86/Makefile
+1
-0
libavcodec/x86/pngdsp-init.c
libavcodec/x86/pngdsp-init.c
+13
-103
libavcodec/x86/pngdsp.asm
libavcodec/x86/pngdsp.asm
+142
-0
未找到文件。
libavcodec/x86/Makefile
浏览文件 @
59f474b4
...
...
@@ -42,6 +42,7 @@ MMX-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_mmx.o
# Per-feature x86 object lists: YASM-OBJS-* entries are appended for the
# yasm-assembled objects, MMX-OBJS-* for the C/inline-asm init objects.
YASM-OBJS-$(CONFIG_ENCODERS)           += x86/dsputilenc_yasm.o
MMX-OBJS-$(CONFIG_GPL)                 += x86/idct_mmx.o
MMX-OBJS-$(CONFIG_LPC)                 += x86/lpc_mmx.o
YASM-OBJS-$(CONFIG_PNG_DECODER)        += x86/pngdsp.o
MMX-OBJS-$(CONFIG_PNG_DECODER)         += x86/pngdsp-init.o
YASM-OBJS-$(CONFIG_PRORES_DECODER)     += x86/proresdsp.o
MMX-OBJS-$(CONFIG_PRORES_DECODER)      += x86/proresdsp-init.o
...
...
libavcodec/x86/pngdsp-init.c
浏览文件 @
59f474b4
...
...
@@ -19,117 +19,27 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/common.h"
#include "libavutil/cpu.h"
#include "libavutil/x86_cpu.h"
#include "libavcodec/pngdsp.h"
#include "dsputil_mmx.h"
/*
 * PAETH(cpu, abs3): expands to a static function
 *     add_png_paeth_prediction_<cpu>(dst, src, top, w, bpp)
 * implemented with MMX inline assembly.
 *
 *   cpu  - suffix pasted onto the generated function name.
 *   abs3 - string of instructions that must replace mm3, mm4 and mm5 with
 *          their per-word absolute values (see ABS3_MMX2 / ABS3_SSSE3).
 *
 * Asm operands: %0 = i (byte offset, read/write), %1 = dst, %2 = top,
 * %3 = src, %4 = bpp, %5 = end, %6 = ff_pw_255.
 *
 * The offset i starts at -bpp so the first loads read dst[-bpp] and
 * top[-bpp]; it is then advanced by bpp before entering the loop.  Each
 * loop pass loads 4 bytes (movd) of top and src at offset i, widens them to
 * 16-bit words against the zero register mm7, selects one of the three
 * neighbour values using the compare masks, adds the src bytes, masks the
 * sums with ff_pw_255, packs them back to bytes and stores 4 bytes at
 * dst + i.  i then advances by bpp and the loop repeats while i <= end,
 * where end = w - 3.  mm0 carries the unpacked result into the next pass.
 *
 * NOTE(review): the asm modifies mm0-mm7 without listing them as clobbers
 * and does not execute emms -- presumably handled by the caller; confirm.
 */
#define PAETH(cpu, abs3)\
static void add_png_paeth_prediction_##cpu(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp)\
{\
x86_reg i = -bpp;\
x86_reg end = w-3;\
__asm__ volatile(\
"pxor %%mm7, %%mm7 \n"\
"movd (%1,%0), %%mm0 \n"\
"movd (%2,%0), %%mm1 \n"\
"punpcklbw %%mm7, %%mm0 \n"\
"punpcklbw %%mm7, %%mm1 \n"\
"add %4, %0 \n"\
"1: \n"\
"movq %%mm1, %%mm2 \n"\
"movd (%2,%0), %%mm1 \n"\
"movq %%mm2, %%mm3 \n"\
"punpcklbw %%mm7, %%mm1 \n"\
"movq %%mm2, %%mm4 \n"\
"psubw %%mm1, %%mm3 \n"\
"psubw %%mm0, %%mm4 \n"\
"movq %%mm3, %%mm5 \n"\
"paddw %%mm4, %%mm5 \n"\
abs3\
"movq %%mm4, %%mm6 \n"\
"pminsw %%mm5, %%mm6 \n"\
"pcmpgtw %%mm6, %%mm3 \n"\
"pcmpgtw %%mm5, %%mm4 \n"\
"movq %%mm4, %%mm6 \n"\
"pand %%mm3, %%mm4 \n"\
"pandn %%mm3, %%mm6 \n"\
"pandn %%mm0, %%mm3 \n"\
"movd (%3,%0), %%mm0 \n"\
"pand %%mm1, %%mm6 \n"\
"pand %%mm4, %%mm2 \n"\
"punpcklbw %%mm7, %%mm0 \n"\
"movq %6, %%mm5 \n"\
"paddw %%mm6, %%mm0 \n"\
"paddw %%mm2, %%mm3 \n"\
"paddw %%mm3, %%mm0 \n"\
"pand %%mm5, %%mm0 \n"\
"movq %%mm0, %%mm3 \n"\
"packuswb %%mm3, %%mm3 \n"\
"movd %%mm3, (%1,%0) \n"\
"add %4, %0 \n"\
"cmp %5, %0 \n"\
"jle 1b \n"\
:"+r"(i)\
:"r"(dst), "r"(top), "r"(src), "r"((x86_reg)bpp), "g"(end),\
"m"(ff_pw_255)\
:"memory"\
);\
}
/*
 * ABS3_MMX2: per-word absolute value of mm5, mm3 and mm4 without pabsw.
 * Each value x is replaced by max(x, 0 - x) using psubw + pmaxsw.
 * The first psubw relies on mm7 being all-zero on entry (PAETH zeroes it
 * with pxor before the loop); mm6 and mm7 are used as scratch, and mm7 is
 * reset to zero by the final pxor so later punpcklbw instructions can keep
 * using it as the zero register.
 */
#define ABS3_MMX2\
"psubw %%mm5, %%mm7 \n"\
"pmaxsw %%mm7, %%mm5 \n"\
"pxor %%mm6, %%mm6 \n"\
"pxor %%mm7, %%mm7 \n"\
"psubw %%mm3, %%mm6 \n"\
"psubw %%mm4, %%mm7 \n"\
"pmaxsw %%mm6, %%mm3 \n"\
"pmaxsw %%mm7, %%mm4 \n"\
"pxor %%mm7, %%mm7 \n"
/*
 * ABS3_SSSE3: same contract as ABS3_MMX2 (absolute value of mm3, mm4, mm5),
 * done with one pabsw per register; no scratch registers are touched.
 */
#define ABS3_SSSE3\
"pabsw %%mm3, %%mm3 \n"\
"pabsw %%mm4, %%mm4 \n"\
"pabsw %%mm5, %%mm5 \n"
/* Generate add_png_paeth_prediction_mmx2() using the pmaxsw-based abs. */
PAETH(mmx2, ABS3_MMX2)
#if HAVE_SSSE3
/* Generate add_png_paeth_prediction_ssse3() using pabsw; only emitted when
 * HAVE_SSSE3 is non-zero. */
PAETH(ssse3, ABS3_SSSE3)
#endif
static
void
add_bytes_l2_mmx
(
uint8_t
*
dst
,
uint8_t
*
src1
,
uint8_t
*
src2
,
int
w
)
{
x86_reg
i
=
0
;
__asm__
volatile
(
"jmp 2f
\n\t
"
"1:
\n\t
"
"movq (%2, %0), %%mm0
\n\t
"
"movq 8(%2, %0), %%mm1
\n\t
"
"paddb (%3, %0), %%mm0
\n\t
"
"paddb 8(%3, %0), %%mm1
\n\t
"
"movq %%mm0, (%1, %0)
\n\t
"
"movq %%mm1, 8(%1, %0)
\n\t
"
"add $16, %0
\n\t
"
"2:
\n\t
"
"cmp %4, %0
\n\t
"
" js 1b
\n\t
"
:
"+r"
(
i
)
:
"r"
(
dst
),
"r"
(
src1
),
"r"
(
src2
),
"r"
((
x86_reg
)
w
-
15
)
);
for
(;
i
<
w
;
i
++
)
dst
[
i
]
=
src1
[
i
]
+
src2
[
i
];
}
/* Prototypes of the externally assembled PNG DSP routines
 * (presumably emitted by pngdsp.asm via cglobal -- confirm name mangling). */
void ff_add_png_paeth_prediction_mmx2(uint8_t *dst, uint8_t *src,
                                      uint8_t *top, int w, int bpp);
void ff_add_png_paeth_prediction_ssse3(uint8_t *dst, uint8_t *src,
                                       uint8_t *top, int w, int bpp);
void ff_add_bytes_l2_mmx(uint8_t *dst, uint8_t *src1,
                         uint8_t *src2, int w);
/**
 * Install the x86-optimised PNG DSP routines into @p dsp.
 *
 * Nothing is installed unless HAVE_YASM is set.  Each function pointer is
 * only overridden when av_get_cpu_flags() reports the matching instruction
 * set, so a CPU without MMX/MMX2 keeps whatever implementation the caller
 * put in the context beforehand.  Later checks override earlier ones, so
 * the SSSE3 Paeth routine wins over the MMX2 one when both are available.
 *
 * @param dsp  context whose add_bytes_l2 / add_paeth_prediction pointers
 *             may be replaced
 */
void ff_pngdsp_init_x86(PNGDSPContext *dsp)
{
#if HAVE_YASM
    int flags = av_get_cpu_flags();

    if (flags & AV_CPU_FLAG_MMX)
        dsp->add_bytes_l2         = ff_add_bytes_l2_mmx;
    if (flags & AV_CPU_FLAG_MMX2)
        dsp->add_paeth_prediction = ff_add_png_paeth_prediction_mmx2;
    if (flags & AV_CPU_FLAG_SSSE3)
        dsp->add_paeth_prediction = ff_add_png_paeth_prediction_ssse3;
#endif
}
libavcodec/x86/pngdsp.asm
0 → 100644
浏览文件 @
59f474b4
;******************************************************************************
;* x86 optimizations for PNG decoding
;*
;* Copyright (c) 2008 Loren Merritt <lorenm@u.washington.edu>
;*
;* This file is part of Libav.
;*
;* Libav is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* Libav is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with Libav; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "x86inc.asm"
%include "x86util.asm"
; Read-only data section (SECTION_RODATA is presumably the x86inc.asm
; section macro -- confirm).
SECTION_RODATA

; Word constant defined outside this file; used below as the 0x00ff mask
; applied before packing words back to bytes.
cextern pw_255

section .text align=16
;------------------------------------------------------------------------------
; ADD_BYTES_FN %1
;   %1 = nr. of xmm registers used
;
; Emits add_bytes_l2(dst, src1, src2, w) for the instruction set selected by
; the preceding INIT_* line:
;     dst[i] = src1[i] + src2[i]   for 0 <= i < w   (paddb: bytes wrap)
;
; Named registers (cglobal: 4 arguments, 6 GPRs):
;   dst, src1, src2 - row pointers
;   wa              - w on entry; then w rounded down to a multiple of
;                     2*mmsize (vector loop bound); its low byte (wab) is
;                     reused as the scratch byte in the scalar loop
;   w               - copy of the full width (scalar loop bound)
;   i               - running byte offset
;
; The vector loop handles 2*mmsize bytes per pass; the scalar loop finishes
; the remaining w mod (2*mmsize) bytes one at a time.
;------------------------------------------------------------------------------
%macro ADD_BYTES_FN 1
cglobal add_bytes_l2, 4, 6, %1, dst, src1, src2, wa, w, i
%if ARCH_X86_64
    movsxd             waq, wad                 ; w is a 32-bit int argument
%endif
    xor                 iq, iq                  ; i = 0

    ; vector loop
    mov                 wq, waq                 ; keep the full width for the tail
    and                waq, ~(mmsize*2-1)       ; wa = w rounded down to 2*mmsize
    jmp .end_v
.loop_v:
    mova                m0, [src1q+iq]
    mova                m1, [src1q+iq+mmsize]
    paddb               m0, [src2q+iq]
    paddb               m1, [src2q+iq+mmsize]
    mova  [dstq+iq       ], m0
    mova  [dstq+iq+mmsize], m1
    add                 iq, mmsize*2
.end_v:
    cmp                 iq, waq
    jl .loop_v

    ; scalar loop for leftover
    jmp .end_s
.loop_s:
    mov                wab, [src1q+iq]          ; wa is free here: vector loop is done
    add                wab, [src2q+iq]
    mov          [dstq+iq], wab
    inc                 iq
.end_s:
    cmp                 iq, wq
    jl .loop_s
    REP_RET
%endmacro

INIT_MMX mmx
ADD_BYTES_FN 0
;------------------------------------------------------------------------------
; ADD_PAETH_PRED_FN %1
;   %1 = nr. of xmm registers used (passed through to cglobal)
;
; Emits add_png_paeth_prediction(dst, src, top, w, bpp) for the instruction
; set selected by the preceding INIT_* line.
;
; Named registers (cglobal: 5 arguments, 7 GPRs):
;   dst  - output pointer; doubles as the loop cursor
;   src  - turned into the offset (src - dst), so [srcq+dstq] tracks dst
;   top  - turned into the offset (top - dst), so [topq+dstq] tracks dst
;   w    - width in bytes
;   bpp  - cursor step per loop pass
;   end  - dst + w - (mmsize/2-1): last cursor value that is still processed
;   cntr - reserved by the cglobal line; not used in this macro body
;
; Per pass, with a = previous output (m0), b = top at the cursor (m1) and
; c = previous top (m2), all widened to words:
;   m3 = |c - b|, m4 = |c - a|, m5 = |2c - a - b|
; The compare masks pick a, b or c per word, src is added, the sum is masked
; with pw_255, packed to bytes and stored at dst.  m0 keeps the unpacked
; result as the next pass's "a"; m7 is the zero register for unpacking.
;
; NOTE(review): each pass stores mmsize/2 bytes but the cursor advances by
; bpp -- presumably callers only pass bpp values for which that is valid;
; confirm against the caller.
;------------------------------------------------------------------------------
%macro ADD_PAETH_PRED_FN 1
cglobal add_png_paeth_prediction, 5, 7, %1, dst, src, top, w, bpp, end, cntr
%if ARCH_X86_64
    movsxd            bppq, bppd                ; bpp and w are 32-bit int arguments
    movsxd              wq, wd
%endif
    lea               endq, [dstq+wq-(mmsize/2-1)]
    sub               topq, dstq                ; top -> offset relative to dst
    sub               srcq, dstq                ; src -> offset relative to dst
    sub               dstq, bppq                ; step back to read dst[-bpp] / top[-bpp]
    pxor                m7, m7
    movh                m0, [dstq]              ; a = dst[-bpp ...]
    movh                m1, [topq+dstq]         ; becomes c on the first pass
    punpcklbw           m0, m7
    punpcklbw           m1, m7
    add               dstq, bppq                ; back to the first output position
.loop:
    mova                m2, m1                  ; c = previous top
    movh                m1, [topq+dstq]         ; b = top at the cursor
    mova                m3, m2
    punpcklbw           m1, m7
    mova                m4, m2
    psubw               m3, m1                  ; m3 = c - b
    psubw               m4, m0                  ; m4 = c - a
    mova                m5, m3
    paddw               m5, m4                  ; m5 = 2c - a - b
%if cpuflag(ssse3)
    pabsw               m3, m3
    pabsw               m4, m4
    pabsw               m5, m5
%else ; !cpuflag(ssse3)
    ; abs(x) = max(x, 0 - x); m7 is zero on entry and is re-zeroed at the end
    psubw               m7, m5
    pmaxsw              m5, m7
    pxor                m6, m6
    pxor                m7, m7
    psubw               m6, m3
    psubw               m7, m4
    pmaxsw              m3, m6
    pmaxsw              m4, m7
    pxor                m7, m7
%endif ; cpuflag(ssse3)
    mova                m6, m4
    pminsw              m6, m5                  ; min(m4, m5)
    pcmpgtw             m3, m6                  ; m3 = mask: a is NOT selected
    pcmpgtw             m4, m5                  ; m4 = mask: c preferred over b
    mova                m6, m4
    pand                m4, m3                  ; m4 = mask: select c
    pandn               m6, m3                  ; m6 = mask: select b
    pandn               m3, m0                  ; m3 = a where a is selected
    movh                m0, [srcq+dstq]
    pand                m6, m1                  ; b where selected
    pand                m2, m4                  ; c where selected
    punpcklbw           m0, m7
    paddw               m0, m6
    paddw               m3, m2
    paddw               m0, m3                  ; src + selected predictor
    pand                m0, [pw_255]            ; keep the low byte of each word
    mova                m3, m0
    packuswb            m3, m3
    movh            [dstq], m3
    add               dstq, bppq
    cmp               dstq, endq
    jle .loop
    REP_RET
%endmacro

INIT_MMX mmx2
ADD_PAETH_PRED_FN 0

INIT_MMX ssse3
ADD_PAETH_PRED_FN 0
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录