Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
小白菜888
Ffmpeg
提交
659d4ba5
F
Ffmpeg
项目概览
小白菜888
/
Ffmpeg
通知
3
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
F
Ffmpeg
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
659d4ba5
编写于
2月 06, 2013
作者:
D
Daniel Kang
提交者:
Luca Barbato
2月 06, 2013
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
dsputil: x86: Convert h263 loop filter to yasm
Signed-off-by:
N
Luca Barbato
<
lu_zero@gentoo.org
>
上级
12b54a1f
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
171 additions
and
177 deletions
+171
-177
libavcodec/x86/dsputil.asm
libavcodec/x86/dsputil.asm
+163
-0
libavcodec/x86/dsputil_mmx.c
libavcodec/x86/dsputil_mmx.c
+8
-177
未找到文件。
libavcodec/x86/dsputil.asm
浏览文件 @
659d4ba5
...
...
@@ -22,6 +22,8 @@
%include "libavutil/x86/x86util.asm"
SECTION_RODATA

; Externally defined symbols used by the H.263 loop filter code in this file:
;   pb_FC                     - used as a pand byte mask in H263_LOOP_FILTER
;   h263_loop_filter_strength - byte table indexed by qscale (referenced below
;                               as ff_h263_loop_filter_strength)
cextern pb_FC
cextern h263_loop_filter_strength
; 16 bytes, each with the value 15
pb_f: times 16 db 15
; 8 bytes of -1; pb_7 follows directly after it.
; NOTE(review): the label name suggests users read 16 bytes spanning into
; pb_7 -- confirm against the users of this constant before reordering.
pb_zzzzzzzz77777777: times 8 db -1
; 8 bytes, each with the value 7
pb_7: times 8 db 7
...
...
@@ -648,3 +650,164 @@ BSWAP32_BUF
INIT_XMM
ss
se3
BSWAP32_BUF
; H263_LOOP_FILTER row_a, row_b, row_c, row_d, strength
;
; Core of the H.263 loop filter, operating on four rows of packed bytes.
;   %1..%4 - memory addresses of the four rows (read with mova)
;   %5     - operand loaded with movd; its low byte is replicated into every
;            byte of m2 and used as the filter limit
;
; Results are left in registers, nothing is stored by the macro itself:
;   m3 = filtered row %2, m4 = filtered row %3,
;   m5 = filtered row %1, m6 = filtered row %4
; All of m0-m7 are overwritten.
%macro H263_LOOP_FILTER 5
    pxor         m7, m7              ; m7 = 0, used to zero-extend bytes to words
    mova         m0, [%1]
    mova         m1, [%1]
    mova         m2, [%4]
    mova         m3, [%4]
    punpcklbw    m0, m7
    punpckhbw    m1, m7
    punpcklbw    m2, m7
    punpckhbw    m3, m7
    psubw        m0, m2              ; m0:m1 = [%1] - [%4] as 16-bit words
    psubw        m1, m3
    mova         m2, [%2]
    mova         m3, [%2]
    mova         m4, [%3]
    mova         m5, [%3]
    punpcklbw    m2, m7
    punpckhbw    m3, m7
    punpcklbw    m4, m7
    punpckhbw    m5, m7
    psubw        m4, m2              ; m4:m5 = [%3] - [%2]
    psubw        m5, m3
    psllw        m4, 2               ; * 4
    psllw        m5, 2
    paddw        m4, m0              ; m4:m5 = 4*([%3]-[%2]) + ([%1]-[%4])
    paddw        m5, m1
    pxor         m6, m6
    pcmpgtw      m6, m4              ; m6/m7 = all-ones word mask where the sum is negative
    pcmpgtw      m7, m5
    pxor         m4, m6              ; xor + subtract of the mask: absolute value
    pxor         m5, m7
    psubw        m4, m6
    psubw        m5, m7
    psrlw        m4, 3               ; magnitude >> 3
    psrlw        m5, 3
    packuswb     m4, m5              ; m4 = magnitudes packed to bytes
    packsswb     m6, m7              ; m6 = sign masks packed to bytes
    pxor         m7, m7
    movd         m2, %5
    punpcklbw    m2, m2              ; three self-interleaves replicate the low byte
    punpcklbw    m2, m2              ; of %5 into all bytes of m2
    punpcklbw    m2, m2
    psubusb      m2, m4              ; m2 = sat(limit - mag)
    mova         m3, m2
    psubusb      m3, m4              ; m3 = sat(m2 - mag)
    psubb        m2, m3              ; m2 = m2 - m3: per-byte correction amount
    mova         m3, [%2]
    mova         m4, [%3]
    pxor         m3, m6              ; xor with the sign mask before and after the
    pxor         m4, m6              ; saturating add/sub applies the correction in
    paddusb      m3, m2              ; the direction given by the sign
    psubusb      m4, m2
    pxor         m3, m6              ; m3 = filtered row %2
    pxor         m4, m6              ; m4 = filtered row %3
    paddusb      m2, m2              ; m2 = 2 * correction (saturating)
    packsswb     m0, m1              ; m0 = [%1] - [%4] packed to signed bytes
    pcmpgtb      m7, m0              ; m7 = byte sign mask of that difference
    pxor         m0, m7              ; absolute value of the difference
    psubb        m0, m7
    mova         m1, m0
    psubusb      m0, m2
    psubb        m1, m0              ; m1 = |diff| - sat(|diff| - m2)
    pand         m1, [pb_FC]         ; mask before the word shift below
                                     ; NOTE(review): presumably clears the low 2 bits of
                                     ; each byte so psrlw acts as a per-byte shift -- confirm
    psrlw        m1, 2
    pxor         m1, m7              ; re-apply the sign of the difference
    psubb        m1, m7
    mova         m5, [%1]
    mova         m6, [%4]
    psubb        m5, m1              ; m5 = filtered row %1
    paddb        m6, m1              ; m6 = filtered row %4
%endmacro
INIT_MMX mmx
; void h263_v_loop_filter(uint8_t *src, int stride, int qscale)
;
; Filters the four rows src-2*stride, src-stride, src and src+stride in place.
; r0 = src, r1 = stride, r2 = qscale on entry.
cglobal h263_v_loop_filter, 3,5
    movsxdifnidn r1, r1d
    movsxdifnidn r2, r2d

    ; r2 = 2 * h263_loop_filter_strength[qscale]
    lea          r4, [ff_h263_loop_filter_strength]
    movzx       r3d, BYTE [r4+r2]
    movsx        r2, r3b             ; NOTE(review): sign-extends the table byte;
                                     ; assumes entries are < 128 -- confirm
    shl          r2, 1

    mov          r3, r0
    sub          r3, r1              ; r3 = src - stride
    mov          r4, r3
    sub          r4, r1              ; r4 = src - 2*stride
    H263_LOOP_FILTER r4, r3, r0, r0+r1, r2d

    ; the macro leaves the filtered rows in m3..m6; write them back
    mova       [r3], m3
    mova       [r0], m4
    mova       [r4], m5
    mova    [r0+r1], m6
    RET
; TRANSPOSE4X4 src, dst
;
; Loads 4 bytes from each of four rows starting at %1 (rows at %1, %1+r1,
; %1+r1*2 and %1+r3), transposes the 4x4 byte block and writes the four
; resulting 4-byte rows to %2+0, %2+8, %2+16 and %2+24.
; Expects r1 and r3 to be set up by the caller (the caller in this file uses
; r1 = stride and r3 = 3*stride). Overwrites m0-m3.
%macro TRANSPOSE4X4 2
    movd         m0, [%1]
    movd         m1, [%1+r1]
    movd         m2, [%1+r1*2]
    movd         m3, [%1+r3]
    punpcklbw    m0, m1              ; interleave bytes of rows 0/1
    punpcklbw    m2, m3              ; interleave bytes of rows 2/3
    mova         m1, m0
    punpcklwd    m0, m2              ; m0 = transposed columns 0 and 1
    punpckhwd    m1, m2              ; m1 = transposed columns 2 and 3
    movd   [%2+ 0], m0
    punpckhdq    m0, m0              ; move the upper dword down for the next store
    movd   [%2+ 8], m0
    movd   [%2+16], m1
    punpckhdq    m1, m1
    movd   [%2+24], m1
%endmacro
; void h263_h_loop_filter(uint8_t *src, int stride, int qscale)
;
; Horizontal variant: transposes an 8-row x 4-column byte block starting at
; src-2 into a 32-byte stack buffer, runs H263_LOOP_FILTER on the four
; 8-byte rows of that buffer, then transposes the results back and stores
; 4 bytes to each of the 8 source rows.
INIT_MMX mmx
cglobal h263_h_loop_filter, 3,5,0,32
    movsxdifnidn r1, r1d
    movsxdifnidn r2, r2d

    ; r2 = 2 * h263_loop_filter_strength[qscale]
    lea          r4, [ff_h263_loop_filter_strength]
    movzx       r3d, BYTE [r4+r2]
    movsx        r2, r3b             ; NOTE(review): sign-extends the table byte;
                                     ; assumes entries are < 128 -- confirm
    shl          r2, 1

    sub          r0, 2               ; start two bytes to the left of src
    lea          r3, [r1*3]          ; r3 = 3*stride

    ; rows 0-3 go to the low 4 bytes, rows 4-7 to the high 4 bytes of each
    ; 8-byte buffer row
    TRANSPOSE4X4 r0, rsp
    lea          r4, [r0+r1*4]       ; r4 = address of row 4
    TRANSPOSE4X4 r4, rsp+4

    H263_LOOP_FILTER rsp, rsp+8, rsp+16, rsp+24, r2d

    ; transpose the filtered rows (m5, m3, m4, m6) back to column order
    mova         m1, m5
    mova         m0, m4
    punpcklbw    m5, m3
    punpcklbw    m4, m6
    punpckhbw    m1, m3
    punpckhbw    m0, m6
    mova         m3, m5
    mova         m6, m1
    punpcklwd    m5, m4
    punpcklwd    m1, m0
    punpckhwd    m3, m4
    punpckhwd    m6, m0
    ; rows 0-3
    movd       [r0], m5
    punpckhdq    m5, m5
    movd  [r0+r1*1], m5
    movd  [r0+r1*2], m3
    punpckhdq    m3, m3
    movd    [r0+r3], m3
    ; rows 4-7
    movd       [r4], m1
    punpckhdq    m1, m1
    movd  [r4+r1*1], m1
    movd  [r4+r1*2], m6
    punpckhdq    m6, m6
    movd    [r4+r3], m6
    RET
libavcodec/x86/dsputil_mmx.c
浏览文件 @
659d4ba5
...
...
@@ -651,181 +651,12 @@ static void add_hfyu_median_prediction_cmov(uint8_t *dst, const uint8_t *top,
*
left_top
=
tl
;
}
#endif
#endif
/* HAVE_INLINE_ASM */
/**
 * Transpose a 4x4 block of bytes using MMX.
 *
 * Reads 4 bytes from each of four source rows (src, src + src_stride, ...)
 * and writes four 4-byte rows to dst, dst + dst_stride, ...
 *
 * @param dst        destination of the transposed block
 * @param src        top-left byte of the source block
 * @param dst_stride byte distance between destination rows
 * @param src_stride byte distance between source rows
 *
 * The asm advances its private copies of dst and src ("+&r" operands) and
 * overwrites mm0-mm3, which are not listed as clobbers.
 */
static inline void transpose4x4(uint8_t *dst, uint8_t *src,
                                x86_reg dst_stride, x86_reg src_stride){
    __asm__ volatile (
        //FIXME could save 1 instruction if done as 8x4 ...
        "movd (%1), %%mm0 \n\t"      /* row 0 */
        "add %3, %1 \n\t"            /* src now points at row 1 */
        "movd (%1), %%mm1 \n\t"      /* row 1 */
        "movd (%1,%3,1), %%mm2 \n\t" /* row 2 */
        "movd (%1,%3,2), %%mm3 \n\t" /* row 3 */
        "punpcklbw %%mm1, %%mm0 \n\t"
        "punpcklbw %%mm3, %%mm2 \n\t"
        "movq %%mm0, %%mm1 \n\t"
        "punpcklwd %%mm2, %%mm0 \n\t" /* mm0 = output rows 0 and 1 */
        "punpckhwd %%mm2, %%mm1 \n\t" /* mm1 = output rows 2 and 3 */
        "movd %%mm0, (%0) \n\t"
        "add %2, %0 \n\t"            /* dst now points at output row 1 */
        "punpckhdq %%mm0, %%mm0 \n\t"
        "movd %%mm0, (%0) \n\t"
        "movd %%mm1, (%0,%2,1) \n\t"
        "punpckhdq %%mm1, %%mm1 \n\t"
        "movd %%mm1, (%0,%2,2) \n\t"

        : "+&r"(dst),
          "+&r"(src)
        : "r"(dst_stride),
          "r"(src_stride)
        : "memory"
    );
}
/*
 * Inline-asm body of the H.263 loop filter, meant to be pasted at the start
 * of an __asm__ statement whose operands are laid out as follows:
 *   %0, %1, %2, %3 - the four 8-byte rows to filter (memory operands)
 *   %4             - value loaded with movd; its low byte is replicated into
 *                    all bytes of mm2 and used as the filter limit
 *   %5             - byte mask applied with pand before the final shift
 *
 * Nothing is stored by the macro itself. On exit the filtered rows are in
 *   mm5 (row %0), mm3 (row %1), mm4 (row %2), mm6 (row %3)
 * and all of mm0-mm7 have been overwritten.
 */
#define H263_LOOP_FILTER \
"pxor %%mm7, %%mm7 \n\t" \
"movq %0, %%mm0 \n\t" \
"movq %0, %%mm1 \n\t" \
"movq %3, %%mm2 \n\t" \
"movq %3, %%mm3 \n\t" \
"punpcklbw %%mm7, %%mm0 \n\t" \
"punpckhbw %%mm7, %%mm1 \n\t" \
"punpcklbw %%mm7, %%mm2 \n\t" \
"punpckhbw %%mm7, %%mm3 \n\t" \
"psubw %%mm2, %%mm0 \n\t" \
"psubw %%mm3, %%mm1 \n\t" \
"movq %1, %%mm2 \n\t" \
"movq %1, %%mm3 \n\t" \
"movq %2, %%mm4 \n\t" \
"movq %2, %%mm5 \n\t" \
"punpcklbw %%mm7, %%mm2 \n\t" \
"punpckhbw %%mm7, %%mm3 \n\t" \
"punpcklbw %%mm7, %%mm4 \n\t" \
"punpckhbw %%mm7, %%mm5 \n\t" \
"psubw %%mm2, %%mm4 \n\t" \
"psubw %%mm3, %%mm5 \n\t" \
"psllw $2, %%mm4 \n\t" \
"psllw $2, %%mm5 \n\t" \
"paddw %%mm0, %%mm4 \n\t" \
"paddw %%mm1, %%mm5 \n\t" \
"pxor %%mm6, %%mm6 \n\t" \
"pcmpgtw %%mm4, %%mm6 \n\t" \
"pcmpgtw %%mm5, %%mm7 \n\t" \
"pxor %%mm6, %%mm4 \n\t" \
"pxor %%mm7, %%mm5 \n\t" \
"psubw %%mm6, %%mm4 \n\t" \
"psubw %%mm7, %%mm5 \n\t" \
"psrlw $3, %%mm4 \n\t" \
"psrlw $3, %%mm5 \n\t" \
"packuswb %%mm5, %%mm4 \n\t" \
"packsswb %%mm7, %%mm6 \n\t" \
"pxor %%mm7, %%mm7 \n\t" \
"movd %4, %%mm2 \n\t" \
"punpcklbw %%mm2, %%mm2 \n\t" \
"punpcklbw %%mm2, %%mm2 \n\t" \
"punpcklbw %%mm2, %%mm2 \n\t" \
"psubusb %%mm4, %%mm2 \n\t" \
"movq %%mm2, %%mm3 \n\t" \
"psubusb %%mm4, %%mm3 \n\t" \
"psubb %%mm3, %%mm2 \n\t" \
"movq %1, %%mm3 \n\t" \
"movq %2, %%mm4 \n\t" \
"pxor %%mm6, %%mm3 \n\t" \
"pxor %%mm6, %%mm4 \n\t" \
"paddusb %%mm2, %%mm3 \n\t" \
"psubusb %%mm2, %%mm4 \n\t" \
"pxor %%mm6, %%mm3 \n\t" \
"pxor %%mm6, %%mm4 \n\t" \
"paddusb %%mm2, %%mm2 \n\t" \
"packsswb %%mm1, %%mm0 \n\t" \
"pcmpgtb %%mm0, %%mm7 \n\t" \
"pxor %%mm7, %%mm0 \n\t" \
"psubb %%mm7, %%mm0 \n\t" \
"movq %%mm0, %%mm1 \n\t" \
"psubusb %%mm2, %%mm0 \n\t" \
"psubb %%mm0, %%mm1 \n\t" \
"pand %5, %%mm1 \n\t" \
"psrlw $2, %%mm1 \n\t" \
"pxor %%mm7, %%mm1 \n\t" \
"psubb %%mm7, %%mm1 \n\t" \
"movq %0, %%mm5 \n\t" \
"movq %3, %%mm6 \n\t" \
"psubb %%mm1, %%mm5 \n\t" \
"paddb %%mm1, %%mm6 \n\t"
/**
 * H.263 vertical loop filter, MMX inline-asm version.
 *
 * Filters 8 bytes in each of the four rows src - 2*stride, src - stride,
 * src and src + stride in place.
 *
 * @param src    pointer to the third of the four rows
 * @param stride byte distance between rows
 * @param qscale index into ff_h263_loop_filter_strength
 *
 * Compiles to nothing unless an H.263 decoder or encoder is configured.
 */
static void h263_v_loop_filter_mmx(uint8_t *src, int stride, int qscale)
{
    if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        /* The filter macro expects twice the table strength in operand %4. */
        const int limit = 2 * ff_h263_loop_filter_strength[qscale];

        /* The four rows, named by their offset from src. */
        uint64_t *const row_m2 = (uint64_t *)(src - 2 * stride);
        uint64_t *const row_m1 = (uint64_t *)(src - 1 * stride);
        uint64_t *const row_0  = (uint64_t *)(src + 0 * stride);
        uint64_t *const row_p1 = (uint64_t *)(src + 1 * stride);

        __asm__ volatile (
            H263_LOOP_FILTER

            /* write the filtered rows back from the macro's result registers */
            "movq %%mm3, %1 \n\t"
            "movq %%mm4, %2 \n\t"
            "movq %%mm5, %0 \n\t"
            "movq %%mm6, %3 \n\t"
            : "+m"(*row_m2),
              "+m"(*row_m1),
              "+m"(*row_0),
              "+m"(*row_p1)
            : "g"(limit), "m"(ff_pb_FC)
        );
    }
}
static
void
h263_h_loop_filter_mmx
(
uint8_t
*
src
,
int
stride
,
int
qscale
)
{
if
(
CONFIG_H263_DECODER
||
CONFIG_H263_ENCODER
)
{
const
int
strength
=
ff_h263_loop_filter_strength
[
qscale
];
DECLARE_ALIGNED
(
8
,
uint64_t
,
temp
)[
4
];
uint8_t
*
btemp
=
(
uint8_t
*
)
temp
;
src
-=
2
;
transpose4x4
(
btemp
,
src
,
8
,
stride
);
transpose4x4
(
btemp
+
4
,
src
+
4
*
stride
,
8
,
stride
);
__asm__
volatile
(
H263_LOOP_FILTER
// 5 3 4 6
:
"+m"
(
temp
[
0
]),
"+m"
(
temp
[
1
]),
"+m"
(
temp
[
2
]),
"+m"
(
temp
[
3
])
:
"g"
(
2
*
strength
),
"m"
(
ff_pb_FC
)
);
__asm__
volatile
(
"movq %%mm5, %%mm1
\n\t
"
"movq %%mm4, %%mm0
\n\t
"
"punpcklbw %%mm3, %%mm5
\n\t
"
"punpcklbw %%mm6, %%mm4
\n\t
"
"punpckhbw %%mm3, %%mm1
\n\t
"
"punpckhbw %%mm6, %%mm0
\n\t
"
"movq %%mm5, %%mm3
\n\t
"
"movq %%mm1, %%mm6
\n\t
"
"punpcklwd %%mm4, %%mm5
\n\t
"
"punpcklwd %%mm0, %%mm1
\n\t
"
"punpckhwd %%mm4, %%mm3
\n\t
"
"punpckhwd %%mm0, %%mm6
\n\t
"
"movd %%mm5, (%0)
\n\t
"
"punpckhdq %%mm5, %%mm5
\n\t
"
"movd %%mm5, (%0, %2)
\n\t
"
"movd %%mm3, (%0, %2, 2)
\n\t
"
"punpckhdq %%mm3, %%mm3
\n\t
"
"movd %%mm3, (%0, %3)
\n\t
"
"movd %%mm1, (%1)
\n\t
"
"punpckhdq %%mm1, %%mm1
\n\t
"
"movd %%mm1, (%1, %2)
\n\t
"
"movd %%mm6, (%1, %2, 2)
\n\t
"
"punpckhdq %%mm6, %%mm6
\n\t
"
"movd %%mm6, (%1, %3)
\n\t
"
::
"r"
(
src
),
"r"
(
src
+
4
*
stride
),
"r"
((
x86_reg
)
stride
),
"r"
((
x86_reg
)(
3
*
stride
))
);
}
}
/* Prototypes for the H.263 loop filters; no C definition is visible here.
 * NOTE(review): presumably these resolve to the cglobal h263_v_loop_filter /
 * h263_h_loop_filter routines assembled under INIT_MMX in dsputil.asm --
 * confirm against the build. */
void ff_h263_v_loop_filter_mmx(uint8_t *src, int stride, int qscale);
void ff_h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale);
#if HAVE_INLINE_ASM
/* Draw the edges of width 'w' of an image of size width, height
* this MMX version can only handle w == 8 || w == 16. */
static
void
draw_edges_mmx
(
uint8_t
*
buf
,
int
wrap
,
int
width
,
int
height
,
...
...
@@ -1653,14 +1484,14 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
c
->
gmc
=
gmc_mmx
;
c
->
add_bytes
=
add_bytes_mmx
;
#endif
/* HAVE_INLINE_ASM */
#if HAVE_YASM
if
(
CONFIG_H263_DECODER
||
CONFIG_H263_ENCODER
)
{
c
->
h263_v_loop_filter
=
h263_v_loop_filter_mmx
;
c
->
h263_h_loop_filter
=
h263_h_loop_filter_mmx
;
c
->
h263_v_loop_filter
=
ff_
h263_v_loop_filter_mmx
;
c
->
h263_h_loop_filter
=
ff_
h263_h_loop_filter_mmx
;
}
#endif
/* HAVE_INLINE_ASM */
#if HAVE_YASM
c
->
vector_clip_int32
=
ff_vector_clip_int32_mmx
;
#endif
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录